diff --git a/.gitignore b/.gitignore index fcbb18e667efb2eb74151572917015f302a33eff..4205259e44f53e38f85702960d2499ffcf11103b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ functions.dot functions.pdf **/*.vtk -build +build* dist pairs.egg-info output* diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..0ecd4d90006027630096f1c57c268bb5d494e7da --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,266 @@ +cmake_minimum_required(VERSION 3.18 FATAL_ERROR) +project(pairs CXX) +# Set default build type if none is specified +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build (Debug, Release, etc.)" FORCE) +endif() + +set(CMAKE_CXX_FLAGS_RELEASE "-O3") +set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -DDEBUG") + +option(USE_MPI "USE_MPI" ON) +option(COMPILE_CUDA "COMPILE_CUDA" OFF) +option(GENERATE_WHOLE_PROGRAM "Generate the whole program (i.e. including the 'main' function). No additional source files are needed." OFF) +option(BUILD_APP "Build a stand-alone app which uses the P4IRS modular interface. Provide your source files with -DUSER_SOURCE_FILES" OFF) + +if(GENERATE_WHOLE_PROGRAM AND BUILD_APP) + message(FATAL_ERROR "You must choose either GENERATE_WHOLE_PROGRAM or BUILD_APP or neither.\n + Choose neither if you only want to use the P4IRS library in your project (in a seperate build system).") +endif() + +set(INPUT_SCRIPT ${INPUT_SCRIPT} CACHE PATH "The input python script triggering code generation") +if(NOT EXISTS ${INPUT_SCRIPT}) + message(FATAL_ERROR "INPUT_SCRIPT doesn't exist! 
Specify it with -DINPUT_SCRIPT=/path/to/script.py") +endif() +get_filename_component(INPUT_SCRIPT_NAME ${INPUT_SCRIPT} NAME_WE) + +list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") # project-relative, so the Find modules are still located when pairs is not the top-level project + +#================================================================================ +# Setup directories ============================================================= +#================================================================================ +file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/data" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) # copy the data directory into the build tree + +set(OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/output) +if(EXISTS ${OUTPUT_DIR}) + file(REMOVE_RECURSE ${OUTPUT_DIR}) # every configure run starts with an empty output directory +endif() +file(MAKE_DIRECTORY ${OUTPUT_DIR}) + +#================================================================================ +# Generated header (internally used by runtime files) =========================== +#================================================================================ +# TODO: Unify all interfaces +set(GEN_INTERFACE_DIR ${CMAKE_CURRENT_BINARY_DIR}/internal_interfaces) +set(GEN_INTERFACE_HEADER ${CMAKE_CURRENT_BINARY_DIR}/last_generated.hpp) +file(MAKE_DIRECTORY ${GEN_INTERFACE_DIR}) + +#================================================================================ +# RUNTIME_COMMON_FILES ========================================================== +#================================================================================ +set(RUNTIME_COMMON_FILES + runtime/pairs.cpp + runtime/copper_fcc_lattice.cpp + runtime/create_body.cpp + runtime/dem_sc_grid.cpp + runtime/read_from_file.cpp + runtime/stats.cpp + runtime/thermo.cpp + runtime/timing.cpp + runtime/vtk.cpp + runtime/domain/regular_6d_stencil.cpp) + +#================================================================================ +# PAIRS_TARGET ================================================================== +#================================================================================ +set(PAIRS_TARGET "pairs") + +# PAIRS dependencies 
+set(PAIRS_LINK_LIBRARIES) +set(PAIRS_LINK_DIRS ${CMAKE_CURRENT_BINARY_DIR}) +set(PAIRS_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}) + +# The target can either be an executable or a static library +if(GENERATE_WHOLE_PROGRAM OR BUILD_APP) + add_executable(${PAIRS_TARGET} ${RUNTIME_COMMON_FILES}) +else() + add_library(${PAIRS_TARGET} STATIC ${RUNTIME_COMMON_FILES}) + list(APPEND PAIRS_LINK_LIBRARIES ${PAIRS_TARGET}) +endif() + +# Include P4IRS 'runtime' dir +target_include_directories(${PAIRS_TARGET} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/runtime) +list(APPEND PAIRS_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/runtime) + +set_target_properties(${PAIRS_TARGET} PROPERTIES + CXX_STANDARD_REQUIRED ON + CXX_STANDARD 17 +) + +#================================================================================ +# USER_SOURCE_FILES ============================================================= +#================================================================================ +if(BUILD_APP) + set(USER_SOURCE_FILES "" CACHE STRING "List of source files to compile (semicolon-separated)") + if(NOT USER_SOURCE_FILES) + message(FATAL_ERROR "BUILD_APP is ON. 
You have to specify source files like this:\n + -DUSER_SOURCE_FILES=src/main.cpp;src/helper.cpp") + endif() + + foreach(file ${USER_SOURCE_FILES}) + if(NOT EXISTS ${file}) + message(FATAL_ERROR "File '${file}' does not exist!") + endif() + endforeach() + target_sources(${PAIRS_TARGET} PRIVATE ${USER_SOURCE_FILES}) +endif() + +#================================================================================ +# waLBerla ====================================================================== +#================================================================================ +set(WALBERLA_DIR ${WALBERLA_DIR} CACHE PATH "Path to waLBerla source directory (required only when using BlockForest partitioning).") + +if(WALBERLA_DIR) + if(EXISTS "${WALBERLA_DIR}") + target_compile_definitions(${PAIRS_TARGET} PUBLIC USE_WALBERLA) + else() + message(FATAL_ERROR "Invalid WALBERLA_DIR: '${WALBERLA_DIR}' does not exist.") + endif() + + set(RUNTIME_WALBERLA_FILES + runtime/domain/block_forest.cpp + ) + + # TODO: Generate the host/device functions for computing weights + if(COMPILE_CUDA) + list(APPEND RUNTIME_WALBERLA_FILES runtime/boundary_weights.cu) + else() + list(APPEND RUNTIME_WALBERLA_FILES runtime/boundary_weights.cpp) + endif() + + target_sources(${PAIRS_TARGET} PRIVATE ${RUNTIME_WALBERLA_FILES}) + + ## Linking walberla modules + set(PAIRS_WALBERLA_DEPENDENCIES blockforest core pe) + find_package(waLBerla REQUIRED) # resolved by cmake/FindwaLBerla.cmake, found through CMAKE_MODULE_PATH + set(WALBERLA_LINK_LIBRARIES_KEYWORD PUBLIC) + target_link_modules(${PAIRS_TARGET} ${PAIRS_WALBERLA_DEPENDENCIES}) # This is a walberla helper function + + ## TODO: PAIRS_LINK_DIRS and PAIRS_LINK_LIBRARIES for walberla modules *AND* their dependencies + ## This implementation only works if the consumer of the library is itself a walberla app (made within the build system of walberla) + list(APPEND PAIRS_LINK_LIBRARIES ${PAIRS_WALBERLA_DEPENDENCIES}) +endif() + +#================================================================================ +# Install pairs python 
package ================================================== +#================================================================================ +set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE} CACHE STRING "Python executable") + +if(NOT PYTHON_EXECUTABLE) + set(PYTHON_EXECUTABLE python3) +endif() + +execute_process( + COMMAND ${PYTHON_EXECUTABLE} setup.py build + OUTPUT_QUIET + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + +execute_process( + COMMAND ${PYTHON_EXECUTABLE} setup.py install --user + OUTPUT_QUIET + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + +#================================================================================ +# CUDA ========================================================================== +#================================================================================ +if(COMPILE_CUDA) + find_package(CUDA REQUIRED) + enable_language(CUDA) + set(GEN_SOURCES "${CMAKE_CURRENT_BINARY_DIR}/${INPUT_SCRIPT_NAME}.cu") + set(CUDA_ARCH ${CUDA_ARCH} CACHE STRING "CUDA_ARCH environment variable must be set.") + set(TARGET_ARG "gpu") + + # Default arch is 80 + if(NOT CUDA_ARCH) + set(CUDA_ARCH 80) + endif() + + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -rdc=true") + + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g -G -O0 -DDEBUG") + else() + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3") + endif() + + # GPU Direct defaults to ON for CUDA builds; an explicit -DENABLE_GPU_DIRECT=OFF is honoured. + # A plain set() is used on purpose: a doc string and FORCE are only meaningful together with CACHE; + # without CACHE they are appended to the value, giving a list that if() always treats as true. + if(NOT DEFINED ENABLE_GPU_DIRECT) + set(ENABLE_GPU_DIRECT ON) + endif() + + if(ENABLE_GPU_DIRECT) + target_compile_definitions(${PAIRS_TARGET} PRIVATE ENABLE_CUDA_AWARE_MPI) + endif() + + target_sources(${PAIRS_TARGET} PRIVATE runtime/devices/cuda.cu) + target_compile_definitions(${PAIRS_TARGET} PUBLIC PAIRS_TARGET_CUDA) + target_include_directories(${PAIRS_TARGET} PUBLIC 
${CUDA_INCLUDE_DIRS}) + list(APPEND PAIRS_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS}) + + set_target_properties(${PAIRS_TARGET} PROPERTIES + CUDA_RESOLVE_DEVICE_SYMBOLS ON + CUDA_STANDARD 17 + CUDA_SEPARABLE_COMPILATION ON + CUDA_ARCHITECTURES ${CUDA_ARCH}) + + target_link_libraries(${PAIRS_TARGET} PUBLIC ${CUDA_LIBRARIES}) + list(APPEND PAIRS_LINK_LIBRARIES ${CUDA_LIBRARIES}) +else() + if(ENABLE_GPU_DIRECT) + message(FATAL_ERROR "Invalid combination: ENABLE_GPU_DIRECT requires COMPILE_CUDA to be ON") + endif() + + set(GEN_SOURCES "${CMAKE_CURRENT_BINARY_DIR}/${INPUT_SCRIPT_NAME}.cpp") + set(TARGET_ARG "cpu") + target_sources(${PAIRS_TARGET} PRIVATE runtime/devices/dummy.cpp) +endif() + +#================================================================================ +# Generate code and add them to PAIRS_TARGET ==================================== +#================================================================================ +add_custom_command( + OUTPUT ${GEN_SOURCES} ${GEN_INTERFACE_HEADER} + COMMAND ${PYTHON_EXECUTABLE} ${INPUT_SCRIPT} ${TARGET_ARG} + COMMENT "Generating code with P4IRS" + DEPENDS ${INPUT_SCRIPT} VERBATIM) # VERBATIM: arguments are escaped identically on every platform + +add_custom_target(generated_code DEPENDS ${GEN_SOURCES} ${GEN_INTERFACE_HEADER}) +add_dependencies(${PAIRS_TARGET} generated_code) + +target_sources(${PAIRS_TARGET} PRIVATE ${GEN_SOURCES}) +target_include_directories(${PAIRS_TARGET} PRIVATE + ${GEN_INTERFACE_DIR} # Interface header USED INTERNALLY by pairs is located here + ${CMAKE_CURRENT_BINARY_DIR} # Generated source and header FOR USER is located here +) + +#================================================================================ +# MPI =========================================================================== +#================================================================================ +if(USE_MPI) + find_package(MPI REQUIRED) + include_directories(SYSTEM ${MPI_INCLUDE_PATH}) + target_link_libraries(${PAIRS_TARGET} PUBLIC ${MPI_LIBRARIES}) + list(APPEND PAIRS_LINK_LIBRARIES 
"${MPI_LIBRARIES}") + list(APPEND PAIRS_INCLUDE_DIRS "${MPI_INCLUDE_PATH}") +endif() + +#================================================================================ +# LIKWID ======================================================================== +#================================================================================ +if(LIKWID_DIR) + target_compile_options(${PAIRS_TARGET} PRIVATE -DLIKWID_PERFMON -pthread) + + target_link_libraries(${PAIRS_TARGET} PRIVATE ${LIKWID_DIR}/lib/liblikwid.a) + list(APPEND PAIRS_LINK_LIBRARIES ${LIKWID_DIR}/lib/liblikwid.a) + + target_include_directories(${PAIRS_TARGET} PRIVATE ${LIKWID_DIR}/include) # target-scoped instead of directory-wide include_directories() + list(APPEND PAIRS_INCLUDE_DIRS "${LIKWID_DIR}/include") +endif() + +#================================================================================ +# config file =================================================================== +#================================================================================ +configure_file ( "${PROJECT_SOURCE_DIR}/cmake/pairs-config.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/pairs-config.cmake" @ONLY) # @ONLY: substitute only the @VAR@ placeholders used by the template diff --git a/Makefile b/Makefile deleted file mode 100644 index 967a1a9f117d174c758b82a6af3c41c296ded89c..0000000000000000000000000000000000000000 --- a/Makefile +++ /dev/null @@ -1,87 +0,0 @@ -.PHONY: all build clean - -# General settings -TESTCASE=md -PYCMD=python3 - -# C/C++ compiler settings -CC=mpic++ -CFLAGS=-O3 -mavx2 -mfma -fopenmp ${MPI_FLAGS} ${LIKWID_FLAGS} -#CFLAGS=-Ofast -xHost -qopt-zmm-usage=high ${MPI_FLAGS} ${LIKWID_FLAGS} -#CFLAGS=-Ofast -xCORE-AVX512 -qopt-zmm-usage=high ${MPI_FLAGS} ${LIKWID_FLAGS} -DEBUG_FLAGS= -#DEBUG_FLAGS=-DDEBUG - -# CUDA settings -NVCC=nvcc -#NVCC_FLAGS=-O3 -mavx2 -mfma -NVCC_FLAGS=-O3 -arch=sm_80 -mavx2 -mfma -ffast-math -funroll-loops --forward-unknown-to-host-compiler -#NVCC_FLAGS=-O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler -NVCC_PATH:="$(shell which ${NVCC})" -CUDA_FLAGS=-DENABLE_CUDA_AWARE_MPI -CUDART_FLAGS=-lcudart -L 
/apps/SPACK/0.19.1/opt/linux-almalinux8-zen/gcc-8.5.0/nvhpc-23.7-bzxcokzjvx4stynglo4u2ffpljajzlam/Linux_x86_64/23.7/cuda/12.2/targets/x86_64-linux/lib - -# MPI settings -MPI_PATH=/apps/SPACK/0.19.1/opt/linux-almalinux8-zen/intel-2021.10.0/openmpi-4.1.6-ijsnjhq77rjc256wlrp52m37rsq6miff -MPI_FLAGS=-I${MPI_PATH}/include - -# Likwid settings -LIKWID_INC ?= -I/usr/local/include -LIKWID_DEFINES ?= -DLIKWID_PERFMON -LIKWID_LIB ?= -L/usr/local/lib -LIKWID_FLAGS = -llikwid ${LIKWID_INC} ${LIKWID_DEFINES} ${LIKWID_LIB} - -# Other -CPU_OBJ_PATH=obj_cpu -CPU_SRC="$(TESTCASE).cpp" -CPU_BIN="$(TESTCASE)_cpu" -GPU_OBJ_PATH=obj_gpu -GPU_SRC="$(TESTCASE).cu" -GPU_BIN="$(TESTCASE)_gpu" - -all: clean build $(CPU_BIN) $(GPU_BIN) - @echo "Everything was done!" - -build: - @echo "Building pairs package..." - $(PYCMD) setup.py build && $(PYCMD) setup.py install --user - -$(CPU_SRC): - @echo "Generating and compiling $(TESTCASE) example for CPU..." - @mkdir -p $(CPU_OBJ_PATH) - $(PYCMD) examples/$(TESTCASE).py cpu - -$(GPU_SRC): - @echo "Generating and compiling $(TESTCASE) example for GPU..." 
- @mkdir -p $(GPU_OBJ_PATH) - $(PYCMD) examples/$(TESTCASE).py gpu - -$(CPU_OBJ_PATH)/pairs.o: runtime/pairs.cpp - $(CC) -c -o $@ $< $(DEBUG_FLAGS) $(CFLAGS) - -$(CPU_OBJ_PATH)/regular_6d_stencil.o: runtime/domain/regular_6d_stencil.cpp - $(CC) -c -o $@ $< $(DEBUG_FLAGS) $(CFLAGS) - -$(CPU_OBJ_PATH)/dummy.o: runtime/devices/dummy.cpp - $(CC) -c -o $@ $< $(DEBUG_FLAGS) $(CFLAGS) - -$(GPU_OBJ_PATH)/pairs.o: runtime/pairs.cpp - $(CC) -c -o $@ $< $(DEBUG_FLAGS) $(MPI_FLAGS) $(CFLAGS) $(CUDA_FLAGS) - -$(GPU_OBJ_PATH)/regular_6d_stencil.o: runtime/domain/regular_6d_stencil.cpp - $(CC) -c -o $@ $< $(DEBUG_FLAGS) $(MPI_FLAGS) $(CFLAGS) $(CUDA_FLAGS) - -$(GPU_OBJ_PATH)/cuda_runtime.o: runtime/devices/cuda.cu - $(NVCC) $(NVCC_FLAGS) -c -o $@ $< $(DEBUG_FLAGS) $(MPI_FLAGS) $(CUDA_FLAGS) - -# Targets -$(CPU_BIN): $(CPU_SRC) $(CPU_OBJ_PATH)/pairs.o $(CPU_OBJ_PATH)/regular_6d_stencil.o $(CPU_OBJ_PATH)/dummy.o - $(CC) $(CFLAGS) -o $(CPU_BIN) $(CPU_SRC) $(CPU_OBJ_PATH)/pairs.o $(CPU_OBJ_PATH)/regular_6d_stencil.o $(CPU_OBJ_PATH)/dummy.o $(DEBUG_FLAGS) - -$(GPU_BIN): $(GPU_SRC) $(GPU_OBJ_PATH)/pairs.o $(GPU_OBJ_PATH)/regular_6d_stencil.o $(GPU_OBJ_PATH)/cuda_runtime.o - $(NVCC) $(NVCC_FLAGS) -c -o $(GPU_OBJ_PATH)/$(GPU_BIN).o $(GPU_SRC) $(DEBUG_FLAGS) $(MPI_FLAGS) $(CUDA_FLAGS) - $(CC) -o $(GPU_BIN) $(GPU_OBJ_PATH)/$(GPU_BIN).o $(GPU_OBJ_PATH)/cuda_runtime.o $(GPU_OBJ_PATH)/pairs.o $(GPU_OBJ_PATH)/regular_6d_stencil.o $(CUDART_FLAGS) $(CUDA_FLAGS) $(CFLAGS) - -clean: - @echo "Cleaning..." - rm -rf build $(CPU_BIN) $(GPU_BIN) $(CPU_SRC) $(GPU_SRC) dist pairs.egg-info functions functions.pdf $(CPU_OBJ_PATH) $(GPU_OBJ_PATH) diff --git a/README.md b/README.md index bcef6a27a6d2ebb070f8098c59934bcc9fc73a43..882644c488aa6495080216615ba203927e6d0dd6 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,6 @@ P4IRS is an open-source, stand-alone compiler and domain-specific language for particle simulations which aims at generating optimized code for different target hardwares. 
It is released as a Python package and allows users to define kernels, integrators and other particle routines in a high-level and straightforward fashion without the need to implement any backend code. -## Build instructions - -There is a Makefile which contains configurable environment variables such as `TESTCASE` compiler parameters evaluate P4IRS performance on different scenarios. -`TESTCASE` refers to any of the files within the `examples` directory, such as `md` and `dem`. ## Usage @@ -104,6 +100,64 @@ else: psim.generate() ``` +## Build instructions + +P4IRS can be built in 3 different modes using the CMake build system. Before we demonstrate each mode, ensure you have CMake, MPI and CUDA (if targeting GPU execution) available in your environment. + +In the following, we assume we have created and navigated to a build directory: `mkdir build; cd build` + +**General CMake flags (applicable to all 3 modes):** +* Pass your input script to CMake using `-DINPUT_SCRIPT=path/to/script.py` +* Enable CUDA with `-DCOMPILE_CUDA=ON` +* Enable support for BlockForest domain partitioning and dynamic load balancing by providing the path to the waLBerla source directory `-DWALBERLA_DIR=path/to/walberla` (TODO: waLBerla as a submodule) + + +### 1. Whole-program generation: +--------------------- +In this mode, everything including the `main` function is generated by P4IRS. + +1. Set `generate_whole_program=True` in the input script +2. Set the CMake flag `-DGENERATE_WHOLE_PROGRAM=ON` + +Example: Build [md.py](examples/whole-program-generation/md.py) +``` +cmake -DINPUT_SCRIPT=../examples/whole-program-generation/md.py -DGENERATE_WHOLE_PROGRAM=ON .. +``` +Now call `make` and an **executable** is built. + + +### 2. Modular stand-alone app +--------------------- +You can build a stand-alone C++ app which uses the P4IRS modular interface. + +1. Set `generate_whole_program=False` in the input script +2. Set the CMake flag `-DBUILD_APP=ON` +3. 
Provide the list of your source files to CMake (semicolon-separated): `-DUSER_SOURCE_FILES=path/to/main.cpp;path/to/helper.cpp` + +Example: Build the application [sd_1.cpp](examples/modular/sd_1.cpp) with [spring_dashpot.py](examples/modular/spring_dashpot.py) +Note: In this example we assume waLBerla has already been cloned next to the P4IRS directory. + +``` +cmake -DINPUT_SCRIPT=../examples/modular/spring_dashpot.py -DBUILD_APP=ON -DUSER_SOURCE_FILES=../examples/modular/sd_1.cpp -DWALBERLA_DIR=../../walberla .. +``` +Now call `make` and an **executable** is built. + + +### 3. P4IRS as a library +--------------------- +In this mode, P4IRS is compiled as a library that can be integrated into other projects. + +1. Set `generate_whole_program=False` in the input script +2. Ensure both `BUILD_APP` and `GENERATE_WHOLE_PROGRAM` are `OFF` (they are OFF by default) + +Configure CMake and call `make` as usual, and a **static library** is built. You can then include P4IRS and its dependencies in your build system as follows: +```cmake +find_package(pairs REQUIRED HINTS "path/to/pairs/build" NO_DEFAULT_PATH) +target_include_directories(my_app PUBLIC ${PAIRS_INCLUDE_DIRS}) +target_link_directories(my_app PUBLIC ${PAIRS_LINK_DIRS}) +target_link_libraries(my_app PUBLIC ${PAIRS_LINK_LIBRARIES}) +``` + ## Citations TBD diff --git a/cmake/FindwaLBerla.cmake b/cmake/FindwaLBerla.cmake new file mode 100644 index 0000000000000000000000000000000000000000..8f87e88a03902f1c1af3900a3d4d38b921996682 --- /dev/null +++ b/cmake/FindwaLBerla.cmake @@ -0,0 +1,13 @@ +if ( WALBERLA_DIR ) + # WALBERLA_DIR has to point to the waLBerla source directory + # this command builds waLBerla (again) in the current build directory in the subfolder "walberla" (second argument) + add_subdirectory( ${WALBERLA_DIR} walberla EXCLUDE_FROM_ALL) + + waLBerla_import() + # Adds the 'src' and 'tests' directory of current app + list( APPEND WALBERLA_MODULE_DIRS "${CMAKE_SOURCE_DIR}/src" "${CMAKE_SOURCE_DIR}/tests" ) + 
list( REMOVE_DUPLICATES WALBERLA_MODULE_DIRS ) + set ( WALBERLA_MODULE_DIRS ${WALBERLA_MODULE_DIRS} CACHE INTERNAL "All folders that contain modules or tests" ) +else() + message( FATAL_ERROR "waLBerla not found - Use 'cmake -DWALBERLA_DIR=path_to_waLBerla_sources pathToApplicationSources' " ) +endif() diff --git a/cmake/pairs-config.cmake.in b/cmake/pairs-config.cmake.in new file mode 100644 index 0000000000000000000000000000000000000000..fdf10effa045063348fbe03d60608d6db3892cc2 --- /dev/null +++ b/cmake/pairs-config.cmake.in @@ -0,0 +1,6 @@ +set ( pairs_SOURCE_DIR @pairs_SOURCE_DIR@ ) +set ( pairs_BINARY_DIR @pairs_BINARY_DIR@ ) + +set ( PAIRS_LINK_LIBRARIES @PAIRS_LINK_LIBRARIES@ ) +set ( PAIRS_LINK_DIRS @PAIRS_LINK_DIRS@ ) +set ( PAIRS_INCLUDE_DIRS @PAIRS_INCLUDE_DIRS@ ) diff --git a/data/planes.input b/data/planes.input index 3c84ed90dba0e02f137fb4ae6308c48a56a53930..af3744e15f94a6ec863856ed5913c4fc18d344a3 100644 --- a/data/planes.input +++ b/data/planes.input @@ -1,2 +1,2 @@ -100000,0,1,0.0,0.0,0.0,0.0,0.0,1.0,13 -100001,0,1,0.8,0.015,0.2,0.0,0.0,-1.0,13 +0, 1, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 13 +0, 1, 0.8, 0.015, 0.2, 0.0, 0.0, -1.0, 13 diff --git a/data/sd_planes.input b/data/sd_planes.input new file mode 100644 index 0000000000000000000000000000000000000000..42c7724c8b621dfca891624bc9c3d58aea6ca606 --- /dev/null +++ b/data/sd_planes.input @@ -0,0 +1,6 @@ +0, 1, 0, 0, 0, 1, 0, 0, 13 +0, 1, 0, 0, 0, 0, 1, 0, 13 +0, 1, 0, 0, 0, 0, 0, 1, 13 +0, 1, 10, 10, 10, -1, 0, 0, 13 +0, 1, 10, 10, 10, 0, -1, 0, 13 +0, 1, 10, 10, 10, 0, 0, -1, 13 diff --git a/examples/dem2.py b/examples/dem2.py deleted file mode 100644 index 7ebb47b1a335c4e0728a62500ecb0e2adcd81402..0000000000000000000000000000000000000000 --- a/examples/dem2.py +++ /dev/null @@ -1,111 +0,0 @@ -import pairs -import sys - - -def linear_spring_dashpot(i, j): - penetration_depth = squared_distance(i, j) - radius[i] - radius[j] - skip_when(penetration_depth >= 0.0) - - delta_ij = delta(i, j) - contact_normal = 
delta_ij / length(delta_ij) - k = radius[j] + 0.5 * penetration_depth - contact_point = position[j] + contact_normal * k - - rel_vel = -velocity_wf[i] - velocity_wf[j] - rel_vel_n = dot(rel_vel, contact_normal) * contact_normal - rel_vel_t = rel_vel - rel_vel_n - - fN = stiffness_norm[i, j] * (-penetration_depth) * contact_normal + damping_norm[i, j] * rel_vel_n; - - tan_spring_disp = tangential_spring_displacement[i, j] - impact_vel_magnitude = impact_velocity_magnitude[i, j] - impact_magnitude = select(impact_vel_magnitude > 0.0, impact_vel_magnitude, length(rel_vel)) - sticking = is_sticking[i, j] - - rotated_tan_disp = tan_spring_disp - contact_normal * (contact_normal * tan_spring_disp) - new_tan_spring_disp = dt * rel_vel_t + \ - select(squared_length(rotated_tan_disp) <= 0.0, - zero_vector(), - rotated_tan_disp * length(tan_spring_disp) / length(rotated_tan_disp)) - - fTLS = stiffness_tan[i, j] * new_tan_spring_disp + damping_tan[i, j] * rel_vel_t - fTLS_len = length(fTLS) - fTLS_inv_len = 1.0 / fTLS_len - t = select(fTLS_len > 0, fTLS / fTLS_inv_len, zero_vector()) - - f_friction_abs_static = friction_static[i, j] * length(fN) - f_friction_abs_dynamic = friction_dynamic[i, j] * length(fN) - tan_vel_threshold = 1e-8 - - cond1 = sticking == 1 and length(rel_vel_t) < tan_vel_threshold and fTLS_len < f_friction_abs_static - cond2 = sticking == 1 and fTLS_len < f_friction_abs_dynamic - f_friction_abs = select(cond1, f_friction_abs_static, f_friction_abs_dynamic) - n_sticking = select(cond1 or cond2 or fTLS_len < f_friction_abs_dynamic, 1, 0) - - if not cond1 and not cond2 and stiffness_tan[i, j] > 0.0: - tangential_spring_displacement[i, j] = \ - (f_friction_abs * t - damping_tan[i, j] * rel_vel_t) / stiffness_tan[i, j] - - else: - tangential_spring_displacement[i, j] = new_tan_spring_disp - - impact_velocity_magnitude[i, j] = impact_magnitude - is_sticking[i, j] = n_sticking - - fTabs = min(fTLS_len, f_friction_abs) - fT = fTabs * t - force[i] += fN + fT - - 
-def euler(i): - velocity[i] += dt * force[i] / mass[i] - position[i] += dt * velocity[i] - - -cmd = sys.argv[0] -target = sys.argv[1] if len(sys.argv[1]) > 1 else "none" -if target != 'cpu' and target != 'gpu': - print(f"Invalid target, use {cmd} <cpu/gpu>") - - -dt = 0.005 -cutoff_radius = 2.5 -skin = 0.3 -ntypes = 4 -stiffness_norm = 1.0 -stiffness_tan = 1.0 -damping_norm = 1.0 -damping_tan = 1.0 -friction_static = 1.0 -friction_dynamic = 1.0 - -psim = pairs.simulation("dem", debug=True) -psim.add_position('position') -psim.add_property('mass', pairs.double(), 1.0) -psim.add_property('velocity', pairs.vector()) -psim.add_property('velocity_wf', pairs.vector()) -psim.add_property('force', pairs.vector(), vol=True) -psim.add_property('radius', pairs.double(), 1.0) -psim.add_feature('type', ntypes) -psim.add_feature_property('type', 'stiffness_norm', pairs.double(), [stiffness_norm for i in range(ntypes * ntypes)]) -psim.add_feature_property('type', 'stiffness_tan', pairs.double(), [stiffness_tan for i in range(ntypes * ntypes)]) -psim.add_feature_property('type', 'damping_norm', pairs.double(), [damping_norm for i in range(ntypes * ntypes)]) -psim.add_feature_property('type', 'damping_tan', pairs.double(), [damping_tan for i in range(ntypes * ntypes)]) -psim.add_feature_property('type', 'friction_static', pairs.double(), [friction_static for i in range(ntypes * ntypes)]) -psim.add_feature_property('type', 'friction_dynamic', pairs.double(), [friction_dynamic for i in range(ntypes * ntypes)]) -psim.add_contact_property('is_sticking', pairs.int32(), False) -psim.add_contact_property('tangential_spring_displacement', pairs.vector(), [0.0, 0.0, 0.0]) -psim.add_contact_property('impact_velocity_magnitude', pairs.double(), 0.0) - -psim.read_particle_data("data/fluidized_bed.input", ['mass', 'position', 'velocity']) -psim.build_neighbor_lists(cutoff_radius + skin) -psim.vtk_output(f"output/test_{target}") -psim.compute(linear_spring_dashpot, cutoff_radius, symbols={'dt': 
dt}) -psim.compute(euler, symbols={'dt': dt}) - -if target == 'gpu': - psim.target(pairs.target_gpu()) -else: - psim.target(pairs.target_cpu()) - -psim.generate() diff --git a/examples/lift.py b/examples/lift.py deleted file mode 100644 index 2b48fdf02a0afdcc7075843cdab8a1852cee9e45..0000000000000000000000000000000000000000 --- a/examples/lift.py +++ /dev/null @@ -1,14 +0,0 @@ -from coupling.parse_cpp import parse_walberla_file -from coupling.parse_cpp import get_class_method, print_tree - -filename = "mesa_pd/kernel/SpringDashpot.hpp" -translation_unit = parse_walberla_file(filename) - -# subtree = get_subtree(tu.cursor, "walberla::mesa_pd::kernel") -# print_tree(subtree) - -kernel = get_class_method( - translation_unit.cursor, - "walberla::mesa_pd::kernel::SpringDashpot", - "operator()") -print_tree(kernel) diff --git a/examples/lj_embedded.py b/examples/lj_embedded.py deleted file mode 100644 index 160c6081bacc9e0a3cd6e8561ebff9ea2eb7bf15..0000000000000000000000000000000000000000 --- a/examples/lj_embedded.py +++ /dev/null @@ -1,32 +0,0 @@ -import pairs - - -dt = 0.005 -cutoff_radius = 2.5 -skin = 0.3 -sigma = 1.0 -epsilon = 1.0 -sigma6 = sigma ** 6 - -psim = pairs.simulation("lj") -mass = psim.add_real_property('mass', 1.0) -position = psim.add_vector_property('position') -velocity = psim.add_vector_property('velocity') -force = psim.add_vector_property('force', vol=True) -psim.from_file("data/minimd_setup_4x4x4.input", ['mass', 'position', 'velocity']) -psim.create_cell_lists(2.8, 2.8) -psim.create_neighbor_lists() -psim.periodic(2.8) -psim.vtk_output("output/test") - -for i, j, delta, rsq in psim.particle_pairs(cutoff_radius, position): - sr2 = 1.0 / rsq - sr6 = sr2 * sr2 * sr2 * sigma6 - f = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon - force[i].add(delta * f) - -for i in psim.particles(): - velocity[i].add(dt * force[i] / mass[i]) - position[i].add(dt * velocity[i]) - -psim.generate() diff --git a/examples/lj_onetype.py b/examples/lj_onetype.py deleted file 
mode 100644 index e703122d5469e96458dd831aedbfa4cf9c456090..0000000000000000000000000000000000000000 --- a/examples/lj_onetype.py +++ /dev/null @@ -1,45 +0,0 @@ -import pairs -import sys - - -def lj(i, j): - sr2 = 1.0 / rsq - sr6 = sr2 * sr2 * sr2 * sigma6 - force[i] += delta * 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon - - -def euler(i): - velocity[i] += dt * force[i] / mass[i] - position[i] += dt * velocity[i] - - -cmd = sys.argv[0] -target = sys.argv[1] if len(sys.argv[1]) > 1 else "none" -if target != 'cpu' and target != 'gpu': - print(f"Invalid target, use {cmd} <cpu/gpu>") - - -dt = 0.005 -cutoff_radius = 2.5 -skin = 0.3 -sigma = 1.0 -epsilon = 1.0 -sigma6 = sigma ** 6 - -psim = pairs.simulation("lj", debug=True) -psim.add_real_property('mass', 1.0) -psim.add_position('position') -psim.add_vector_property('velocity') -psim.add_vector_property('force', vol=True) -psim.from_file("data/minimd_setup_32x32x32.input", ['mass', 'position', 'velocity']) -psim.build_neighbor_lists(cutoff_radius + skin) -psim.vtk_output(f"output/test_{target}") -psim.compute(lj, cutoff_radius, {'sigma6': sigma6, 'epsilon': epsilon}) -psim.compute(euler, symbols={'dt': dt}) - -if target == 'gpu': - psim.target(pairs.target_gpu()) -else: - psim.target(pairs.target_cpu()) - -psim.generate() diff --git a/examples/modular/force_reduction.cpp b/examples/modular/force_reduction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e884b3d0acab91189ffd4465364d9e24319e8992 --- /dev/null +++ b/examples/modular/force_reduction.cpp @@ -0,0 +1,125 @@ +#include <iostream> +//--- +#include "force_reduction.hpp" + +int main(int argc, char **argv) { + + auto pairs_sim = std::make_shared<PairsSimulation>(); + pairs_sim->initialize(); + auto ac = std::make_shared<PairsAccessor>(pairs_sim.get()); + + // Set domain + auto pairs_runtime = pairs_sim->getPairsRuntime(); + pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, 0.1, 0.1, 0.1); + + // Create bodies + pairs::id_t pUid = 
pairs::create_sphere(pairs_runtime, 0.0499, 0.0499, 0.07, 0.5, 0.5, 0 , 1000, 0.0045, 0, 0); + + // setup_sim after creating all bodies + pairs_sim->setup_sim(); + pairs_sim->update_mass_and_inertia(); + + // Track particle + //------------------------------------------------------------------------------------------- + if (pUid != ac->getInvalidUid()){ + std::cout<< "Particle " << pUid << " is created in rank " << pairs_sim->rank() << std::endl; + } + + MPI_Allreduce(MPI_IN_PLACE, &pUid, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); + + if (pUid != ac->getInvalidUid()){ + std::cout<< "Particle " << pUid << " will be tracked by rank " << pairs_sim->rank() << std::endl; + } + + // Communicate particles (exchange/ghost) + //------------------------------------------------------------------------------------------- + pairs_sim->communicate(0); + ac->update(); + + // Helper lambdas for demo + //------------------------------------------------------------------------------------------- + auto pIsLocalInMyRank = [&](pairs::id_t uid){return ac->uidToIdxLocal(uid) != ac->getInvalidIdx();}; + auto pIsGhostInMyRank = [&](pairs::id_t uid){return ac->uidToIdxGhost(uid) != ac->getInvalidIdx();}; + + // Check which rank owns the particle, and which ranks have it as a ghost + //------------------------------------------------------------------------------------------- + ac->syncUid(PairsAccessor::Host); + if (pIsLocalInMyRank(pUid)){ + std::cout<< "Particle " << pUid << " is local in rank " << pairs_sim->rank() << std::endl; + } + if (pIsGhostInMyRank(pUid)){ + std::cout<< "Particle " << pUid << " is ghost in rank " << pairs_sim->rank() << std::endl; + } + + // Start timestep loop + //------------------------------------------------------------------------------------------- + int num_timesteps = 1; + for (int t=0; t<num_timesteps; ++t){ + ac->syncUid(PairsAccessor::Host); + + // Add local contribution + 
//------------------------------------------------------------------------------------------- + if (pIsLocalInMyRank(pUid)){ + int idx = ac->uidToIdxLocal(pUid); + pairs::Vector3<double> local_force(0.1, 0.1, 0.1); + pairs::Vector3<double> local_torque(0.2, 0.2, 0.2); + + std::cout << "Force on particle " << pUid << " from local rank [" << pairs_sim->rank() << "] : (" + << local_force[0] << ", " << local_force[1] << ", " << local_force[2] << ")" << std::endl; + + ac->setHydrodynamicForce(idx, local_force); + ac->setHydrodynamicTorque(idx, local_torque); + ac->syncHydrodynamicForce(PairsAccessor::Host, true); + ac->syncHydrodynamicTorque(PairsAccessor::Host, true); + } + + // Add neighbor contributions + //------------------------------------------------------------------------------------------- + if (pIsGhostInMyRank(pUid)){ + int idx = ac->uidToIdxGhost(pUid); + pairs::Vector3<double> ghost_force(pairs_sim->rank()*10, 1, 1); + pairs::Vector3<double> ghost_torque(pairs_sim->rank()*20, 2, 2); + + std::cout << "Force on particle " << pUid << " from neighbor rank [" << pairs_sim->rank() << "] : (" + << ghost_force[0] << ", " << ghost_force[1] << ", " << ghost_force[2] << ")" << std::endl; + + ac->setHydrodynamicForce(idx, ghost_force); + ac->setHydrodynamicTorque(idx, ghost_torque); + ac->syncHydrodynamicForce(PairsAccessor::Host, true); + ac->syncHydrodynamicTorque(PairsAccessor::Host, true); + } + + // Do computations + //------------------------------------------------------------------------------------------- + pairs_sim->update_cells(t); + pairs_sim->gravity(); + pairs_sim->spring_dashpot(); + pairs_sim->euler(5e-5); + //------------------------------------------------------------------------------------------- + + std::cout << "---- reverse_comm and reduce ----" << std::endl; + // reverse_comm() communicates data from ghost particles back to their owner ranks using + // information from the previous time that communicate() was called + 
pairs_sim->reverse_comm(); + + // Get the reduced force on the owner rank + //------------------------------------------------------------------------------------------- + if (pIsLocalInMyRank(pUid)){ + int idx = ac->uidToIdxLocal(pUid); + ac->syncHydrodynamicForce(PairsAccessor::Host); + ac->syncHydrodynamicTorque(PairsAccessor::Host); + auto force_sum = ac->getHydrodynamicForce(idx); + auto torque_sum = ac->getHydrodynamicTorque(idx); + + std::cout << "Reduced force on particle " << pUid << " in local rank [" << pairs_sim->rank() << "] : (" + << force_sum[0] << ", " << force_sum[1] << ", " << force_sum[2] << ")" << std::endl; + } + + // Usual communication + //------------------------------------------------------------------------------------------- + pairs_sim->communicate(t); + ac->update(); + } + + pairs_sim->end(); +} diff --git a/examples/modular/force_reduction.py b/examples/modular/force_reduction.py new file mode 100644 index 0000000000000000000000000000000000000000..af9cea7058190b7fda485cc1ec3a6fd6cde8b4f1 --- /dev/null +++ b/examples/modular/force_reduction.py @@ -0,0 +1,113 @@ +import math +import pairs +import sys +import os + +def update_mass_and_inertia(i): + rotation_matrix[i] = diagonal_matrix(1.0) + rotation[i] = default_quaternion() + + if is_sphere(i): + inv_inertia[i] = inversed(diagonal_matrix(0.4 * mass[i] * radius[i] * radius[i])) + + else: + mass[i] = infinity + inv_inertia[i] = 0.0 + +def spring_dashpot(i, j): + delta_ij = -penetration_depth(i, j) + skip_when(delta_ij < 0.0) + + velocity_wf_i = linear_velocity[i] + cross(angular_velocity[i], contact_point(i, j) - position[i]) + velocity_wf_j = linear_velocity[j] + cross(angular_velocity[j], contact_point(i, j) - position[j]) + + rel_vel = -(velocity_wf_i - velocity_wf_j) + rel_vel_n = dot(rel_vel, contact_normal(i, j)) + rel_vel_t = rel_vel - rel_vel_n * contact_normal(i, j) + + fNabs = stiffness[i,j] * delta_ij + damping_norm[i,j] * rel_vel_n + fN = fNabs * contact_normal(i, j) + + 
fTabs = min(damping_tan[i,j] * length(rel_vel_t), friction[i, j] * fNabs) + fT = fTabs * normalized(rel_vel_t) + + partial_force = fN + fT + apply(force, partial_force) + apply(torque, cross(contact_point(i, j) - position, partial_force)) + +def euler(i): + inv_mass = 1.0 / mass[i] + position[i] += 0.5 * inv_mass * force[i] * dt * dt + linear_velocity[i] * dt + linear_velocity[i] += inv_mass * force[i] * dt + wdot = rotation_matrix[i] * (inv_inertia[i] * torque[i]) * transposed(rotation_matrix[i]) + phi = angular_velocity[i] * dt + 0.5 * wdot * dt * dt + rotation[i] = quaternion(phi, length(phi)) * rotation[i] + rotation_matrix[i] = quaternion_to_rotation_matrix(rotation[i]) + angular_velocity[i] += wdot * dt + +def gravity(i): + force[i][2] -= force[i][2] - mass[i] * gravity_SI # NOTE(review): algebraically this leaves force[i][2] equal to +mass[i] * gravity_SI instead of subtracting it; confirm this is intended + + +file_name = os.path.basename(__file__) +file_name_without_extension = os.path.splitext(file_name)[0] + +psim = pairs.simulation( + file_name_without_extension, + [pairs.sphere(), pairs.halfspace()], + double_prec=True, + particle_capacity=1000000, + neighbor_capacity=20, + debug=True, + generate_whole_program=False) + + +target = sys.argv[1] if len(sys.argv) > 1 else "none" # check the argument count so a missing target falls back to "none" + +if target == 'gpu': + psim.target(pairs.target_gpu()) +elif target == 'cpu': + psim.target(pairs.target_cpu()) +else: + print(f"Invalid target, use {sys.argv[0]} <cpu/gpu>") + +gravity_SI = 9.81 +diameter = 100 # required for linkedCellWidth.
TODO: set linkedCellWidth at runtime +linkedCellWidth = 1.01 * diameter +ntypes = 2 + +psim.add_position('position') +psim.add_property('mass', pairs.real()) +psim.add_property('linear_velocity', pairs.vector()) +psim.add_property('angular_velocity', pairs.vector()) +psim.add_property('force', pairs.vector(), volatile=True) +psim.add_property('torque', pairs.vector(), volatile=True) +psim.add_property('radius', pairs.real()) +psim.add_property('normal', pairs.vector()) +psim.add_property('inv_inertia', pairs.matrix()) +psim.add_property('rotation_matrix', pairs.matrix()) +psim.add_property('rotation', pairs.quaternion()) + +# Properties that get reduced during reverse communication +psim.add_property('hydrodynamic_force', pairs.vector(), reduce=True) +psim.add_property('hydrodynamic_torque', pairs.vector(), reduce=True) + +psim.add_feature('type', ntypes) +psim.add_feature_property('type', 'stiffness', pairs.real(), [3000 for i in range(ntypes * ntypes)]) +psim.add_feature_property('type', 'damping_norm', pairs.real(), [10.0 for i in range(ntypes * ntypes)]) +psim.add_feature_property('type', 'damping_tan', pairs.real()) +psim.add_feature_property('type', 'friction', pairs.real()) + +psim.set_domain_partitioner(pairs.block_forest()) +psim.pbc([False, False, False]) +psim.build_cell_lists(linkedCellWidth) + +# The order of user-defined functions is not important here since +# they are not used by other subroutines and are only callable individually +psim.compute(update_mass_and_inertia, symbols={'infinity': math.inf }) +psim.compute(spring_dashpot, linkedCellWidth) +psim.compute(gravity, symbols={'gravity_SI': gravity_SI }) +psim.compute(euler, parameters={'dt': pairs.real()}) + +psim.generate() + diff --git a/examples/modular/sd_1.cpp b/examples/modular/sd_1.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e56cfc5943c7626d97d6b872524655b9dd0dca0b --- /dev/null +++ b/examples/modular/sd_1.cpp @@ -0,0 +1,47 @@ +#include <iostream> +#include 
<memory> + +#include "spring_dashpot.hpp" + +int main(int argc, char **argv) { + + auto pairs_sim = std::make_shared<PairsSimulation>(); + pairs_sim->initialize(); + + auto pairs_runtime = pairs_sim->getPairsRuntime(); + + pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, 1, 1, 1); + + pairs::create_halfspace(pairs_runtime, 0,0,0, 1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 0, 1, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, -1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, 0, -1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, 0, 0, -1, 0, 13); + pairs::create_sphere(pairs_runtime, 0.6, 0.6, 0.7, -2, -2, 0, 1000, 0.05, 0, 0); + pairs::create_sphere(pairs_runtime, 0.4, 0.4, 0.68, 2, 2, 0, 1000, 0.05, 0, 0); + + pairs_sim->setup_sim(0.1, 0.1, 0.1, 0.1); + pairs_sim->update_mass_and_inertia(); + + int num_timesteps = 2000; + int vtk_freq = 20; + double dt = 1e-3; + + for (int t=0; t<num_timesteps; ++t){ + if ((t%500==0) && pairs_sim->rank()==0) std::cout << "Timestep: " << t << std::endl; + + pairs_sim->communicate(t); + + pairs_sim->update_cells(t); + + pairs_sim->gravity(); + pairs_sim->spring_dashpot(); + pairs_sim->euler(dt); + + pairs::vtk_write_data(pairs_runtime, "output/sd_1_local", 0, pairs_sim->nlocal(), t, vtk_freq); + pairs::vtk_write_data(pairs_runtime, "output/sd_1_ghost", pairs_sim->nlocal(), pairs_sim->size(), t, vtk_freq); + } + + pairs_sim->end(); +} diff --git a/examples/modular/sd_2.cpp b/examples/modular/sd_2.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4bdb4c85a37efef0391ccecc9d2a1e1df6e3313d --- /dev/null +++ b/examples/modular/sd_2.cpp @@ -0,0 +1,68 @@ +#include <iostream> +#include <memory> + +#include <blockforest/BlockForest.h> +#include <blockforest/Initialization.h> + +#include "spring_dashpot.hpp" + +int main(int argc, char **argv) { + + auto pairs_sim = 
std::make_shared<PairsSimulation>(); + pairs_sim->initialize(); + + // Create forest + // ------------------------------------------------------------------------------- + walberla::math::AABB domain(0, 0, 0, 1, 1, 1); + std::shared_ptr<walberla::mpi::MPIManager> mpiManager = walberla::mpi::MPIManager::instance(); + mpiManager->initializeMPI(&argc, &argv); + mpiManager->useWorldComm(); + auto procs = mpiManager->numProcesses(); + + walberla::Vector3<int> block_config; + if (procs==1) block_config = walberla::Vector3<int>(1, 1, 1); + else if (procs==4) block_config = walberla::Vector3<int>(2, 2, 1); + else { std::cout << "Error: Check block_config" << std::endl; exit(-1);} + + auto ref_level = 0; + std::shared_ptr<walberla::BlockForest> forest = walberla::blockforest::createBlockForest( + domain, block_config, walberla::Vector3<bool>(false, false, false), procs, ref_level); + + // Pass forest to P4IRS + // ------------------------------------------------------------------------------- + auto pairs_runtime = pairs_sim->getPairsRuntime(); + pairs_runtime->useDomain(forest); + + pairs::create_halfspace(pairs_runtime, 0,0,0, 1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 0, 1, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, -1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, 0, -1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, 0, 0, -1, 0, 13); + pairs::create_sphere(pairs_runtime, 0.6, 0.6, 0.7, -2, -2, 0, 1000, 0.05, 0, 0); + pairs::create_sphere(pairs_runtime, 0.4, 0.4, 0.68, 2, 2, 0, 1000, 0.05, 0, 0); + + pairs_sim->setup_sim(0.1, 0.1, 0.1, 0.1); + pairs_sim->update_mass_and_inertia(); + + int num_timesteps = 2000; + int vtk_freq = 20; + double dt = 1e-3; + + for (int t=0; t<num_timesteps; ++t){ + if ((t%500==0) && pairs_sim->rank()==0) std::cout << "Timestep: " << t << std::endl; + + pairs_sim->communicate(t); + + pairs_sim->update_cells(t); + + 
pairs_sim->gravity(); + pairs_sim->spring_dashpot(); + pairs_sim->euler(dt); + + pairs::vtk_write_data(pairs_runtime, "output/sd_2_local", 0, pairs_sim->nlocal(), t, vtk_freq); + pairs::vtk_write_data(pairs_runtime, "output/sd_2_ghost", pairs_sim->nlocal(), pairs_sim->size(), t, vtk_freq); + } + + pairs_sim->end(); +} diff --git a/examples/modular/sd_3_CPU.cpp b/examples/modular/sd_3_CPU.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a8e4a93bc5e816eb8eee900b7ef1d6769e7ec206 --- /dev/null +++ b/examples/modular/sd_3_CPU.cpp @@ -0,0 +1,95 @@ +#include <iostream> +#include <memory> + +#include "spring_dashpot.hpp" + +void change_gravitational_force(std::shared_ptr<PairsAccessor> &ac, int idx){ + pairs::Vector3<double> upward_gravity(0.0, 0.0, 2 * ac->getMass(idx) * 9.81); + ac->setForce(idx, ac->getForce(idx) + upward_gravity); +} + +int main(int argc, char **argv) { + + auto pairs_sim = std::make_shared<PairsSimulation>(); + pairs_sim->initialize(); + + auto ac = std::make_shared<PairsAccessor>(pairs_sim.get()); + + auto pairs_runtime = pairs_sim->getPairsRuntime(); + pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, 1, 1, 1); + + pairs::create_halfspace(pairs_runtime, 0,0,0, 1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 0, 1, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, -1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, 0, -1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, 0, 0, -1, 0, 13); + + pairs::id_t pUid = pairs::create_sphere(pairs_runtime ,0.6, 0.6, 0.7, 0, 0, 0, 1000, 0.05, 0, 0); + pairs::create_sphere(pairs_runtime, 0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 0, 0); + + MPI_Allreduce(MPI_IN_PLACE, &pUid, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); + + auto pIsLocalInMyRank = [&](pairs::id_t uid){return ac->uidToIdxLocal(uid) != ac->getInvalidIdx();}; + + pairs_sim->setup_sim(0.1, 0.1, 0.1, 0.1); + 
pairs_sim->update_mass_and_inertia(); + + pairs_sim->communicate(0); + + int num_timesteps = 2000; + int vtk_freq = 20; + double dt = 1e-3; + + for (int t=0; t<num_timesteps; ++t){ + + // Print position of particle pUid + //------------------------------------------------------------------------------------------- + if(pIsLocalInMyRank(pUid)){ + std::cout << "Timestep (" << t << "): Particle " << pUid << " is in rank " << pairs_sim->rank() << std::endl; + int idx = ac->uidToIdxLocal(pUid); + std::cout << "Position = (" + << ac->getPosition(idx)[0] << ", " + << ac->getPosition(idx)[1] << ", " + << ac->getPosition(idx)[2] << ")" << std::endl; + + } + + // Calculate forces + //------------------------------------------------------------------------------------------- + pairs_sim->update_cells(t); + pairs_sim->gravity(); + pairs_sim->spring_dashpot(); + + // Change gravitational force on particle pUid + //------------------------------------------------------------------------------------------- + if(pIsLocalInMyRank(pUid)){ + int idx = ac->uidToIdxLocal(pUid); + + std::cout << "Force before changing = (" + << ac->getForce(idx)[0] << ", " + << ac->getForce(idx)[1] << ", " + << ac->getForce(idx)[2] << ")" << std::endl; + + change_gravitational_force(ac, idx); + + std::cout << "Force after changing = (" + << ac->getForce(idx)[0] << ", " + << ac->getForce(idx)[1] << ", " + << ac->getForce(idx)[2] << ")" << std::endl; + } + + // Euler + //------------------------------------------------------------------------------------------- + pairs_sim->euler(dt); + + // Communicate + //------------------------------------------------------------------------------------------- + pairs_sim->communicate(t); + + pairs::vtk_write_data(pairs_runtime, "output/sd_3_CPU_local", 0, ac->nlocal(), t, vtk_freq); + pairs::vtk_write_data(pairs_runtime, "output/sd_3_CPU_ghost", ac->nlocal(), ac->size(), t, vtk_freq); + } + + pairs_sim->end(); +} \ No newline at end of file diff --git 
a/examples/modular/sd_3_GPU.cu b/examples/modular/sd_3_GPU.cu new file mode 100644 index 0000000000000000000000000000000000000000..b44af846643ed9cf0a730b07c5f60543560e29b8 --- /dev/null +++ b/examples/modular/sd_3_GPU.cu @@ -0,0 +1,152 @@ +#include <iostream> +#include <memory> +#include <cuda_runtime.h> + +#include "spring_dashpot.hpp" + +void checkCudaError(cudaError_t err, const char* func) { + if (err != cudaSuccess) { + fprintf(stderr, "CUDA error in %s: %s\n", func, cudaGetErrorString(err)); + exit(err); + } +} + +__global__ void print_position(PairsAccessor ac, int idx){ + printf("Position [from device] = (%f, %f, %f) \n", ac.getPosition(idx)[0], ac.getPosition(idx)[1], ac.getPosition(idx)[2]); +} + +__global__ void change_gravitational_force(PairsAccessor ac, int idx){ + printf("Force [from device] before setting = (%f, %f, %f) \n", ac.getForce(idx)[0], ac.getForce(idx)[1], ac.getForce(idx)[2]); + + pairs::Vector3<double> upward_gravity(0.0, 0.0, 2 * ac.getMass(idx) * 9.81); + ac.setForce(idx, ac.getForce(idx) + upward_gravity); + + printf("Force [from device] after setting = (%f, %f, %f) \n", ac.getForce(idx)[0], ac.getForce(idx)[1], ac.getForce(idx)[2]); +} + +void set_feature_properties(std::shared_ptr<PairsAccessor> &ac){ + ac->setTypeStiffness(0,0, 0); + ac->setTypeStiffness(0,1, 1000); + ac->setTypeStiffness(1,0, 1000); + ac->setTypeStiffness(1,1, 3000); + ac->syncTypeStiffness(); + + ac->setTypeDampingNorm(0,0, 0); + ac->setTypeDampingNorm(0,1, 20); + ac->setTypeDampingNorm(1,0, 20); + ac->setTypeDampingNorm(1,1, 10); + ac->syncTypeDampingNorm(); +} + +int main(int argc, char **argv) { + + auto pairs_sim = std::make_shared<PairsSimulation>(); + pairs_sim->initialize(); + + // Create PairsAccessor after PairsSimulation is initialized + auto ac = std::make_shared<PairsAccessor>(pairs_sim.get()); + + auto pairs_runtime = pairs_sim->getPairsRuntime(); + pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, 1, 1, 1); + + pairs::create_halfspace(pairs_runtime, 
0,0,0, 1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 0, 1, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, -1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, 0, -1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, 0, 0, -1, 0, 13); + + pairs::id_t pUid = pairs::create_sphere(pairs_runtime, 0.6, 0.6, 0.7, 0, 0, 0, 1000, 0.05, 1, 0); + pairs::create_sphere(pairs_runtime, 0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 1, 0); + + set_feature_properties(ac); + + MPI_Allreduce(MPI_IN_PLACE, &pUid, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); + + auto pIsLocalInMyRank = [&](pairs::id_t uid){return ac->uidToIdxLocal(uid) != ac->getInvalidIdx();}; + + pairs_sim->setup_sim(0.1, 0.1, 0.1, 0.1); + pairs_sim->update_mass_and_inertia(); + + pairs_sim->communicate(0); + // PairsAccessor requires an update when particles are communicated + ac->update(); + + int num_timesteps = 2000; + int vtk_freq = 20; + double dt = 1e-3; + + for (int t=0; t<num_timesteps; ++t){ + // Up-to-date uids might be on host or device. So sync uid in Host before accessing them from host + ac->syncUid(PairsAccessor::Host); + + // Print position of particle pUid + //------------------------------------------------------------------------------------------- + if(pIsLocalInMyRank(pUid)){ + std::cout << "Timestep (" << t << "): Particle " << pUid << " is in rank " << pairs_sim->rank() << std::endl; + int idx = ac->uidToIdxLocal(pUid); + + // Up-to-date position might be on host or device. 
+ // Sync position on Host before reading it from host: + ac->syncPosition(PairsAccessor::Host); + std::cout << "Position [from host] = (" + << ac->getPosition(idx)[0] << ", " + << ac->getPosition(idx)[1] << ", " + << ac->getPosition(idx)[2] << ")" << std::endl; + + // Sync position on Device before reading it from device: + ac->syncPosition(PairsAccessor::Device); + print_position<<<1,1>>>(*ac, idx); + checkCudaError(cudaDeviceSynchronize(), "print_position"); + + // There's no need to sync position here to continue the simulation, since position wasn't modified. + } + + // Calculate forces + //------------------------------------------------------------------------------------------- + pairs_sim->update_cells(t); + pairs_sim->gravity(); + pairs_sim->spring_dashpot(); + + // Change gravitational force on particle pUid + //------------------------------------------------------------------------------------------- + ac->syncUid(PairsAccessor::Host); + + if(pIsLocalInMyRank(pUid)){ + std::cout << "Force Timestep (" << t << "): Particle " << pUid << " is in rank " << pairs_sim->rank() << std::endl; + int idx = ac->uidToIdxLocal(pUid); + + // Up-to-date force and mass might be on host or device. + // So sync them in Device before accessing them on device. (No data will be transferred if they are already on device) + ac->syncForce(PairsAccessor::Device); + ac->syncMass(PairsAccessor::Device); + + // Modify force from device: + change_gravitational_force<<<1,1>>>(*ac, idx); + checkCudaError(cudaDeviceSynchronize(), "change_gravitational_force"); + + // Force on device was modified. + // So sync force before continuing the simulation.
+ ac->syncForce(PairsAccessor::Host); + std::cout << "Force [from host] after changing = (" + << ac->getForce(idx)[0] << ", " + << ac->getForce(idx)[1] << ", " + << ac->getForce(idx)[2] << ")" << std::endl; + } + + // Euler + //------------------------------------------------------------------------------------------- + pairs_sim->euler(dt); + + // Communicate + //------------------------------------------------------------------------------------------- + pairs_sim->communicate(t); + // PairsAccessor requires an update when particles are communicated + ac->update(); + + pairs::vtk_write_data(pairs_runtime, "output/dem_sd_local", 0, ac->nlocal(), t, vtk_freq); + pairs::vtk_write_data(pairs_runtime, "output/dem_sd_ghost", ac->nlocal(), ac->size(), t, vtk_freq); + } + + pairs_sim->end(); +} \ No newline at end of file diff --git a/examples/modular/sd_4.cpp b/examples/modular/sd_4.cpp new file mode 100644 index 0000000000000000000000000000000000000000..80ec40313e13692d1b41fcee01f6b5ce7d4ef91b --- /dev/null +++ b/examples/modular/sd_4.cpp @@ -0,0 +1,99 @@ +#include <iostream> +#include <memory> +#include <iomanip> + +#include "spring_dashpot.hpp" + +void set_feature_properties(std::shared_ptr<PairsAccessor> &ac){ + ac->setTypeStiffness(0,0, 100000); + ac->setTypeStiffness(0,1, 100000); + ac->setTypeStiffness(1,0, 100000); + ac->setTypeStiffness(1,1, 100000); + ac->syncTypeStiffness(); + + ac->setTypeDampingNorm(0,0, 300); + ac->setTypeDampingNorm(0,1, 300); + ac->setTypeDampingNorm(1,0, 300); + ac->setTypeDampingNorm(1,1, 300); + ac->syncTypeDampingNorm(); + + ac->setTypeFriction(0,0, 0.5); + ac->setTypeFriction(0,1, 0.5); + ac->setTypeFriction(1,0, 0.5); + ac->setTypeFriction(1,1, 0.5); + ac->syncTypeFriction(); + + ac->setTypeDampingTan(0,0, 20); + ac->setTypeDampingTan(0,1, 20); + ac->setTypeDampingTan(1,0, 20); + ac->setTypeDampingTan(1,1, 20); + ac->syncTypeDampingTan(); +} + +int main(int argc, char **argv) { + auto pairs_sim = 
std::make_shared<PairsSimulation>(); + pairs_sim->initialize(); + + auto ac = std::make_shared<PairsAccessor>(pairs_sim.get()); + set_feature_properties(ac); + + auto pairs_runtime = pairs_sim->getPairsRuntime(); + + pairs_runtime->initDomain(&argc, &argv, + 0, 0, 0, 20, 20, 20, // Domain bounds + false, false, false, // PBCs --------------> TODO: runtime pbc + true // Enable dynamic load balancing (does initial refinement on a <1,1,1> blockforest) + ); + + pairs_runtime->getDomainPartitioner()->initWorkloadBalancer(pairs::Hilbert, 100, 1000); + + pairs::create_halfspace(pairs_runtime, 0,0,0, 1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 0, 1, 0, 13); + pairs::create_halfspace(pairs_runtime, 20,20,20, -1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 20,20,20, 0, -1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 20,20,20, 0, 0, -1, 0, 13); + + double diameter_min = 0.3; + double diameter_max = 0.3; + double sphere_spacing = 0.4; + pairs::dem_sc_grid(pairs_runtime, 10, 10, 15, sphere_spacing, diameter_min, diameter_min, diameter_max, 2, 100, 2); + + double lcw = diameter_max * 1.01; // Linked-cell width + double interaction_radius = diameter_max; + pairs_sim->setup_sim(lcw, lcw, lcw, interaction_radius); + + pairs_sim->update_mass_and_inertia(); + + int num_timesteps = 4000; + int vtk_freq = 20; + int rebalance_freq = 200; + double dt = 1e-3; + + pairs::vtk_write_subdom(pairs_runtime, "output/subdom_init", 0); + + + for (int t=0; t<num_timesteps; ++t){ + if ((t % vtk_freq==0) && pairs_sim->rank()==0) std::cout << "Timestep: " << t << std::endl; + + if (t % rebalance_freq == 0){ + pairs_sim->update_domain(); + } + + pairs_sim->update_cells(t); + + pairs_sim->gravity(); + pairs_sim->spring_dashpot(); + pairs_sim->euler(dt); + + pairs_sim->communicate(t); + + if (t % vtk_freq==0){ + pairs::vtk_write_subdom(pairs_runtime, "output/subdom", t); + 
pairs::vtk_write_data(pairs_runtime, "output/sd_4_local", 0, pairs_sim->nlocal(), t); + pairs::vtk_write_data(pairs_runtime, "output/sd_4_ghost", pairs_sim->nlocal(), pairs_sim->size(), t); + } + } + + pairs_sim->end(); +} \ No newline at end of file diff --git a/examples/modular/spring_dashpot.py b/examples/modular/spring_dashpot.py new file mode 100644 index 0000000000000000000000000000000000000000..191c000ca61962af8ebae77ab4ec1b97b433d399 --- /dev/null +++ b/examples/modular/spring_dashpot.py @@ -0,0 +1,107 @@ +import math +import pairs +import sys +import os + +def update_mass_and_inertia(i): + rotation_matrix[i] = diagonal_matrix(1.0) + rotation[i] = default_quaternion() + + if is_sphere(i): + inv_inertia[i] = inversed(diagonal_matrix(0.4 * mass[i] * radius[i] * radius[i])) + + else: + mass[i] = infinity + inv_inertia[i] = 0.0 + +def spring_dashpot(i, j): + delta_ij = -penetration_depth(i, j) + skip_when(delta_ij < 0.0) + + velocity_wf_i = linear_velocity[i] + cross(angular_velocity[i], contact_point(i, j) - position[i]) + velocity_wf_j = linear_velocity[j] + cross(angular_velocity[j], contact_point(i, j) - position[j]) + + rel_vel = -(velocity_wf_i - velocity_wf_j) + rel_vel_n = dot(rel_vel, contact_normal(i, j)) + rel_vel_t = rel_vel - rel_vel_n * contact_normal(i, j) + + fNabs = stiffness[i,j] * delta_ij + damping_norm[i,j] * rel_vel_n + fN = fNabs * contact_normal(i, j) + + fTabs = min(damping_tan[i,j] * length(rel_vel_t), friction[i, j] * fNabs) + fT = fTabs * normalized(rel_vel_t) + + partial_force = fN + fT + apply(force, partial_force) + apply(torque, cross(contact_point(i, j) - position, partial_force)) + +def euler(i): + inv_mass = 1.0 / mass[i] + position[i] += 0.5 * inv_mass * force[i] * dt * dt + linear_velocity[i] * dt + linear_velocity[i] += inv_mass * force[i] * dt + wdot = rotation_matrix[i] * (inv_inertia[i] * torque[i]) * transposed(rotation_matrix[i]) + phi = angular_velocity[i] * dt + 0.5 * wdot * dt * dt + rotation[i] = quaternion(phi, 
length(phi)) * rotation[i] + rotation_matrix[i] = quaternion_to_rotation_matrix(rotation[i]) + angular_velocity[i] += wdot * dt + +def gravity(i): + force[i][2] -= mass[i] * gravity_SI + + +file_name = os.path.basename(__file__) +file_name_without_extension = os.path.splitext(file_name)[0] + +psim = pairs.simulation( + file_name_without_extension, + [pairs.sphere(), pairs.halfspace()], + double_prec=True, + particle_capacity=1000000, + neighbor_capacity=20, + debug=True, + generate_whole_program=False) + + +target = sys.argv[1] if len(sys.argv) > 1 else "none" # check the argument count so a missing target falls back to "none" + +if target == 'gpu': + psim.target(pairs.target_gpu()) +elif target == 'cpu': + psim.target(pairs.target_cpu()) +else: + print(f"Invalid target, use {sys.argv[0]} <cpu/gpu>") + +psim.add_position('position') +psim.add_property('mass', pairs.real()) +psim.add_property('linear_velocity', pairs.vector()) +psim.add_property('angular_velocity', pairs.vector()) +psim.add_property('force', pairs.vector(), volatile=True) +psim.add_property('torque', pairs.vector(), volatile=True) +psim.add_property('radius', pairs.real()) +psim.add_property('normal', pairs.vector()) +psim.add_property('inv_inertia', pairs.matrix()) +psim.add_property('rotation_matrix', pairs.matrix()) +psim.add_property('rotation', pairs.quaternion()) + +ntypes = 2 +psim.add_feature('type', ntypes) +psim.add_feature_property('type', 'stiffness', pairs.real(), [3000 for i in range(ntypes * ntypes)]) +psim.add_feature_property('type', 'damping_norm', pairs.real(), [10.0 for i in range(ntypes * ntypes)]) +psim.add_feature_property('type', 'damping_tan', pairs.real()) +psim.add_feature_property('type', 'friction', pairs.real()) + +psim.set_domain_partitioner(pairs.block_forest()) +psim.pbc([False, False, False]) +psim.build_cell_lists() + +# The order of user-defined functions is not important here since +# they are not used by other subroutines and are only callable individually +psim.compute(update_mass_and_inertia, symbols={'infinity': math.inf
}) +psim.compute(spring_dashpot) +psim.compute(euler, parameters={'dt': pairs.real()}) + +gravity_SI = 9.81 +psim.compute(gravity, symbols={'gravity_SI': gravity_SI }) + +psim.generate() + diff --git a/examples/dem.py b/examples/whole-program-generation/linear_spring_dashpot.py similarity index 91% rename from examples/dem.py rename to examples/whole-program-generation/linear_spring_dashpot.py index 9cecf55eb8a8cef70cce3ff5927ffb1cfdbc8ebd..90348fd7ecb0acfe1a7fd6d84bf4b7bc5c4dbcf1 100644 --- a/examples/dem.py +++ b/examples/whole-program-generation/linear_spring_dashpot.py @@ -97,9 +97,6 @@ if target != 'cpu' and target != 'gpu': # Config file parameters domainSize_SI = [0.8, 0.015, 0.2] -#domainSize_SI = [0.4, 0.4, 0.2] # node base -#domainSize_SI = [0.6, 0.6, 0.2] # node base -#domainSize_SI = [0.8, 0.8, 0.2] # node base diameter_SI = 0.0029 gravity_SI = 9.81 densityFluid_SI = 1000 @@ -112,7 +109,6 @@ restitutionCoefficient = 0.1 collisionTime_SI = 5e-4 poissonsRatio = 0.22 timeSteps = 10000 -#timeSteps = 1000 visSpacing = 100 denseBottomLayer = False bottomLayerOffsetFactor = 1.0 @@ -128,13 +124,14 @@ frictionStatic = 0.0 frictionDynamic = frictionCoefficient psim = pairs.simulation( - "dem", + "linear_spring_dashpot", [pairs.sphere(), pairs.halfspace()], timesteps=timeSteps, double_prec=True, use_contact_history=True, particle_capacity=1000000, - neighbor_capacity=20) + neighbor_capacity=20, + generate_whole_program=True) if target == 'gpu': psim.target(pairs.target_gpu()) @@ -167,29 +164,16 @@ psim.dem_sc_grid( domainSize_SI[0], domainSize_SI[1], domainSize_SI[2], generationSpacing_SI, diameter_SI, minDiameter_SI, maxDiameter_SI, initialVelocity_SI, densityParticle_SI, ntypes) -#psim.read_particle_data( -# "data/spheres.input", -# "data/spheres_4x4x2.input", -# "data/spheres_6x6x2.input", -# "data/spheres_8x8x2.input", -# ['uid', 'type', 'mass', 'radius', 'position', 'linear_velocity', 'flags'], -# pairs.sphere()) - -#psim.read_particle_data( -# 
"data/spheres_bottom.input", -# ['type', 'mass', 'radius', 'position', 'linear_velocity', 'flags'], -# pairs.sphere()) psim.read_particle_data( "data/planes.input", - ['uid', 'type', 'mass', 'position', 'normal', 'flags'], + ['type', 'mass', 'position', 'normal', 'flags'], pairs.halfspace()) psim.setup(update_mass_and_inertia, {'densityParticle_SI': densityParticle_SI, 'pi': math.pi, 'infinity': math.inf }) -#psim.compute_half() psim.build_cell_lists(linkedCellWidth) #psim.vtk_output(f"output/dem_{target}", frequency=visSpacing) diff --git a/examples/md.py b/examples/whole-program-generation/md.py similarity index 85% rename from examples/md.py rename to examples/whole-program-generation/md.py index 08729e1e52994e47395cb13bfd4417ef0348b77f..22eb3c1f58d0fb7d8892232b04a9aa8b146cccf4 100644 --- a/examples/md.py +++ b/examples/whole-program-generation/md.py @@ -35,7 +35,12 @@ nz = 32 rho = 0.8442 temp = 1.44 -psim = pairs.simulation("md", [pairs.point_mass()], timesteps=200, double_prec=True) +psim = pairs.simulation("md", + [pairs.point_mass()], + timesteps=200, + double_prec=True, + debug=True, + generate_whole_program=True) if target == 'gpu': psim.target(pairs.target_gpu()) @@ -55,9 +60,8 @@ psim.set_domain_partitioner(pairs.regular_domain_partitioner()) psim.compute_thermo(100) psim.reneighbor_every(20) -#psim.compute_half() psim.build_neighbor_lists(cutoff_radius + skin) -#psim.vtk_output(f"output/md_{target}") +# psim.vtk_output(f"output/md_{target}") psim.compute(initial_integrate, symbols={'dt': dt}, pre_step=True, skip_first=True) psim.compute(lennard_jones, cutoff_radius) diff --git a/examples/whole-program-generation/spring_dashpot.py b/examples/whole-program-generation/spring_dashpot.py new file mode 100644 index 0000000000000000000000000000000000000000..6212b0aa7fd41d3d78ad4655f3d9de8f8afa1750 --- /dev/null +++ b/examples/whole-program-generation/spring_dashpot.py @@ -0,0 +1,162 @@ +import math +import pairs +import sys +import os + +def 
update_mass_and_inertia(i): + rotation_matrix[i] = diagonal_matrix(1.0) + rotation[i] = default_quaternion() + + if is_sphere(i): + inv_inertia[i] = inversed(diagonal_matrix(0.4 * mass[i] * radius[i] * radius[i])) + + else: + mass[i] = infinity + inv_inertia[i] = 0.0 + +def spring_dashpot(i, j): + delta_ij = -penetration_depth(i, j) + skip_when(delta_ij < 0.0) + + velocity_wf_i = linear_velocity[i] + cross(angular_velocity[i], contact_point(i, j) - position[i]) + velocity_wf_j = linear_velocity[j] + cross(angular_velocity[j], contact_point(i, j) - position[j]) + + rel_vel = -(velocity_wf_i - velocity_wf_j) + rel_vel_n = dot(rel_vel, contact_normal(i, j)) + rel_vel_t = rel_vel - rel_vel_n * contact_normal(i, j) + + fNabs = stiffness[i,j] * delta_ij + damping_norm[i,j] * rel_vel_n + fN = fNabs * contact_normal(i, j) + + fTabs = min(damping_tan[i,j] * length(rel_vel_t), friction[i, j] * fNabs) + fT = fTabs * normalized(rel_vel_t) + + partial_force = fN + fT + apply(force, partial_force) + apply(torque, cross(contact_point(i, j) - position, partial_force)) + +def euler(i): + inv_mass = 1.0 / mass[i] + position[i] += 0.5 * inv_mass * force[i] * dt * dt + linear_velocity[i] * dt + linear_velocity[i] += inv_mass * force[i] * dt + wdot = rotation_matrix[i] * (inv_inertia[i] * torque[i]) * transposed(rotation_matrix[i]) + phi = angular_velocity[i] * dt + 0.5 * wdot * dt * dt + rotation[i] = quaternion(phi, length(phi)) * rotation[i] + rotation_matrix[i] = quaternion_to_rotation_matrix(rotation[i]) + angular_velocity[i] += wdot * dt + +def gravity(i): + force[i][2] -= mass[i] * gravity_SI + + +# Domain size +domainSize_SI=[10, 10, 10] + +# Parameters required for generating the initial grid of particles 'dem_sc_grid' +generationSpacing_SI = 0.4 +diameter_SI = 0.3 +minDiameter_SI = diameter_SI +maxDiameter_SI = diameter_SI +initialVelocity_SI = 2 +densityParticle_SI = 100 + +# Linked cell width +linkedCellWidth = 1.01 * maxDiameter_SI + +# Required symbol for the 'gravity' 
# Required symbol for the 'gravity' module
gravity_SI = 9.81

# Required symbol for the 'euler' module
dt_SI = 1e-3

# VTK frequency
visSpacing = 20

timeSteps = 2000

# file_name_without_extension is the simulation identifier (in this case "spring_dashpot")
# TODO: Integration with cmake
file_name = os.path.basename(__file__)
file_name_without_extension = os.path.splitext(file_name)[0]

psim = pairs.simulation(
    file_name_without_extension,
    [pairs.sphere(), pairs.halfspace()],
    timesteps=timeSteps,
    double_prec=True,
    particle_capacity=1000000,
    neighbor_capacity=20,
    debug=True,
    generate_whole_program=True)

# The target is the first command-line argument. The argument *count* must be
# checked: testing len(sys.argv[1]) raised IndexError when no argument was
# given and rejected one-character arguments.
target = sys.argv[1] if len(sys.argv) > 1 else "none"
if target == 'gpu':
    psim.target(pairs.target_gpu())
elif target == 'cpu':
    psim.target(pairs.target_cpu())
else:
    # NOTE(review): an unrecognised target only prints a hint; no target is
    # configured and the script keeps running -- confirm whether it should abort.
    print(f"Invalid target, use {sys.argv[0]} <cpu/gpu>")


# Register properties
psim.add_position('position')
psim.add_property('mass', pairs.real())
psim.add_property('linear_velocity', pairs.vector())
psim.add_property('angular_velocity', pairs.vector())
psim.add_property('force', pairs.vector(), volatile=True)
psim.add_property('torque', pairs.vector(), volatile=True)
psim.add_property('radius', pairs.real())
psim.add_property('normal', pairs.vector())
psim.add_property('inv_inertia', pairs.matrix())
psim.add_property('rotation_matrix', pairs.matrix())
psim.add_property('rotation', pairs.quaternion())

# Define the number of 'type' features and their pair-wise properties
# (one entry per ordered type pair, hence ntypes * ntypes values each)
ntypes = 2
stiffness_SI = [100000 for i in range(ntypes * ntypes)]
dampingNorm_SI = [300 for i in range(ntypes * ntypes)]
dampingTan_SI = [0.5 for i in range(ntypes * ntypes)]
friction_SI = [20.0 for i in range(ntypes * ntypes)]

# Register 'type' as a feature
psim.add_feature('type', ntypes)

# Register properties for the 'type' feature
psim.add_feature_property('type', 'stiffness', pairs.real(), stiffness_SI)
psim.add_feature_property('type', 'damping_norm', pairs.real(), dampingNorm_SI)
psim.add_feature_property('type', 'damping_tan', pairs.real(), dampingTan_SI)
psim.add_feature_property('type', 'friction', pairs.real(), friction_SI)

# Define the domain and optimization strategies
psim.set_domain([0.0, 0.0, 0.0, domainSize_SI[0], domainSize_SI[1], domainSize_SI[2]])
psim.pbc([False, False, False])
psim.set_domain_partitioner(pairs.block_forest())
psim.set_workload_balancer(pairs.morton(), regrid_min=100, regrid_max=1000, rebalance_frequency=200)
psim.build_cell_lists(linkedCellWidth)

# Generate particles
psim.dem_sc_grid(domainSize_SI[0], domainSize_SI[1], domainSize_SI[2],
                 generationSpacing_SI,
                 diameter_SI,
                 minDiameter_SI,
                 maxDiameter_SI,
                 initialVelocity_SI,
                 densityParticle_SI,
                 ntypes)

# Read planes from file
psim.read_particle_data( "data/sd_planes.input", ['type', 'mass', 'position', 'normal', 'flags'], pairs.halfspace())

psim.vtk_output(f"output/dem_{target}", frequency=visSpacing)

# The user-defined 'setup' functions are executed only once before the timestep loop
psim.setup(update_mass_and_inertia, symbols={'infinity': math.inf })

# The user-defined 'compute' functions are added to the timestep loop in the order they are given to 'compute'
psim.compute(spring_dashpot, linkedCellWidth)
psim.compute(gravity, symbols={'gravity_SI': gravity_SI })
psim.compute(euler, symbols={'dt': dt_SI})

# Trigger code generation
psim.generate()
const { return d_ptr; } void setPointers(void *h_ptr_, void *d_ptr_) { h_ptr = h_ptr_, d_ptr = d_ptr_; } - void setSize(size_t size_) { size = size_; } - size_t getSize() { return size; }; - bool isStatic() { return is_static; } + void setSize(size_t size_) { size = size_;} + size_t getSize() const { return size; } + bool isStatic() const { return is_static; } }; } diff --git a/runtime/boundary_weights.cpp b/runtime/boundary_weights.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3a67d29386c5d1d11b057b3a6435a3c8d7496626 --- /dev/null +++ b/runtime/boundary_weights.cpp @@ -0,0 +1,46 @@ +#include "boundary_weights.hpp" + +// Always include last generated interfaces +#include "last_generated.hpp" +namespace pairs { + +void compute_boundary_weights( + PairsRuntime *ps, + real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax, + long unsigned int *comp_weight, long unsigned int *comm_weight) { + + const int particle_capacity = ps->getTrackedVariableAsInteger("particle_capacity"); + const int nlocal = ps->getTrackedVariableAsInteger("nlocal"); + auto position_prop = ps->getPropertyByName("position"); + auto flags_prop = ps->getPropertyByName("flags"); + + real_t *position_ptr = static_cast<real_t *>(position_prop.getHostPointer()); + int *flags_ptr = static_cast<int *>(flags_prop.getHostPointer()); + + *comp_weight = 0; + + for(int i = 0; i < nlocal; i++) { + if (pairs_host_interface::get_flags(flags_ptr, i) & (pairs::flags::INFINITE | pairs::flags::GLOBAL)) { + continue; + } + + real_t pos_x = pairs_host_interface::get_position(position_ptr, i, 0, particle_capacity); + real_t pos_y = pairs_host_interface::get_position(position_ptr, i, 1, particle_capacity); + real_t pos_z = pairs_host_interface::get_position(position_ptr, i, 2, particle_capacity); + + if( pos_x >= xmin && pos_x < xmax && + pos_y >= ymin && pos_y < ymax && + pos_z >= zmin && pos_z < zmax) { + (*comp_weight)++; + } + } + + // TODO: Count the number of ghosts 
// Per-block partial count of particles in [start, end) that lie inside the
// half-open box [xmin,xmax) x [ymin,ymax) x [zmin,zmax) and are neither
// INFINITE nor GLOBAL.
//
// Each thread handles one particle and writes 0 or 1 into shared memory; the
// block then sums these values with a halving-stride reduction and thread 0
// stores the block total in d_weights[blockIdx.x].
//
// Launch requirements: blockDim.x must not exceed REDUCE_BLOCK_SIZE (size of
// the shared buffer) and must be a power of two, otherwise the halving
// reduction skips entries. d_weights needs one int per launched block.
__global__ void reduceBoundaryWeights( real_t *position, int *flags, int start, int end, int particle_capacity,
    real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax, int *d_weights) {

    __shared__ int red_data[REDUCE_BLOCK_SIZE];
    const int tid = threadIdx.x;
    const int i = blockIdx.x * blockDim.x + tid;
    const int particle_idx = start + i;

    red_data[tid] = 0;

    if(particle_idx < end) {
        // The flags must be read for the same particle as the position, i.e.
        // with particle_idx. Using the launch-relative index 'i' tested the
        // wrong particle whenever start != 0.
        if (!(pairs_cuda_interface::get_flags(flags, particle_idx) & (pairs::flags::INFINITE | pairs::flags::GLOBAL))) {

            real_t pos_x = pairs_cuda_interface::get_position(position, particle_idx, 0, particle_capacity);
            real_t pos_y = pairs_cuda_interface::get_position(position, particle_idx, 1, particle_capacity);
            real_t pos_z = pairs_cuda_interface::get_position(position, particle_idx, 2, particle_capacity);

            if( pos_x >= xmin && pos_x < xmax &&
                pos_y >= ymin && pos_y < ymax &&
                pos_z >= zmin && pos_z < zmax) {
                    red_data[tid] = 1;
            }
        }
    }

    // All threads must have written their slot before the reduction starts.
    __syncthreads();

    int s = blockDim.x >> 1;
    while(s > 0) {
        if(tid < s) {
            red_data[tid] += red_data[tid + s];
        }

        // Barrier is outside the 'if' so every thread of the block reaches it.
        __syncthreads();
        s >>= 1;
    }

    if(tid == 0) {
        d_weights[blockIdx.x] = red_data[0];
    }
}
// Counts, on the device, the particles with index in [start, end) that lie
// inside the given half-open box and are neither INFINITE nor GLOBAL.
//
// position / flags are device pointers. One partial sum per thread block is
// produced by reduceBoundaryWeights and the partial sums are added on the
// host. Returns 0 for an empty or inverted range.
int cuda_compute_boundary_weights(
    real_t *position, int *flags, int start, int end, int particle_capacity,
    real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax) {

    // Empty or inverted range: nothing to count, and a launch with a
    // non-positive grid size would be invalid.
    if (end <= start) return 0;
    const int nblocks = (end - start + (REDUCE_BLOCK_SIZE - 1)) / REDUCE_BLOCK_SIZE;

    int *h_weights = (int *) malloc(nblocks * sizeof(int));
    int *d_weights = (int *) device_alloc(nblocks * sizeof(int));
    int red = 0;

    CUDA_ASSERT(cudaMemset(d_weights, 0, nblocks * sizeof(int)));
    reduceBoundaryWeights<<<nblocks, REDUCE_BLOCK_SIZE>>>(
        position, flags, start, end, particle_capacity,
        xmin, xmax, ymin, ymax, zmin, zmax, d_weights);

    CUDA_ASSERT(cudaPeekAtLastError());
    CUDA_ASSERT(cudaDeviceSynchronize());
    CUDA_ASSERT(cudaMemcpy(h_weights, d_weights, nblocks * sizeof(int), cudaMemcpyDeviceToHost));

    for(int i = 0; i < nblocks; i++) {
        red += h_weights[i];
    }

    // Release the scratch buffers; they were previously leaked on every call.
    // NOTE(review): assumes device_alloc() is a plain cudaMalloc wrapper. If the
    // runtime provides a matching deallocator, use that instead of cudaFree.
    free(h_weights);
    CUDA_ASSERT(cudaFree(d_weights));

    return red;
}

// Device-side weights for one block, used by the load balancer.
//   comp_weight: number of local particles inside the box (see
//                cuda_compute_boundary_weights).
//   comm_weight: always 0 for now (ghost counting is still a TODO).
void compute_boundary_weights(
    PairsRuntime *ps,
    real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax,
    long unsigned int *comp_weight, long unsigned int *comm_weight) {

    const int particle_capacity = ps->getTrackedVariableAsInteger("particle_capacity");
    const int nlocal = ps->getTrackedVariableAsInteger("nlocal");
    // Only needed by the (disabled) ghost count below; kept so the lookup still happens.
    const int nghost = ps->getTrackedVariableAsInteger("nghost");
    (void) nghost;
    auto position_prop = ps->getPropertyByName("position");
    auto flags_prop = ps->getPropertyByName("flags");

    real_t *position_ptr = static_cast<real_t *>(position_prop.getDevicePointer());
    int *flags_ptr = static_cast<int *>(flags_prop.getDevicePointer());

    // Make sure the device copies are current before the kernel reads them.
    ps->copyPropertyToDevice(position_prop.getId(), ReadOnly);
    ps->copyPropertyToDevice(flags_prop.getId(), ReadOnly);

    *comp_weight = cuda_compute_boundary_weights(
        position_ptr, flags_ptr, 0, nlocal, particle_capacity, xmin, xmax, ymin, ymax, zmin, zmax);

    // TODO: count ghosts in [nlocal, nlocal + nghost) once the communication
    // weight is defined; until then it is reported as zero.
    *comm_weight = 0;
}
== Prop_Integer) ? sizeof(int) : + (type == Prop_UInt64) ? sizeof(uint64_t) : (type == Prop_Real) ? sizeof(real_t) : (type == Prop_Vector) ? sizeof(real_t) : (type == Prop_Matrix) ? sizeof(real_t) : diff --git a/runtime/copper_fcc_lattice.cpp b/runtime/copper_fcc_lattice.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1fd364f50fc9da952d30fa41c4e7596aad5a7f71 --- /dev/null +++ b/runtime/copper_fcc_lattice.cpp @@ -0,0 +1,142 @@ +#include <iostream> +#include <math.h> +//--- +#include "copper_fcc_lattice.hpp" + +namespace pairs { + +double myrandom(int* seed) { + int k = (*seed) / IQ; + double ans; + + *seed = IA * (*seed - k * IQ) - IR * k; + if(*seed < 0) *seed += IM; + ans = AM * (*seed); + return ans; +} + +void random_reset(int *seed, int ibase, double *coord) { + int i; + char *str = (char *) &ibase; + int n = sizeof(int); + unsigned int hash = 0; + + for (i = 0; i < n; i++) { + hash += str[i]; + hash += (hash << 10); + hash ^= (hash >> 6); + } + + str = (char *) coord; + n = 3 * sizeof(double); + for (i = 0; i < n; i++) { + hash += str[i]; + hash += (hash << 10); + hash ^= (hash >> 6); + } + + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + + // keep 31 bits of unsigned int as new seed + // do not allow seed = 0, since will cause hang in gaussian() + + *seed = hash & 0x7ffffff; + if (!(*seed)) *seed = 1; + + // warm up the RNG + + for (i = 0; i < 5; i++) myrandom(seed); + //save = 0; +} + +double copper_fcc_lattice( + PairsRuntime *ps, int nx, int ny, int nz, double xprd, double yprd, double zprd, + double rho, int ntypes) { + + auto uids = ps->getAsUInt64Property(ps->getPropertyByName("uid")); + auto shapes = ps->getAsIntegerProperty(ps->getPropertyByName("shape")); + auto types = ps->getAsIntegerProperty(ps->getPropertyByName("type")); + auto flags = ps->getAsIntegerProperty(ps->getPropertyByName("flags")); + auto masses = ps->getAsFloatProperty(ps->getPropertyByName("mass")); + auto positions = 
// Fills the particle properties with point masses placed on lattice sites and
// returns the resulting particle count.
//
//   nx, ny, nz       : lattice extent; site indices run from 0 to 2*n - 1 per axis
//   xprd, yprd, zprd : domain extent along each axis (lower corner at the origin)
//   rho              : density used to derive the lattice constant
//   ntypes           : number of particle types; a type is drawn with rand() % ntypes
//
// Only sites accepted by the domain partitioner's isWithinSubdomain() are
// created. New particles are appended after the current "nlocal" entries.
// NOTE(review): the function returns the new count but does not write it back
// to "nlocal" -- presumably the caller does; confirm.
double copper_fcc_lattice(
    PairsRuntime *ps, int nx, int ny, int nz, double xprd, double yprd, double zprd,
    double rho, int ntypes) {

    auto uids = ps->getAsUInt64Property(ps->getPropertyByName("uid"));
    auto shapes = ps->getAsIntegerProperty(ps->getPropertyByName("shape"));
    auto types = ps->getAsIntegerProperty(ps->getPropertyByName("type"));
    auto flags = ps->getAsIntegerProperty(ps->getPropertyByName("flags"));
    auto masses = ps->getAsFloatProperty(ps->getPropertyByName("mass"));
    auto positions = ps->getAsVectorProperty(ps->getPropertyByName("position"));
    auto velocities = ps->getAsVectorProperty(ps->getPropertyByName("linear_velocity"));
    double xlo = 0.0, xhi = xprd;
    double ylo = 0.0, yhi = yprd;
    double zlo = 0.0, zhi = zprd;
    int natoms = ps->getTrackedVariableAsInteger("nlocal");
    //int natoms_expected = 4 * nx * ny * nz;

    // Lattice constant derived from the density (4 sites per cell).
    double alat = pow((4.0 / rho), (1.0 / 3.0));
    // Site index bounds in units of half a lattice constant, padded by one ...
    int ilo = (int) (xlo / (0.5 * alat) - 1);
    int ihi = (int) (xhi / (0.5 * alat) + 1);
    int jlo = (int) (ylo / (0.5 * alat) - 1);
    int jhi = (int) (yhi / (0.5 * alat) + 1);
    int klo = (int) (zlo / (0.5 * alat) - 1);
    int khi = (int) (zhi / (0.5 * alat) + 1);

    // ... and clamped to the valid index range [0, 2*n - 1].
    ilo = MAX(ilo, 0);
    ihi = MIN(ihi, 2 * nx - 1);
    jlo = MAX(jlo, 0);
    jhi = MIN(jhi, 2 * ny - 1);
    klo = MAX(klo, 0);
    khi = MIN(khi, 2 * nz - 1);

    double xtmp, ytmp, ztmp, vxtmp, vytmp, vztmp;
    int i, j, k, m, n;
    // (sx, sy, sz) is the position inside the current sub-box, (ox, oy, oz)
    // the sub-box index; sites are visited sub-box by sub-box.
    int sx = 0; int sy = 0; int sz = 0;
    int ox = 0; int oy = 0; int oz = 0;
    int subboxdim = 8;

    while(oz * subboxdim <= khi) {
        k = oz * subboxdim + sz;
        j = oy * subboxdim + sy;
        i = ox * subboxdim + sx;

        // Only sites with an even index sum inside the clamped bounds are occupied.
        if(((i + j + k) % 2 == 0) &&
            (i >= ilo) && (i <= ihi) &&
            (j >= jlo) && (j <= jhi) &&
            (k >= klo) && (k <= khi)) {

            xtmp = 0.5 * alat * i;
            ytmp = 0.5 * alat * j;
            ztmp = 0.5 * alat * k;

            if(ps->getDomainPartitioner()->isWithinSubdomain(xtmp, ytmp, ztmp)) {
                // The seed depends only on the global site index (i, j, k), so
                // the velocity of a site does not depend on which rank creates it.
                n = k * (2 * ny) * (2 * nx) + j * (2 * nx) + i + 1;
                // Five values are discarded before each velocity component is drawn.
                for(m = 0; m < 5; m++) { myrandom(&n); }
                vxtmp = myrandom(&n);
                for(m = 0; m < 5; m++){ myrandom(&n); }
                vytmp = myrandom(&n);
                for(m = 0; m < 5; m++) { myrandom(&n); }
                vztmp = myrandom(&n);

                uids(natoms) = UniqueID::create(ps);
                masses(natoms) = 1.0;
                positions(natoms, 0) = xtmp;
                positions(natoms, 1) = ytmp;
                positions(natoms, 2) = ztmp;
                velocities(natoms, 0) = vxtmp;
                velocities(natoms, 1) = vytmp;
                velocities(natoms, 2) = vztmp;
                types(natoms) = rand() % ntypes;
                flags(natoms) = 0;
                shapes(natoms) = 2; // point mass
                natoms++;
            }
        }

        // Advance to the next site: x fastest inside the sub-box, then y, then z;
        // after a sub-box is finished move on to the next sub-box (x, then y, then z).
        sx++;

        if(sx == subboxdim) { sx = 0; sy++; }
        if(sy == subboxdim) { sy = 0; sz++; }
        if(sz == subboxdim) { sz = 0; ox++; }
        if(ox * subboxdim > ihi) { ox = 0; oy++; }
        if(oy * subboxdim > jhi) { oy = 0; oz++; }
    }

    return natoms;
}
ps->getAsIntegerProperty(ps->getPropertyByName("type")); - auto flags = ps->getAsIntegerProperty(ps->getPropertyByName("flags")); - auto masses = ps->getAsFloatProperty(ps->getPropertyByName("mass")); - auto positions = ps->getAsVectorProperty(ps->getPropertyByName("position")); - auto velocities = ps->getAsVectorProperty(ps->getPropertyByName("linear_velocity")); - double xlo = 0.0, xhi = xprd; - double ylo = 0.0, yhi = yprd; - double zlo = 0.0, zhi = zprd; - int natoms = 0; - //int natoms_expected = 4 * nx * ny * nz; - - double alat = pow((4.0 / rho), (1.0 / 3.0)); - int ilo = (int) (xlo / (0.5 * alat) - 1); - int ihi = (int) (xhi / (0.5 * alat) + 1); - int jlo = (int) (ylo / (0.5 * alat) - 1); - int jhi = (int) (yhi / (0.5 * alat) + 1); - int klo = (int) (zlo / (0.5 * alat) - 1); - int khi = (int) (zhi / (0.5 * alat) + 1); - - ilo = MAX(ilo, 0); - ihi = MIN(ihi, 2 * nx - 1); - jlo = MAX(jlo, 0); - jhi = MIN(jhi, 2 * ny - 1); - klo = MAX(klo, 0); - khi = MIN(khi, 2 * nz - 1); - - double xtmp, ytmp, ztmp, vxtmp, vytmp, vztmp; - int i, j, k, m, n; - int sx = 0; int sy = 0; int sz = 0; - int ox = 0; int oy = 0; int oz = 0; - int subboxdim = 8; - - while(oz * subboxdim <= khi) { - k = oz * subboxdim + sz; - j = oy * subboxdim + sy; - i = ox * subboxdim + sx; - - if(((i + j + k) % 2 == 0) && - (i >= ilo) && (i <= ihi) && - (j >= jlo) && (j <= jhi) && - (k >= klo) && (k <= khi)) { - - xtmp = 0.5 * alat * i; - ytmp = 0.5 * alat * j; - ztmp = 0.5 * alat * k; - - if(ps->getDomainPartitioner()->isWithinSubdomain(xtmp, ytmp, ztmp)) { - n = k * (2 * ny) * (2 * nx) + j * (2 * nx) + i + 1; - for(m = 0; m < 5; m++) { myrandom(&n); } - vxtmp = myrandom(&n); - for(m = 0; m < 5; m++){ myrandom(&n); } - vytmp = myrandom(&n); - for(m = 0; m < 5; m++) { myrandom(&n); } - vztmp = myrandom(&n); - - masses(natoms) = 1.0; - positions(natoms, 0) = xtmp; - positions(natoms, 1) = ytmp; - positions(natoms, 2) = ztmp; - velocities(natoms, 0) = vxtmp; - velocities(natoms, 1) = vytmp; - 
velocities(natoms, 2) = vztmp; - types(natoms) = rand() % ntypes; - flags(natoms) = 0; - shape(natoms) = 2; // point mass - natoms++; - } - } - - sx++; - - if(sx == subboxdim) { sx = 0; sy++; } - if(sy == subboxdim) { sy = 0; sz++; } - if(sz == subboxdim) { sz = 0; ox++; } - if(ox * subboxdim > ihi) { ox = 0; oy++; } - if(oy * subboxdim > jhi) { oy = 0; oz++; } - } +namespace pairs { - return natoms; -} +double myrandom(int* seed); +void random_reset(int *seed, int ibase, double *coord); +double copper_fcc_lattice( + PairsRuntime *ps, int nx, int ny, int nz, double xprd, double yprd, double zprd, + double rho, int ntypes); } diff --git a/runtime/create_body.cpp b/runtime/create_body.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6c431f646db037670fa29b0d4e73ab3e35a420ae --- /dev/null +++ b/runtime/create_body.cpp @@ -0,0 +1,75 @@ +#include "create_body.hpp" + +namespace pairs { + +// returns the uid of the body created, or 0 if the body is not created +id_t create_halfspace(PairsRuntime *pr, + double x, double y, double z, + double nx, double ny, double nz, + int type, int flag){ + // TODO: increase capacity if exceeded + id_t uid = 0; + auto uids = pr->getAsUInt64Property(pr->getPropertyByName("uid")); + auto shapes = pr->getAsIntegerProperty(pr->getPropertyByName("shape")); + auto types = pr->getAsIntegerProperty(pr->getPropertyByName("type")); + auto flags = pr->getAsIntegerProperty(pr->getPropertyByName("flags")); + auto positions = pr->getAsVectorProperty(pr->getPropertyByName("position")); + auto normals = pr->getAsVectorProperty(pr->getPropertyByName("normal")); + + if(pr->getDomainPartitioner()->isWithinSubdomain(x, y, z) || flag & (flags::INFINITE | flags::GLOBAL) ){ + int n = pr->getTrackedVariableAsInteger("nlocal"); + uid = (flag & flags::GLOBAL) ? 
UniqueID::createGlobal(pr) : UniqueID::create(pr); + uids(n) = uid; + positions(n, 0) = x; + positions(n, 1) = y; + positions(n, 2) = z; + normals(n, 0) = nx; + normals(n, 1) = ny; + normals(n, 2) = nz; + types(n) = type; + flags(n) = flag; + shapes(n) = 1; // halfspace + pr->setTrackedVariableAsInteger("nlocal", n + 1); + } + + return uid; +} + +// returns the uid of the body created, or 0 if the body is not created +id_t create_sphere(PairsRuntime *pr, + double x, double y, double z, + double vx, double vy, double vz, + double density, double radius, int type, int flag){ + // TODO: increase capacity if exceeded + id_t uid = 0; + auto uids = pr->getAsUInt64Property(pr->getPropertyByName("uid")); + auto shapes = pr->getAsIntegerProperty(pr->getPropertyByName("shape")); + auto types = pr->getAsIntegerProperty(pr->getPropertyByName("type")); + auto flags = pr->getAsIntegerProperty(pr->getPropertyByName("flags")); + auto masses = pr->getAsFloatProperty(pr->getPropertyByName("mass")); + auto radii = pr->getAsFloatProperty(pr->getPropertyByName("radius")); + auto positions = pr->getAsVectorProperty(pr->getPropertyByName("position")); + auto velocities = pr->getAsVectorProperty(pr->getPropertyByName("linear_velocity")); + + if(pr->getDomainPartitioner()->isWithinSubdomain(x, y, z)) { + int n = pr->getTrackedVariableAsInteger("nlocal"); + uid = (flag & flags::GLOBAL) ? 
namespace internal {

// Single generator instance for this translation unit, default-constructed.
static std::mt19937 generator; // static std::mt19937_64 generator;

// Gives access to the generator above.
std::mt19937 & get_generator() {
    // std::mt19937_64
    return generator;
}

}

// Tests whether 'point' (x, y, z) lies inside the box
// 'aabb' = {xmin, ymin, zmin, xmax, ymax, zmax}.
// The lower bounds are inclusive, the upper bounds exclusive.
bool point_within_aabb(double point[], double aabb[]) {
    for(int axis = 0; axis < 3; axis++) {
        const double lower = aabb[axis];
        const double upper = aabb[axis + 3];

        if(!(point[axis] >= lower && point[axis] < upper)) {
            return false;
        }
    }

    return true;
}
initial_velocity, double particle_density, int ntypes) { + auto uids = ps->getAsUInt64Property(ps->getPropertyByName("uid")); + auto shapes = ps->getAsIntegerProperty(ps->getPropertyByName("shape")); + auto types = ps->getAsIntegerProperty(ps->getPropertyByName("type")); + auto flags = ps->getAsIntegerProperty(ps->getPropertyByName("flags")); + auto masses = ps->getAsFloatProperty(ps->getPropertyByName("mass")); + auto radius = ps->getAsFloatProperty(ps->getPropertyByName("radius")); + auto positions = ps->getAsVectorProperty(ps->getPropertyByName("position")); + auto velocities = ps->getAsVectorProperty(ps->getPropertyByName("linear_velocity")); + int nparticles = ps->getTrackedVariableAsInteger("nlocal"); + + const double xmin = 0.0; + const double ymin = 0.0; + const double zmin = 0.0; + + double gen_domain[] = {xmin, ymin, zmin, xmax, ymax, zmax}; + double ref_point[] = {spacing * 0.5, spacing * 0.5, spacing * 0.5}; + double sc_xmin = xmin - ref_point[0]; + double sc_ymin = ymin - ref_point[1]; + double sc_zmin = zmin - ref_point[2]; + + int iret = (int)(ceil(sc_xmin / spacing)); + int jret = (int)(ceil(sc_ymin / spacing)); + int kret = (int)(ceil(sc_zmin / spacing)); + + int i = iret; + int j = jret; + int k = kret; + + double point[3]; + point[0] = ref_point[0] + i * spacing; + point[1] = ref_point[1] + j * spacing; + point[2] = ref_point[2] + k * spacing; + + while(point_within_aabb(point, gen_domain)) { + auto pdiam = realRandom<real_t>(min_diameter, max_diameter); + + if(ps->getDomainPartitioner()->isWithinSubdomain(point[0], point[1], point[2])) { + real_t rad = pdiam * 0.5; + uids(nparticles) = UniqueID::create(ps); + radius(nparticles) = rad; + masses(nparticles) = ((4.0 / 3.0) * M_PI) * rad * rad * rad * particle_density; + positions(nparticles, 0) = point[0]; + positions(nparticles, 1) = point[1]; + positions(nparticles, 2) = point[2]; + velocities(nparticles, 0) = 0.1 * realRandom<real_t>(-initial_velocity, initial_velocity); + velocities(nparticles, 
1) = 0.1 * realRandom<real_t>(-initial_velocity, initial_velocity); + velocities(nparticles, 2) = 0.1 * realRandom<real_t>(-initial_velocity, initial_velocity); + types(nparticles) = rand() % ntypes; + flags(nparticles) = 0; + shapes(nparticles) = shapes::Sphere; + + /* + std::cout << uid(nparticles) << "," << types(nparticles) << "," << masses(nparticles) << "," << radius(nparticles) << "," + << positions(nparticles, 0) << "," << positions(nparticles, 1) << "," << positions(nparticles, 2) << "," + << velocities(nparticles, 0) << "," << velocities(nparticles, 1) << "," << velocities(nparticles, 2) << "," + << flags(nparticles) << std::endl; + */ + + nparticles++; + } + + ++i; + point[0] = ref_point[0] + i * spacing; + point[1] = ref_point[1] + j * spacing; + point[2] = ref_point[2] + k * spacing; + + if(!point_within_aabb(point, gen_domain)) { + i = iret; + j++; + point[0] = ref_point[0] + i * spacing; + point[1] = ref_point[1] + j * spacing; + point[2] = ref_point[2] + k * spacing; + + if(!point_within_aabb(point, gen_domain)) { + j = jret; + k++; + point[0] = ref_point[0] + i * spacing; + point[1] = ref_point[1] + j * spacing; + point[2] = ref_point[2] + k * spacing; + + if(!point_within_aabb(point, gen_domain)) { + break; + } + } + } + } + + ps->setTrackedVariableAsInteger("nlocal", nparticles); + + int global_nparticles = nparticles; + if(ps->getDomainPartitioner()->getWorldSize() > 1) { + MPI_Allreduce(&nparticles, &global_nparticles, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + } + + if(ps->getDomainPartitioner()->getRank() == 0) { + std::cout << "DEM Simple-Cubic Grid" << std::endl; + std::cout << "Domain size: <" << xmax << ", " << ymax << ", " << zmax << ">" << std::endl; + std::cout << "Spacing: " << spacing << std::endl; + std::cout << "Diameter: " << diameter + << " (min = " << min_diameter << ", max = " << max_diameter << ")" << std::endl; + std::cout << "Initial velocity: " << initial_velocity << std::endl; + std::cout << "Particle density: " << 
particle_density << std::endl; + std::cout << "Number of types: " << ntypes << std::endl; + std::cout << "Number of particles: " << global_nparticles << std::endl; + } + + return nparticles; +} + +} diff --git a/runtime/dem_sc_grid.hpp b/runtime/dem_sc_grid.hpp index 8c85ce6b77d84ddd9e8e52327e3323fefb29378a..9eb34620a56aea9703b676c941fada9b751f42e8 100644 --- a/runtime/dem_sc_grid.hpp +++ b/runtime/dem_sc_grid.hpp @@ -1,8 +1,9 @@ -#include <iostream> #include <math.h> #include <random> //--- #include "pairs.hpp" +#include "pairs_common.hpp" +#include "unique_id.hpp" #pragma once @@ -10,16 +11,11 @@ namespace pairs { namespace internal { -static std::mt19937 generator; // static std::mt19937_64 generator; +std::mt19937 & get_generator(); -std::mt19937 & get_generator() { - // std::mt19937_64 - return generator; } -} - -template< typename REAL_TYPE = real_t> +template<typename REAL_TYPE = real_t> REAL_TYPE realRandom( const REAL_TYPE min = REAL_TYPE(0), const REAL_TYPE max = REAL_TYPE(1), @@ -41,134 +37,23 @@ REAL_TYPE realRandom( return value; } +template<typename REAL_TYPE> +class RealRandom { +public: + RealRandom(const std::mt19937::result_type& seed = std::mt19937::result_type()) { + generator_.seed(seed); + } + REAL_TYPE operator()(const REAL_TYPE min = REAL_TYPE(0), const REAL_TYPE max = REAL_TYPE(1)) { + return realRandom(min, max, generator_); + } -template<typename REAL_TYPE> class RealRandom { -public: - RealRandom(const std::mt19937::result_type& seed = std::mt19937::result_type()) { generator_.seed(seed); } - REAL_TYPE operator()(const REAL_TYPE min = REAL_TYPE(0), const REAL_TYPE max = REAL_TYPE(1) ) { - return realRandom(min, max, generator_); - } private: std::mt19937 generator_; }; -bool point_within_aabb(double point[], double aabb[]) { - return point[0] >= aabb[0] && point[0] < aabb[3] && - point[1] >= aabb[1] && point[1] < aabb[4] && - point[2] >= aabb[2] && point[2] < aabb[5]; -} - -int dem_sc_grid(PairsSimulation *ps, double xmax, double ymax, 
double zmax, double spacing, double diameter, double min_diameter, double max_diameter, double initial_velocity, double particle_density, int ntypes) { - auto uid = ps->getAsIntegerProperty(ps->getPropertyByName("uid")); - auto shape = ps->getAsIntegerProperty(ps->getPropertyByName("shape")); - auto types = ps->getAsIntegerProperty(ps->getPropertyByName("type")); - auto flags = ps->getAsIntegerProperty(ps->getPropertyByName("flags")); - auto masses = ps->getAsFloatProperty(ps->getPropertyByName("mass")); - auto radius = ps->getAsFloatProperty(ps->getPropertyByName("radius")); - auto positions = ps->getAsVectorProperty(ps->getPropertyByName("position")); - auto velocities = ps->getAsVectorProperty(ps->getPropertyByName("linear_velocity")); - int last_uid = 1; - int nparticles = 0; - - const double xmin = 0.0; - const double ymin = 0.0; - const double zmin = diameter; - - double gen_domain[] = {xmin, ymin, zmin, xmax, ymax, zmax}; - double ref_point[] = {spacing * 0.5, spacing * 0.5, spacing * 0.5}; - double sc_xmin = xmin - ref_point[0]; - double sc_ymin = ymin - ref_point[1]; - double sc_zmin = zmin - ref_point[2]; - - int iret = (int)(ceil(sc_xmin / spacing)); - int jret = (int)(ceil(sc_ymin / spacing)); - int kret = (int)(ceil(sc_zmin / spacing)); - - int i = iret; - int j = jret; - int k = kret; - - double point[3]; - point[0] = ref_point[0] + i * spacing; - point[1] = ref_point[1] + j * spacing; - point[2] = ref_point[2] + k * spacing; - - while(point_within_aabb(point, gen_domain)) { - int particle_uid = last_uid; - auto diameter = realRandom<real_t>(min_diameter, max_diameter); - - if(ps->getDomainPartitioner()->isWithinSubdomain(point[0], point[1], point[2])) { - real_t rad = diameter * 0.5; - uid(nparticles) = particle_uid; - radius(nparticles) = rad; - masses(nparticles) = ((4.0 / 3.0) * M_PI) * rad * rad * rad * particle_density; - positions(nparticles, 0) = point[0]; - positions(nparticles, 1) = point[1]; - positions(nparticles, 2) = point[2]; - 
velocities(nparticles, 0) = 0.1 * realRandom<real_t>(-initial_velocity, initial_velocity); - velocities(nparticles, 1) = 0.1 * realRandom<real_t>(-initial_velocity, initial_velocity); - velocities(nparticles, 2) = -initial_velocity; - types(nparticles) = rand() % ntypes; - flags(nparticles) = 0; - shape(nparticles) = 0; // sphere - - /* - std::cout << uid(nparticles) << "," << types(nparticles) << "," << masses(nparticles) << "," << radius(nparticles) << "," - << positions(nparticles, 0) << "," << positions(nparticles, 1) << "," << positions(nparticles, 2) << "," - << velocities(nparticles, 0) << "," << velocities(nparticles, 1) << "," << velocities(nparticles, 2) << "," - << flags(nparticles) << std::endl; - */ - - nparticles++; - } - - ++i; - point[0] = ref_point[0] + i * spacing; - point[1] = ref_point[1] + j * spacing; - point[2] = ref_point[2] + k * spacing; - - if(!point_within_aabb(point, gen_domain)) { - i = iret; - j++; - point[0] = ref_point[0] + i * spacing; - point[1] = ref_point[1] + j * spacing; - point[2] = ref_point[2] + k * spacing; - - if(!point_within_aabb(point, gen_domain)) { - j = jret; - k++; - point[0] = ref_point[0] + i * spacing; - point[1] = ref_point[1] + j * spacing; - point[2] = ref_point[2] + k * spacing; - - if(!point_within_aabb(point, gen_domain)) { - break; - } - } - } - - last_uid++; - } - - int global_nparticles = nparticles; - if(ps->getDomainPartitioner()->getWorldSize() > 1) { - MPI_Allreduce(&nparticles, &global_nparticles, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - } +bool point_within_aabb(double point[], double aabb[]); - if(ps->getDomainPartitioner()->getRank() == 0) { - std::cout << "DEM Simple-Cubic Grid" << std::endl; - std::cout << "Domain size: <" << xmax << ", " << ymax << ", " << zmax << ">" << std::endl; - std::cout << "Spacing: " << spacing << std::endl; - std::cout << "Diameter: " << diameter - << " (min = " << min_diameter << ", max = " << max_diameter << ")" << std::endl; - std::cout << "Initial velocity: " << 
initial_velocity << std::endl; - std::cout << "Particle density: " << particle_density << std::endl; - std::cout << "Number of types: " << ntypes << std::endl; - std::cout << "Number of particles: " << global_nparticles << std::endl; - } - - return nparticles; -} +int dem_sc_grid(PairsRuntime *ps, double xmax, double ymax, double zmax, double spacing, double diameter, double min_diameter, double max_diameter, double initial_velocity, double particle_density, int ntypes); } diff --git a/runtime/device_flags.hpp b/runtime/device_flags.hpp index 4b5085fb6b61f8df1c3e1e129541266765745fdf..089e32f3122dff09e56e01120ca33816844f8030 100644 --- a/runtime/device_flags.hpp +++ b/runtime/device_flags.hpp @@ -14,7 +14,7 @@ private: static const int narrays_per_flag = 64; public: DeviceFlags(int narrays_) : narrays(narrays_) { - nflags = std::ceil((double) narrays_ / (double) narrays_per_flag); + nflags = static_cast<int>(std::ceil((double) narrays_ / (double) narrays_per_flag)); hflags = new unsigned long long int[nflags]; dflags = new unsigned long long int[nflags]; diff --git a/runtime/devices/cuda.cu b/runtime/devices/cuda.cu index 8bb7c59ef3bbefff0556847669faeae57744372e..2cae5aa89accf7c720ea3f29d922184ab0264830 100644 --- a/runtime/devices/cuda.cu +++ b/runtime/devices/cuda.cu @@ -1,18 +1,13 @@ #include <cuda_runtime.h> #include <iostream> #include <cstring> +#include "../pairs_common.hpp" +#include "device.hpp" #define CUDA_ASSERT(a) { pairs::cuda_assert((a), __FILE__, __LINE__); } namespace pairs { -inline void cuda_assert(cudaError_t err, const char *file, int line) { - if(err != cudaSuccess) { - std::cerr << file << ":" << line << ": " << cudaGetErrorString(err) << std::endl; - exit(-1); - } -} - __host__ void *device_alloc(size_t size) { void *ptr; CUDA_ASSERT(cudaMalloc(&ptr, size)); @@ -71,4 +66,38 @@ __host__ void copy_static_symbol_to_host(void *d_ptr, const void *h_ptr, size_t //CUDA_ASSERT(cudaMemcpyFromSymbol(h_ptr, d_ptr, count)); } +#if __CUDA_ARCH__ < 600 
+__device__ double atomicAdd_double(double* address, double val) { + unsigned long long int * ull_addr = (unsigned long long int*) address; + unsigned long long int old = *ull_addr, assumed; + + do { + assumed = old; + old = atomicCAS(ull_addr, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __longlong_as_double(old); +} +#else +__device__ double atomicAdd_double(double* address, double val) { + return atomicAdd(address, val); +} +#endif + +__device__ int atomic_add(int *addr, int val) { return atomicAdd(addr, val); } +__device__ real_t atomic_add(real_t *addr, real_t val) { return atomicAdd_double(addr, val); } +__device__ int atomic_add_resize_check(int *addr, int val, int *resize, int capacity) { + const int add_res = *addr + val; + + // printf("atomic_add_resize_check::: add_res %d --- val %d --- capacity %d --- resize %d\n", add_res, val, capacity, *resize); + + if(add_res >= capacity) { + *resize = add_res; + return *addr; + } + + return atomic_add(addr, val); +} + } diff --git a/runtime/devices/device.hpp b/runtime/devices/device.hpp index 107b70ee91512ed9ccd336be2168e0b75ed5eab8..c5c406ec7c5c02634e119d4db9b6bbde3d9c0aac 100644 --- a/runtime/devices/device.hpp +++ b/runtime/devices/device.hpp @@ -8,6 +8,8 @@ #ifndef PAIRS_TARGET_CUDA # define __host__ typedef int cudaError_t; +#else +#include <cuda_runtime.h> #endif namespace pairs { @@ -71,42 +73,19 @@ inline __host__ int host_atomic_add_resize_check(int *addr, int val, int *resize } #ifdef PAIRS_TARGET_CUDA -#if __CUDA_ARCH__ < 600 -__device__ double atomicAdd_double(double* address, double val) { - unsigned long long int * ull_addr = (unsigned long long int*) address; - unsigned long long int old = *ull_addr, assumed; - - do { - assumed = old; - old = atomicCAS(ull_addr, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); - // Note: uses integer 
comparison to avoid hang in case of NaN (since NaN != NaN) - } while (assumed != old); - - return __longlong_as_double(old); -} -#else -__device__ double atomicAdd_double(double* address, double val) { - return atomicAdd(address, val); -} -#endif - -__device__ int atomic_add(int *addr, int val) { return atomicAdd(addr, val); } -__device__ real_t atomic_add(real_t *addr, real_t val) { return atomicAdd_double(addr, val); } -__device__ int atomic_add_resize_check(int *addr, int val, int *resize, int capacity) { - const int add_res = *addr + val; - if(add_res >= capacity) { - *resize = add_res; - return *addr; +inline void cuda_assert(cudaError_t err, const char *file, int line) { + if(err != cudaSuccess) { + std::cerr << file << ":" << line << ": " << cudaGetErrorString(err) << std::endl; + exit(-1); } - - return atomic_add(addr, val); } +__device__ double atomicAdd_double(double* address, double val); +__device__ int atomic_add(int *addr, int val); +__device__ real_t atomic_add(real_t *addr, real_t val); +__device__ int atomic_add_resize_check(int *addr, int val, int *resize, int capacity); #else -inline int atomic_add(int *addr, int val) { return host_atomic_add(addr, val); } -inline int atomic_add(real_t *addr, real_t val) { return host_atomic_add(addr, val); } -inline int atomic_add_resize_check(int *addr, int val, int *resize, int capacity) { - return host_atomic_add_resize_check(addr, val, resize, capacity); -} +int atomic_add(int *addr, int val); +real_t atomic_add(real_t *addr, real_t val); +int atomic_add_resize_check(int *addr, int val, int *resize, int capacity); #endif - } diff --git a/runtime/devices/dummy.cpp b/runtime/devices/dummy.cpp index a0151fc0aecd0322f2cd55feb9699ace713ae52e..9b06d0b267e45fa0ece7b492c27c86c47a1525b5 100644 --- a/runtime/devices/dummy.cpp +++ b/runtime/devices/dummy.cpp @@ -19,4 +19,16 @@ void copy_in_device(void *d_ptr1, const void *d_ptr2, size_t count) { std::memcpy(d_ptr1, d_ptr2, count); } +int atomic_add(int *addr, int val) 
{ + return host_atomic_add(addr, val); +} + +real_t atomic_add(real_t *addr, real_t val) { + return host_atomic_add(addr, val); +} + +int atomic_add_resize_check(int *addr, int val, int *resize, int capacity) { + return host_atomic_add_resize_check(addr, val, resize, capacity); +} + } diff --git a/runtime/domain/ParticleDataHandling.hpp b/runtime/domain/ParticleDataHandling.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c54bae6404434af71f43c3d6a98d32d0e0537574 --- /dev/null +++ b/runtime/domain/ParticleDataHandling.hpp @@ -0,0 +1,349 @@ +#include <blockforest/BlockForest.h> +#include <blockforest/BlockDataHandling.h> + +#pragma once + +namespace pairs { + +class PairsRuntime; + +void relocate_particle(PairsRuntime *ps, int dst, int src){ + for(auto &prop: ps->getProperties()) { + if(!prop.isVolatile()) { + auto prop_type = prop.getType(); + + if(prop_type == pairs::Prop_Vector) { + auto vector_ptr = ps->getAsVectorProperty(prop); + constexpr int nelems = 3; + + for(int e = 0; e < nelems; e++) { + vector_ptr(dst, e) = vector_ptr(src, e); + } + } else if(prop_type == pairs::Prop_Matrix) { + auto matrix_ptr = ps->getAsMatrixProperty(prop); + constexpr int nelems = 9; + + for(int e = 0; e < nelems; e++) { + matrix_ptr(dst, e) = matrix_ptr(src, e); + } + } else if(prop_type == pairs::Prop_Quaternion) { + auto quat_ptr = ps->getAsQuaternionProperty(prop); + constexpr int nelems = 4; + + for(int e = 0; e < nelems; e++) { + quat_ptr(dst, e) = quat_ptr(src, e); + } + } else if(prop_type == pairs::Prop_Integer) { + auto int_ptr = ps->getAsIntegerProperty(prop); + int_ptr(dst) = int_ptr(src); + } else if(prop_type == pairs::Prop_UInt64) { + auto uint64_ptr = ps->getAsUInt64Property(prop); + uint64_ptr(dst) = uint64_ptr(src); + } else if(prop_type == pairs::Prop_Real) { + auto float_ptr = ps->getAsFloatProperty(prop); + float_ptr(dst) = float_ptr(src); + } else { + std::cerr << "relocate_particle(): Invalid property type!" 
<< std::endl; + return; + } + } + } +} + +} + +namespace walberla { + +namespace internal { + +class ParticleDeleter { + friend bool operator==(const ParticleDeleter& lhs, const ParticleDeleter& rhs); + +public: + ParticleDeleter(pairs::PairsRuntime *ps_, const math::AABB& aabb_) : ps(ps_), aabb(aabb_) {} + + ~ParticleDeleter() { + int nlocal = ps->getTrackedVariableAsInteger("nlocal"); + auto position = ps->getAsVectorProperty(ps->getPropertyByName("position")); + auto flags = ps->getAsIntegerProperty(ps->getPropertyByName("flags")); + + int ndeleted = 0; + int *goneIdx = new int[nlocal]; + + for (int i=0; i<nlocal; ++i) { + if (flags(i) & (pairs::flags::INFINITE | pairs::flags::GLOBAL)) continue; + + const real_t pos_x = position(i, 0); + const real_t pos_y = position(i, 1); + const real_t pos_z = position(i, 2); + + if( aabb.contains(pos_x, pos_y, pos_z)) { + goneIdx[ndeleted] = i; + ++ndeleted; + } + } + + int beg = 0; + int end = ndeleted - 1; + int i = nlocal - 1; + while ((i > goneIdx[beg]) && (beg <= end)) { + if(i == goneIdx[end]){ + --end; + } + else{ + pairs::relocate_particle(ps, goneIdx[beg], i); + ++beg; + } + --i; + } + + delete[] goneIdx; + + ps->setTrackedVariableAsInteger("nlocal", nlocal - ndeleted); + ps->setTrackedVariableAsInteger("nghost", 0); + } + +private: + pairs::PairsRuntime *ps; + math::AABB aabb; +}; + +inline bool operator==(const ParticleDeleter& lhs, const ParticleDeleter& rhs) { + return lhs.aabb == rhs.aabb; +} + +} // namespace internal + +class ParticleDataHandling : public blockforest::BlockDataHandling<internal::ParticleDeleter> { +private: + pairs::PairsRuntime *ps; + +public: + ParticleDataHandling(pairs::PairsRuntime *ps_) : ps(ps_) {} + ~ParticleDataHandling() override = default; + + internal::ParticleDeleter *initialize(IBlock *const block) override { + return new internal::ParticleDeleter(ps, block->getAABB()); + } + + void serialize(IBlock *const block, const BlockDataID& id, mpi::SendBuffer& buffer) override { + 
serializeImpl(static_cast<Block*>(block), id, buffer, 0, false); + } + + internal::ParticleDeleter* deserialize(IBlock *const block) override { + return initialize(block); + } + + void deserialize(IBlock *const block, const BlockDataID& id, mpi::RecvBuffer& buffer) override { + deserializeImpl(block, id, buffer); + } + + void serializeCoarseToFine(Block *const block, const BlockDataID& id, mpi::SendBuffer& buffer, const uint_t child) override { + serializeImpl(block, id, buffer, child, true); + } + + void serializeFineToCoarse(Block *const block, const BlockDataID& id, mpi::SendBuffer& buffer) override { + serializeImpl(block, id, buffer, 0, false); + } + + internal::ParticleDeleter *deserializeCoarseToFine(Block *const block) override { + return initialize(block); + } + + internal::ParticleDeleter *deserializeFineToCoarse(Block *const block) override { + return initialize(block); + } + + void deserializeCoarseToFine(Block *const block, const BlockDataID& id, mpi::RecvBuffer& buffer) override { + deserializeImpl(block, id, buffer); + } + + void deserializeFineToCoarse(Block *const block, const BlockDataID& id, mpi::RecvBuffer& buffer, const uint_t) override { + deserializeImpl(block, id, buffer); + } + + void serializeImpl(Block *const block, const BlockDataID&, mpi::SendBuffer& buffer, const uint_t child, bool check_child) { + auto ptr = buffer.allocate<uint_t>(); + double aabb_check[6]; + + if(check_child) { + const auto child_id = BlockID(block->getId(), child); + const auto child_aabb = block->getForest().getAABBFromBlockId(child_id); + aabb_check[0] = child_aabb.xMin(); + aabb_check[1] = child_aabb.xMax(); + aabb_check[2] = child_aabb.yMin(); + aabb_check[3] = child_aabb.yMax(); + aabb_check[4] = child_aabb.zMin(); + aabb_check[5] = child_aabb.zMax(); + } else { + const auto aabb = block->getAABB(); + aabb_check[0] = aabb.xMin(); + aabb_check[1] = aabb.xMax(); + aabb_check[2] = aabb.yMin(); + aabb_check[3] = aabb.yMax(); + aabb_check[4] = aabb.zMin(); + 
aabb_check[5] = aabb.zMax(); + } + + int nlocal = ps->getTrackedVariableAsInteger("nlocal"); + auto position = ps->getAsVectorProperty(ps->getPropertyByName("position")); + auto flags = ps->getAsIntegerProperty(ps->getPropertyByName("flags")); + int nserialized = 0; + int *goneIdx = new int[nlocal]; + + for (int i=0; i<nlocal; ++i) { + if (flags(i) & (pairs::flags::INFINITE | pairs::flags::GLOBAL)) continue; + const real_t pos_x = position(i, 0); + const real_t pos_y = position(i, 1); + const real_t pos_z = position(i, 2); + + // Important: When rebalancing, it is assumed that all particles are within domain bounds. + // If a particle's center of mass lies outside the domain, it won't be contained + // in any of the checked blocks during serialization. In that case, the particle + // can become disassociated from its owner if the new block it should belong to is + // not an immediate neighbor to its owner rank. (if it's in an immediate neighbor, it will be exchanged) + if( pos_x >= aabb_check[0] && pos_x < aabb_check[1] && + pos_y >= aabb_check[2] && pos_y < aabb_check[3] && + pos_z >= aabb_check[4] && pos_z < aabb_check[5]) { + + goneIdx[nserialized] = i; + ++nserialized; + + for(auto &prop: ps->getProperties()) { + if(!prop.isVolatile()) { + auto prop_type = prop.getType(); + + if(prop_type == pairs::Prop_Vector) { + auto vector_ptr = ps->getAsVectorProperty(prop); + constexpr int nelems = 3; + + for(int e = 0; e < nelems; e++) { + buffer << vector_ptr(i, e); + } + } else if(prop_type == pairs::Prop_Matrix) { + auto matrix_ptr = ps->getAsMatrixProperty(prop); + constexpr int nelems = 9; + + for(int e = 0; e < nelems; e++) { + buffer << matrix_ptr(i, e); + } + } else if(prop_type == pairs::Prop_Quaternion) { + auto quat_ptr = ps->getAsQuaternionProperty(prop); + constexpr int nelems = 4; + + for(int e = 0; e < nelems; e++) { + buffer << quat_ptr(i, e); + } + } else if(prop_type == pairs::Prop_Integer) { + auto int_ptr = ps->getAsIntegerProperty(prop); + buffer << 
int_ptr(i); + } else if(prop_type == pairs::Prop_UInt64) { + auto uint64_ptr = ps->getAsUInt64Property(prop); + buffer << uint64_ptr(i); + } else if(prop_type == pairs::Prop_Real) { + auto float_ptr = ps->getAsFloatProperty(prop); + buffer << float_ptr(i); + } else { + std::cerr << "serializeImpl(): Invalid property type!" << std::endl; + return; + } + } + } + // TODO: serialize contact history data as well + } + } + + // Here we replace serialized particles with the remaining locals + // (Traverse locals in reverse order and move them to empty slots) + // Ghosts are ignored since they become invalid after rebalancing + int beg = 0; + int end = nserialized - 1; + int i = nlocal - 1; + while ((i > goneIdx[beg]) && (beg <= end)) { + if(i == goneIdx[end]){ + --end; + } + else{ + pairs::relocate_particle(ps, goneIdx[beg], i); + ++beg; + } + --i; + } + + delete[] goneIdx; + + ps->setTrackedVariableAsInteger("nlocal", nlocal - nserialized); + ps->setTrackedVariableAsInteger("nghost", 0); + + *ptr = (uint_t) nserialized; + } + + void deserializeImpl(IBlock *const, const BlockDataID&, mpi::RecvBuffer& buffer) { + int nlocal = ps->getTrackedVariableAsInteger("nlocal"); + int particle_capacity = ps->getTrackedVariableAsInteger("particle_capacity"); + real_t real_tmp; + int int_tmp; + uint_t nrecv; + uint64_t uint64_tmp; + + buffer >> nrecv; + + // TODO: Check if there is enough particle capacity for the new particles, when there is not, + // all properties and arrays which have particle_capacity as one of their dimensions must be reallocated + PAIRS_ASSERT(nlocal + nrecv < particle_capacity); + + for(int i = 0; i < nrecv; ++i) { + for(auto &prop: ps->getProperties()) { + if(!prop.isVolatile()) { + auto prop_type = prop.getType(); + + if(prop_type == pairs::Prop_Vector) { + auto vector_ptr = ps->getAsVectorProperty(prop); + constexpr int nelems = 3; + + for(int e = 0; e < nelems; e++) { + buffer >> real_tmp; + vector_ptr(nlocal + i, e) = real_tmp; + } + } else if(prop_type == 
pairs::Prop_Matrix) { + auto matrix_ptr = ps->getAsMatrixProperty(prop); + constexpr int nelems = 9; + + for(int e = 0; e < nelems; e++) { + buffer >> real_tmp; + matrix_ptr(nlocal + i, e) = real_tmp; + } + } else if(prop_type == pairs::Prop_Quaternion) { + auto quat_ptr = ps->getAsQuaternionProperty(prop); + constexpr int nelems = 4; + + for(int e = 0; e < nelems; e++) { + buffer >> real_tmp; + quat_ptr(nlocal + i, e) = real_tmp; + } + } else if(prop_type == pairs::Prop_Integer) { + auto int_ptr = ps->getAsIntegerProperty(prop); + buffer >> int_tmp; + int_ptr(nlocal + i) = int_tmp; + } else if(prop_type == pairs::Prop_UInt64) { + auto uint64_ptr = ps->getAsUInt64Property(prop); + buffer >> uint64_tmp; + uint64_ptr(nlocal + i) = uint64_tmp; + } else if(prop_type == pairs::Prop_Real) { + auto float_ptr = ps->getAsFloatProperty(prop); + buffer >> real_tmp; + float_ptr(nlocal + i) = real_tmp; + } else { + std::cerr << "deserializeImpl(): Invalid property type!" << std::endl; + return; + } + } + } + } + + ps->setTrackedVariableAsInteger("nlocal", nlocal + nrecv); + ps->setTrackedVariableAsInteger("nghost", 0); + } +}; + +} // namespace walberla diff --git a/runtime/domain/block_forest.cpp b/runtime/domain/block_forest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0851f2c1733243792909dff5e15b8a8ddc9bb2e6 --- /dev/null +++ b/runtime/domain/block_forest.cpp @@ -0,0 +1,489 @@ +#include <map> +#include <mpi.h> +#include <vector> +//--- +#include <blockforest/BlockForest.h> +#include <blockforest/Initialization.h> +#include <blockforest/loadbalancing/DynamicCurve.h> +#include <blockforest/loadbalancing/DynamicDiffusive.h> +#include <blockforest/loadbalancing/DynamicParMetis.h> +#include <blockforest/loadbalancing/InfoCollection.h> +#include <blockforest/loadbalancing/PODPhantomData.h> +#include <blockforest/loadbalancing/level_determination/MinMaxLevelDetermination.h> +#include <blockforest/loadbalancing/weight_assignment/MetisAssignmentFunctor.h> 
+#include <blockforest/loadbalancing/weight_assignment/WeightAssignmentFunctor.h> +//--- +#include "../boundary_weights.hpp" +#include "../pairs_common.hpp" +#include "../devices/device.hpp" +#include "regular_6d_stencil.hpp" +#include "ParticleDataHandling.hpp" +#include "../unique_id.hpp" + +namespace pairs { + +BlockForest::BlockForest( + PairsRuntime *ps_, + real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax, bool pbcx, bool pbcy, bool pbcz, bool balance_workload_) : + DomainPartitioner(xmin, xmax, ymin, ymax, zmin, zmax), ps(ps_), globalPBC{pbcx, pbcy, pbcz}, balance_workload(balance_workload_) { + + subdom = new real_t[ndims * 2]; +} + +BlockForest::BlockForest(PairsRuntime *ps_, const std::shared_ptr<walberla::blockforest::BlockForest> &bf) : + forest(bf), + DomainPartitioner(bf->getDomain().xMin(), bf->getDomain().xMax(), + bf->getDomain().yMin(), bf->getDomain().yMax(), + bf->getDomain().zMin(), bf->getDomain().zMax()), + ps(ps_), + globalPBC{bf->isXPeriodic(), bf->isYPeriodic(), bf->isZPeriodic()} { + subdom = new real_t[ndims * 2]; + mpiManager = walberla::mpi::MPIManager::instance(); + world_size = mpiManager->numProcesses(); + rank = mpiManager->rank(); + this->info = make_shared<walberla::blockforest::InfoCollection>(); +} + +void BlockForest::updateNeighborhood() { + std::map<int, std::vector<walberla::math::AABB>> neighborhood; + std::map<int, std::vector<walberla::BlockID>> blocks_pushed; + auto me = mpiManager->rank(); + this->nranks = 0; + this->total_aabbs = 0; + + ranks.clear(); + naabbs.clear(); + aabb_offsets.clear(); + aabbs.clear(); + for(auto& iblock: *forest) { + auto block = static_cast<walberla::blockforest::Block *>(&iblock); + for(uint neigh = 0; neigh < block->getNeighborhoodSize(); ++neigh) { + auto neighbor_rank = walberla::int_c(block->getNeighborProcess(neigh)); + + // Neighbor blocks that belong to the same rank should be added to + // neighborhood only if there's PBC along any dim, otherwise they 
should be skipped. + // TODO: Make PBCs work with runtime load balancing + if((neighbor_rank != me) || globalPBC[0] || globalPBC[1] || globalPBC[2]) { + const walberla::BlockID& neighbor_id = block->getNeighborId(neigh); + walberla::math::AABB neighbor_aabb = block->getNeighborAABB(neigh); + auto begin = blocks_pushed[neighbor_rank].begin(); + auto end = blocks_pushed[neighbor_rank].end(); + + if(find_if(begin, end, [neighbor_id](const auto &bp) { return bp == neighbor_id; }) == end) { + neighborhood[neighbor_rank].push_back(neighbor_aabb); + blocks_pushed[neighbor_rank].push_back(neighbor_id); + } + } + } + } + + for(auto& nbh: neighborhood) { + auto rank = nbh.first; + auto aabb_list = nbh.second; + ranks.push_back((int) rank); + aabb_offsets.push_back(this->total_aabbs); + naabbs.push_back((int) aabb_list.size()); + + for(auto &aabb: aabb_list) { + aabbs.push_back(aabb.xMin()); + aabbs.push_back(aabb.xMax()); + aabbs.push_back(aabb.yMin()); + aabbs.push_back(aabb.yMax()); + aabbs.push_back(aabb.zMin()); + aabbs.push_back(aabb.zMax()); + this->total_aabbs++; + } + + this->nranks++; + } +} + +void BlockForest::copyRuntimeArray(const std::string& name, void *dest, const int size) { + void *src = name.compare("ranks") == 0 ? static_cast<void *>(ranks.data()) : + name.compare("naabbs") == 0 ? static_cast<void *>(naabbs.data()) : + name.compare("aabb_offsets") == 0 ? static_cast<void *>(aabb_offsets.data()) : + name.compare("aabbs") == 0 ? static_cast<void *>(aabbs.data()) : + name.compare("subdom") == 0 ? static_cast<void *>(subdom) : nullptr; + + PAIRS_ASSERT(src != nullptr); + bool is_real = (name.compare("aabbs") == 0) || (name.compare("subdom") == 0); + int tsize = is_real ? 
sizeof(real_t) : sizeof(int); + std::memcpy(dest, src, size * tsize); +} + +void BlockForest::updateWeights() { + walberla::mpi::BufferSystem bs(mpiManager->comm(), 756); + + info->clear(); + + int sum_block_locals = 0; + // Compute the weights for my blocks and their children + for(auto& iblock: *forest) { + auto block = static_cast<walberla::blockforest::Block *>(&iblock); + auto aabb = block->getAABB(); + auto& block_info = (*info)[block->getId()]; + + pairs::compute_boundary_weights( + this->ps, + aabb.xMin(), aabb.xMax(), aabb.yMin(), aabb.yMax(), aabb.zMin(), aabb.zMax(), + &(block_info.computationalWeight), &(block_info.communicationWeight)); + + sum_block_locals += block_info.computationalWeight; + + for(int branch = 0; branch < 8; ++branch) { + const auto b_id = walberla::BlockID(block->getId(), branch); + const auto b_aabb = forest->getAABBFromBlockId(b_id); + auto& b_info = (*info)[b_id]; + + pairs::compute_boundary_weights( + this->ps, + b_aabb.xMin(), b_aabb.xMax(), b_aabb.yMin(), b_aabb.yMax(), b_aabb.zMin(), b_aabb.zMax(), + &(b_info.computationalWeight), &(b_info.communicationWeight)); + } + } + + int non_globals = ps->getTrackedVariableAsInteger("nlocal") - UniqueID::getNumGlobals(); + + if(sum_block_locals!=non_globals){ + std::cout << "Warning: " << non_globals - sum_block_locals << " particles in rank " << rank << + " may get lost in the next rebalancing." 
<< std::endl; + } + + // Send the weights of my blocks and their children to the neighbors of my blocks + for(auto& iblock: *forest) { + auto block = static_cast<walberla::blockforest::Block *>(&iblock); + auto& block_info = (*info)[block->getId()]; + + for(int neigh = 0; neigh < block->getNeighborhoodSize(); ++neigh) { + bs.sendBuffer(block->getNeighborProcess(neigh)) << + walberla::blockforest::InfoCollection::value_type(block->getId(), block_info); + } + + for(int branch = 0; branch < 8; ++branch) { + const auto b_id = walberla::BlockID(block->getId(), branch); + auto& b_info = (*info)[b_id]; + + for(int neigh = 0; neigh < block->getNeighborhoodSize(); ++neigh) { + bs.sendBuffer(block->getNeighborProcess(neigh)) << + walberla::blockforest::InfoCollection::value_type(b_id, b_info); + } + } + } + + bs.setReceiverInfoFromSendBufferState(false, true); + bs.sendAll(); + + for(auto recv = bs.begin(); recv != bs.end(); ++recv) { + while(!recv.buffer().isEmpty()) { + walberla::blockforest::InfoCollectionPair val; + recv.buffer() >> val; + info->insert(val); + } + } +} + +walberla::Vector3<int> BlockForest::getBlockConfig(int num_processes, int nx, int ny, int nz) { + const int bx_factor = 1; + const int by_factor = 1; + const int bz_factor = 1; + const int ax = nx * ny; + const int ay = nx * nz; + const int az = ny * nz; + + int bestsurf = 2 * (ax + ay + az); + int x = 1; + int y = 1; + int z = 1; + + for(int i = 1; i < num_processes; ++i) { + if(num_processes % i == 0) { + const int rem_yz = num_processes / i; + + for(int j = 1; j < rem_yz; ++j) { + if(rem_yz % j == 0) { + const int k = rem_yz / j; + const int surf = (ax / i / j) + (ay / i / k) + (az / j / k); + + if(surf < bestsurf) { + x = i, y = j, z = k; + bestsurf = surf; + } + } + } + } + } + + return walberla::Vector3<int>(x * bx_factor, y * by_factor, z * bz_factor); +} + +int BlockForest::getInitialRefinementLevel(int num_processes) { + int splitFactor = 8; + int blocks = 1; + int refinementLevel = 0; + + 
while(blocks < num_processes) { + refinementLevel++; + blocks *= splitFactor; + } + + return refinementLevel; +} + +void BlockForest::setBoundingBox() { + for (int i=0; i<6; ++i) subdom[i] = 0.0; + if (forest->empty()) return; + + auto aabb_union = forest->begin()->getAABB(); + for(auto& iblock: *forest) { + auto block = static_cast<walberla::blockforest::Block *>(&iblock); + aabb_union.merge(block->getAABB()); + } + + subdom[0] = aabb_union.xMin(); + subdom[1] = aabb_union.xMax(); + subdom[2] = aabb_union.yMin(); + subdom[3] = aabb_union.yMax(); + subdom[4] = aabb_union.zMin(); + subdom[5] = aabb_union.zMax(); +} + +void BlockForest::initialize(int *argc, char ***argv) { + mpiManager = walberla::mpi::MPIManager::instance(); + mpiManager->initializeMPI(argc, argv); + mpiManager->useWorldComm(); + world_size = mpiManager->numProcesses(); + rank = mpiManager->rank(); + + walberla::math::AABB domain( + grid_min[0], grid_min[1], grid_min[2], grid_max[0], grid_max[1], grid_max[2]); + + int gridsize[3] = {32, 32, 32}; + auto procs = mpiManager->numProcesses(); + auto block_config = balance_workload ? walberla::Vector3<int>(1, 1, 1) : + getBlockConfig(procs, gridsize[0], gridsize[1], gridsize[2]); + + auto ref_level = balance_workload ? getInitialRefinementLevel(procs) : 0; + + walberla::Vector3<bool> pbc(globalPBC[0], globalPBC[1], globalPBC[2]); + + forest = walberla::blockforest::createBlockForest(domain, block_config, pbc, procs, ref_level); + + this->info = make_shared<walberla::blockforest::InfoCollection>(); + + if (rank==0) { + std::cout << "Domain: " << domain << std::endl; + std::cout << "PBC: " << pbc << std::endl; + std::cout << "Block config: " << block_config << std::endl; + std::cout << "Initial refinement level: " << ref_level << std::endl; + std::cout << "Dynamic load balancing: " << (balance_workload ? 
"True" : "False") << std::endl; + } +} + +void BlockForest::update() { + if(balance_workload) { + if(!forest->loadBalancingFunctionRegistered()){ + std::cerr << "Workload balancer is not initialized." << std::endl; + exit(-1); + } + + this->updateWeights(); + const int nlocal = ps->getTrackedVariableAsInteger("nlocal"); + for(auto &prop: ps->getProperties()) { + if(!prop.isVolatile()) { + const int ptypesize = get_proptype_size(prop.getType()); + ps->copyPropertyToHost(prop, pairs::WriteAfterRead, nlocal*ptypesize); + } + } + + // PAIRS_DEBUG("Rebalance\n"); + if (rank==0) std::cout << "Rebalance" << std::endl; + forest->refresh(); +} + + this->updateNeighborhood(); + this->setBoundingBox(); +} + +void BlockForest::initWorkloadBalancer(LoadBalancingAlgorithms algorithm, size_t regridMin, size_t regridMax) { + if (rank==0) { + std::cout << "Load balancing algorithm: " << getAlgorithmName(algorithm) << std::endl; + std::cout << "regridMin = " << regridMin << ", regirdMax = " << regridMax << std::endl; + } + this->balance_workload = true; // balance_workload is set to true in case the forest has been initialized externally + real_t baseWeight = 1.0; + int maxBlocksPerProcess = 100; + + // Metis-specific params + real_t metisipc2redist = 1.0; + string metisAlgorithm = "PART_GEOM_KWAY"; + string metisWeightsToUse = "BOTH_WEIGHTS"; + string metisEdgeSource = "EDGES_FROM_EDGE_WEIGHTS"; + + forest->recalculateBlockLevelsInRefresh(true); + forest->alwaysRebalanceInRefresh(true); + forest->reevaluateMinTargetLevelsAfterForcedRefinement(true); + forest->allowRefreshChangingDepth(true); + + forest->allowMultipleRefreshCycles(false); + forest->checkForEarlyOutInRefresh(false); + forest->checkForLateOutInRefresh(false); + + // TODO: Define another functor that makes use of communicationWeight as well + forest->setRefreshMinTargetLevelDeterminationFunction( + walberla::blockforest::MinMaxLevelDetermination(info, regridMin, regridMax)); + + if(algorithm == Morton) { + 
forest->setRefreshPhantomBlockDataAssignmentFunction( + walberla::blockforest::WeightAssignmentFunctor(info, baseWeight)); + forest->setRefreshPhantomBlockDataPackFunction( + walberla::blockforest::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor()); + forest->setRefreshPhantomBlockDataUnpackFunction( + walberla::blockforest::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor()); + + auto prepFunc = walberla::blockforest::DynamicCurveBalance<walberla::blockforest::WeightAssignmentFunctor::PhantomBlockWeight>(false, true, false); + prepFunc.setMaxBlocksPerProcess(maxBlocksPerProcess); + forest->setRefreshPhantomBlockMigrationPreparationFunction(prepFunc); + + } else if(algorithm == Hilbert) { + forest->setRefreshPhantomBlockDataAssignmentFunction( + walberla::blockforest::WeightAssignmentFunctor(info, baseWeight)); + forest->setRefreshPhantomBlockDataPackFunction( + walberla::blockforest::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor()); + forest->setRefreshPhantomBlockDataUnpackFunction( + walberla::blockforest::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor()); + + auto prepFunc = walberla::blockforest::DynamicCurveBalance<walberla::blockforest::WeightAssignmentFunctor::PhantomBlockWeight>(true, true, false); + prepFunc.setMaxBlocksPerProcess(maxBlocksPerProcess); + forest->setRefreshPhantomBlockMigrationPreparationFunction(prepFunc); + + } else if(algorithm == Metis) { + forest->setRefreshPhantomBlockDataAssignmentFunction( + walberla::blockforest::MetisAssignmentFunctor(info, baseWeight)); + forest->setRefreshPhantomBlockDataPackFunction( + walberla::blockforest::MetisAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor()); + forest->setRefreshPhantomBlockDataUnpackFunction( + walberla::blockforest::MetisAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor()); + + auto alg = walberla::blockforest::DynamicParMetis::stringToAlgorithm(metisAlgorithm); + auto vWeight = 
walberla::blockforest::DynamicParMetis::stringToWeightsToUse(metisWeightsToUse); + auto eWeight = walberla::blockforest::DynamicParMetis::stringToEdgeSource(metisEdgeSource); + auto prepFunc = walberla::blockforest::DynamicParMetis(alg, vWeight, eWeight); + + prepFunc.setipc2redist(metisipc2redist); + forest->setRefreshPhantomBlockMigrationPreparationFunction(prepFunc); + + } else if(algorithm == Diffusive) { + forest->setRefreshPhantomBlockDataAssignmentFunction( + walberla::blockforest::WeightAssignmentFunctor(info, baseWeight)); + forest->setRefreshPhantomBlockDataPackFunction( + walberla::blockforest::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor()); + forest->setRefreshPhantomBlockDataUnpackFunction( + walberla::blockforest::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor()); + + auto prepFunc = walberla::blockforest::DynamicDiffusionBalance<walberla::blockforest::WeightAssignmentFunctor::PhantomBlockWeight>(1, 1, false); + forest->setRefreshPhantomBlockMigrationPreparationFunction(prepFunc); + } + else { + std::cerr << "Invalid load balancing algorithm." 
<< std::endl; + exit(-1); + } + + forest->addBlockData(make_shared<walberla::ParticleDataHandling>(ps), "Interface"); +} + +/* Delegates MPI shutdown to mpiManager->finalizeMPI(). */ +void BlockForest::finalize() { + mpiManager->finalizeMPI(); +} + +/* Returns true (as int) if the AABB of any block iterated from forest contains the point (x, y, z); returns false otherwise. */ +int BlockForest::isWithinSubdomain(real_t x, real_t y, real_t z) { + for(auto& iblock: *forest) { + auto block = static_cast<walberla::blockforest::Block *>(&iblock); + + if(block->getAABB().contains(x, y, z)) { + return true; + } + } + + return false; +} + +/* Exchanges one int per entry of ranks: nsend[i] is sent to ranks[i] and nrecv[i] is received from it with non-blocking MPI calls (tag 0, MPI_COMM_WORLD). An entry equal to this rank is copied locally instead. All posted sends and receives are waited on before returning. The dim argument is not used here. */ +void BlockForest::communicateSizes(int dim, const int *nsend, int *nrecv) { + std::vector<MPI_Request> send_requests; + std::vector<MPI_Request> recv_requests; + size_t nranks = 0; + + for(auto neigh_rank: ranks) { + if(neigh_rank != rank) { + MPI_Request send_req, recv_req; + MPI_Irecv(&nrecv[nranks], 1, MPI_INT, neigh_rank, 0, MPI_COMM_WORLD, &recv_req); + MPI_Isend(&nsend[nranks], 1, MPI_INT, neigh_rank, 0, MPI_COMM_WORLD, &send_req); + send_requests.push_back(send_req); + recv_requests.push_back(recv_req); + } else { + nrecv[nranks] = nsend[nranks]; + } + nranks++; + } + + if(!send_requests.empty()) { + MPI_Waitall(send_requests.size(), send_requests.data(), MPI_STATUSES_IGNORE); + } + if(!recv_requests.empty()) { + MPI_Waitall(recv_requests.size(), recv_requests.data(), MPI_STATUSES_IGNORE); + } +} + +/* Payload exchange, one slice per entry of ranks: for entry i the send slice starts at send_offsets[i] * elem_size and the receive slice at recv_offsets[i] * elem_size, with nsend[i] * elem_size and nrecv[i] * elem_size elements respectively. NOTE(review): transfers are typed MPI_DOUBLE while the buffers are real_t - confirm real_t is double. */ +void BlockForest::communicateData( + int dim, int elem_size, + const real_t *send_buf, const int *send_offsets, const int *nsend, + real_t *recv_buf, const int *recv_offsets, const int *nrecv) { + + std::vector<MPI_Request> send_requests; + std::vector<MPI_Request> recv_requests; + size_t nranks = 0; + + for(auto neigh_rank: ranks) { + const real_t *send_ptr = &send_buf[send_offsets[nranks] * elem_size]; + real_t *recv_ptr = &recv_buf[recv_offsets[nranks] * elem_size]; + + if(neigh_rank != rank) { + MPI_Request send_req, recv_req; + + MPI_Irecv(recv_ptr, nrecv[nranks] * elem_size, MPI_DOUBLE, neigh_rank, 0, MPI_COMM_WORLD, &recv_req); + MPI_Isend(send_ptr, nsend[nranks] * elem_size, MPI_DOUBLE, neigh_rank, 0,
MPI_COMM_WORLD, &send_req); + + send_requests.push_back(send_req); + recv_requests.push_back(recv_req); + } else { + pairs::copy_in_device(recv_ptr, send_ptr, nsend[nranks] * elem_size * sizeof(real_t)); + } + + nranks++; + } + + if(!send_requests.empty()) { + MPI_Waitall(send_requests.size(), send_requests.data(), MPI_STATUSES_IGNORE); + } + + if(!recv_requests.empty()) { + MPI_Waitall(recv_requests.size(), recv_requests.data(), MPI_STATUSES_IGNORE); + } +} + +void BlockForest::communicateDataReverse( + int dim, int elem_size, + const real_t *send_buf, const int *send_offsets, const int *nsend, + real_t *recv_buf, const int *recv_offsets, const int *nrecv) { + + this->communicateData(dim, elem_size,send_buf, send_offsets, nsend, recv_buf, recv_offsets, nrecv); +} + +void BlockForest::communicateAllData( + int ndims, int elem_size, + const real_t *send_buf, const int *send_offsets, const int *nsend, + real_t *recv_buf, const int *recv_offsets, const int *nrecv) { + + this->communicateData(0, elem_size, send_buf, send_offsets, nsend, recv_buf, recv_offsets, nrecv); +} + +} diff --git a/runtime/domain/block_forest.hpp b/runtime/domain/block_forest.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d814d02c423358b99f11622bbe2ed7f88b557f97 --- /dev/null +++ b/runtime/domain/block_forest.hpp @@ -0,0 +1,96 @@ +#include <memory> +#include <map> + +#include "../pairs_common.hpp" +#include "domain_partitioning.hpp" + +#pragma once + +#define SMALL 0.00001 + +namespace walberla { + namespace blockforest{ + class BlockForest; + class BlockID; + class BlockInfo; + using InfoCollection = std::map<BlockID, BlockInfo>; + } + + namespace mpi { + class MPIManager; + } + + namespace math{ + template<typename T> + class Vector3; + } +} +namespace pairs { + +class PairsRuntime; + +class BlockForest : public DomainPartitioner { +private: + std::shared_ptr<walberla::mpi::MPIManager> mpiManager; + std::shared_ptr<walberla::blockforest::BlockForest> forest; + 
std::shared_ptr<walberla::blockforest::InfoCollection> info; + std::vector<int> ranks; + std::vector<int> naabbs; + std::vector<int> aabb_offsets; + std::vector<double> aabbs; + PairsRuntime *ps; + real_t *subdom; + const bool globalPBC[3]; + int world_size, rank, nranks, total_aabbs; + bool balance_workload = false; + +public: + BlockForest( + PairsRuntime *ps_, + real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax, bool pbcx, bool pbcy, bool pbcz, bool balance_workload_); + + BlockForest(PairsRuntime *ps_, const std::shared_ptr<walberla::blockforest::BlockForest> &bf); + + ~BlockForest() { + delete[] subdom; + } + + void initialize(int *argc, char ***argv); + void initWorkloadBalancer(LoadBalancingAlgorithms algorithm, size_t regridMin, size_t regridMax); + + void update(); + void finalize(); + int getWorldSize() const { return world_size; } + int getRank() const { return rank; } + int getNumberOfNeighborRanks() { return this->nranks; } + int getNumberOfNeighborAABBs() { return this->total_aabbs; } + double getSubdomMin(int dim) const { return subdom[2*dim + 0];} + double getSubdomMax(int dim) const { return subdom[2*dim + 1];} + + void updateNeighborhood(); + void updateWeights(); + walberla::math::Vector3<int> getBlockConfig(int num_processes, int nx, int ny, int nz); + int getInitialRefinementLevel(int num_processes); + void setBoundingBox(); + void rebalance(); + + int isWithinSubdomain(real_t x, real_t y, real_t z); + void copyRuntimeArray(const std::string& name, void *dest, const int size); + void communicateSizes(int dim, const int *send_sizes, int *recv_sizes); + void communicateData( + int dim, int elem_size, + const real_t *send_buf, const int *send_offsets, const int *nsend, + real_t *recv_buf, const int *recv_offsets, const int *nrecv); + + void communicateDataReverse( + int dim, int elem_size, + const real_t *send_buf, const int *send_offsets, const int *nsend, + real_t *recv_buf, const int *recv_offsets, const int *nrecv); 
+ + void communicateAllData( + int ndims, int elem_size, + const real_t *send_buf, const int *send_offsets, const int *nsend, + real_t *recv_buf, const int *recv_offsets, const int *nrecv); +}; + +} diff --git a/runtime/domain/domain_partitioning.hpp b/runtime/domain/domain_partitioning.hpp index e08e5eebba5576d6094205a837d69c606e4f7662..3dfdaaebfa8c9f91f58705fd1b750f53569afc04 100644 --- a/runtime/domain/domain_partitioning.hpp +++ b/runtime/domain/domain_partitioning.hpp @@ -8,6 +8,7 @@ class Regular6DStencil; class DomainPartitioner { friend class Regular6DStencil; + friend class BlockForest; protected: real_t *grid_min; @@ -36,13 +37,35 @@ public: delete[] grid_max; } + double getMin(int dim) const { return grid_min[dim]; } + double getMax(int dim) const { return grid_max[dim]; } + virtual double getSubdomMin(int dim) const = 0; + virtual double getSubdomMax(int dim) const = 0; virtual void initialize(int *argc, char ***argv) = 0; - virtual void fillArrays(int *neighbor_ranks, int *pbc, real_t *subdom) = 0; + virtual void initWorkloadBalancer(LoadBalancingAlgorithms algorithm, size_t regridMin, size_t regridMax) = 0; + virtual void update() = 0; + virtual int getWorldSize() const = 0; + virtual int getRank() const = 0; + virtual int getNumberOfNeighborAABBs() = 0; + virtual int getNumberOfNeighborRanks() = 0; + virtual int isWithinSubdomain(real_t x, real_t y, real_t z) = 0; + virtual void copyRuntimeArray(const std::string& name, void *dest, const int size) = 0; virtual void communicateSizes(int dim, const int *nsend, int *nrecv) = 0; virtual void communicateData( int dim, int elem_size, const real_t *send_buf, const int *send_offsets, const int *nsend, real_t *recv_buf, const int *recv_offsets, const int *nrecv) = 0; + + virtual void communicateDataReverse( + int dim, int elem_size, + const real_t *send_buf, const int *send_offsets, const int *nsend, + real_t *recv_buf, const int *recv_offsets, const int *nrecv) = 0; + + virtual void communicateAllData( + int 
ndims, int elem_size, + const real_t *send_buf, const int *send_offsets, const int *nsend, + real_t *recv_buf, const int *recv_offsets, const int *nrecv) = 0; + virtual void finalize() = 0; }; diff --git a/runtime/domain/regular_6d_stencil.cpp b/runtime/domain/regular_6d_stencil.cpp index b01d2c76dc097391fd0e0204cc291f6ee65442b2..96ea998eb245ef6a3ed1add6302b57d8db78fc06 100644 --- a/runtime/domain/regular_6d_stencil.cpp +++ b/runtime/domain/regular_6d_stencil.cpp @@ -71,7 +71,7 @@ void Regular6DStencil::setBoundingBox() { MPI_Cart_shift(cartesian, d, 1, &(prev[d]), &(next[d])); pbc_prev[d] = (myloc[d] == 0) ? 1 : 0; pbc_next[d] = (myloc[d] == nranks[d] - 1) ? -1 : 0; - subdom_min[d] = this->grid_min[d] + rank_length[d] * (real_t)myloc[d]; + subdom_min[d] = this->grid_min[d] + rank_length[d] * (real_t) myloc[d]; subdom_max[d] = subdom_min[d] + rank_length[d]; } @@ -89,6 +89,10 @@ void Regular6DStencil::initialize(int *argc, char ***argv) { this->setBoundingBox(); } +void Regular6DStencil::initWorkloadBalancer(LoadBalancingAlgorithms algorithm, size_t regridMin, size_t regridMax) {} + +void Regular6DStencil::update() {} + void Regular6DStencil::finalize() { MPI_Finalize(); } @@ -99,14 +103,24 @@ int Regular6DStencil::isWithinSubdomain(real_t x, real_t y, real_t z) { z >= subdom_min[2] && z < subdom_max[2] - SMALL; } -void Regular6DStencil::fillArrays(int *neighbor_ranks, int *pbc, real_t *subdom) { +void Regular6DStencil::copyRuntimeArray(const std::string& name, void *dest, const int size) { for(int d = 0; d < ndims; d++) { - neighbor_ranks[d * 2 + 0] = prev[d]; - neighbor_ranks[d * 2 + 1] = next[d]; - pbc[d * 2 + 0] = pbc_prev[d]; - pbc[d * 2 + 1] = pbc_next[d]; - subdom[d * 2 + 0] = subdom_min[d]; - subdom[d * 2 + 1] = subdom_max[d]; + if(name.compare("neighbor_ranks") == 0) { + int *neighbor_ranks = static_cast<int *>(dest); + neighbor_ranks[d * 2 + 0] = prev[d]; + neighbor_ranks[d * 2 + 1] = next[d]; + } else if(name.compare("pbc") == 0) { + int *pbc = 
static_cast<int *>(dest); + pbc[d * 2 + 0] = pbc_prev[d]; + pbc[d * 2 + 1] = pbc_next[d]; + } else if(name.compare("subdom") == 0) { + real_t *subdom = static_cast<real_t *>(dest); + subdom[d * 2 + 0] = subdom_min[d]; + subdom[d * 2 + 1] = subdom_max[d]; + } else { + std::cerr << "copyRuntimeArray(): Array \"" << name << "\" is invalid." << std::endl; + exit(-1); + } } } @@ -131,8 +145,6 @@ void Regular6DStencil::communicateData( const real_t *send_buf, const int *send_offsets, const int *nsend, real_t *recv_buf, const int *recv_offsets, const int *nrecv) { - //MPI_Request recv_requests[2]; - //MPI_Request send_requests[2]; const real_t *send_prev = &send_buf[send_offsets[dim * 2 + 0] * elem_size]; const real_t *send_next = &send_buf[send_offsets[dim * 2 + 1] * elem_size]; real_t *recv_prev = &recv_buf[recv_offsets[dim * 2 + 0] * elem_size]; @@ -143,16 +155,6 @@ void Regular6DStencil::communicateData( send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - - /* - MPI_Irecv( - recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, - MPI_COMM_WORLD, &recv_requests[0]); - - MPI_Isend( - send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, - MPI_COMM_WORLD, &send_requests[0]); - */ } else { pairs::copy_in_device(recv_prev, send_prev, nsend[dim * 2 + 0] * elem_size * sizeof(real_t)); } @@ -162,22 +164,38 @@ void Regular6DStencil::communicateData( send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } else { + pairs::copy_in_device(recv_next, send_next, nsend[dim * 2 + 1] * elem_size * sizeof(real_t)); + } +} - /* - MPI_Irecv( - recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, - MPI_COMM_WORLD, &recv_requests[1]); +void Regular6DStencil::communicateDataReverse( + int dim, int 
elem_size, + const real_t *send_buf, const int *send_offsets, const int *nsend, + real_t *recv_buf, const int *recv_offsets, const int *nrecv) { - MPI_Isend( - send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, - MPI_COMM_WORLD, &send_requests[1]); - */ + const real_t *send_prev = &send_buf[send_offsets[dim * 2 + 0] * elem_size]; + const real_t *send_next = &send_buf[send_offsets[dim * 2 + 1] * elem_size]; + real_t *recv_prev = &recv_buf[recv_offsets[dim * 2 + 0] * elem_size]; + real_t *recv_next = &recv_buf[recv_offsets[dim * 2 + 1] * elem_size]; + + if(prev[dim] != rank) { + MPI_Sendrecv( + send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0, + recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else { - pairs::copy_in_device(recv_next, send_next, nsend[dim * 2 + 1] * elem_size * sizeof(real_t)); + pairs::copy_in_device(recv_prev, send_prev, nsend[dim * 2 + 0] * elem_size * sizeof(real_t)); } - //MPI_Waitall(2, recv_requests, MPI_STATUSES_IGNORE); - //MPI_Waitall(2, send_requests, MPI_STATUSES_IGNORE); + if(next[dim] != rank) { + MPI_Sendrecv( + send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0, + recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } else { + pairs::copy_in_device(recv_next, send_next, nsend[dim * 2 + 1] * elem_size * sizeof(real_t)); + } } void Regular6DStencil::communicateAllData( @@ -185,9 +203,6 @@ void Regular6DStencil::communicateAllData( const real_t *send_buf, const int *send_offsets, const int *nsend, real_t *recv_buf, const int *recv_offsets, const int *nrecv) { - //std::vector<MPI_Request> send_requests(ndims * 2, MPI_REQUEST_NULL); - //std::vector<MPI_Request> recv_requests(ndims * 2, MPI_REQUEST_NULL); - for (int d = 0; d < ndims; d++) { const real_t *send_prev = &send_buf[send_offsets[d * 2 + 0] * elem_size]; const real_t *send_next = &send_buf[send_offsets[d * 2 + 1] * 
elem_size]; @@ -199,16 +214,6 @@ void Regular6DStencil::communicateAllData( send_prev, nsend[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0, recv_prev, nrecv[d * 2 + 0] * elem_size, MPI_DOUBLE, next[d], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - - /* - MPI_Isend( - send_prev, nsend[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0, - MPI_COMM_WORLD, &send_requests[d * 2 + 0]); - - MPI_Irecv( - recv_prev, nrecv[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0, - MPI_COMM_WORLD, &recv_requests[d * 2 + 0]); - */ } else { pairs::copy_in_device(recv_prev, send_prev, nsend[d * 2 + 0] * elem_size * sizeof(real_t)); } @@ -218,23 +223,10 @@ void Regular6DStencil::communicateAllData( send_next, nsend[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0, recv_next, nrecv[d * 2 + 1] * elem_size, MPI_DOUBLE, prev[d], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - - /* - MPI_Isend( - send_next, nsend[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0, - MPI_COMM_WORLD, &send_requests[d * 2 + 1]); - - MPI_Irecv( - recv_next, nrecv[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0, - MPI_COMM_WORLD, &recv_requests[d * 2 + 1]); - */ } else { pairs::copy_in_device(recv_next, send_next, nsend[d * 2 + 1] * elem_size * sizeof(real_t)); } } - - //MPI_Waitall(ndims * 2, send_requests.data(), MPI_STATUSES_IGNORE); - //MPI_Waitall(ndims * 2, recv_requests.data(), MPI_STATUSES_IGNORE); } } diff --git a/runtime/domain/regular_6d_stencil.hpp b/runtime/domain/regular_6d_stencil.hpp index 330af65a6ccb140cef8d283eac3ab183d0503c45..b4a9e5c6634c6f15c89041f539f0b955ecce992f 100644 --- a/runtime/domain/regular_6d_stencil.hpp +++ b/runtime/domain/regular_6d_stencil.hpp @@ -51,17 +51,30 @@ public: void setConfig(); void setBoundingBox(); void initialize(int *argc, char ***argv); + void initWorkloadBalancer(LoadBalancingAlgorithms algorithm, size_t regridMin, size_t regridMax); + void update(); void finalize(); + int getWorldSize() const { return world_size; } int getRank() const { return rank; } + int 
getNumberOfNeighborRanks() { return 6; } + int getNumberOfNeighborAABBs() { return 6; } + double getSubdomMin(int dim) const { return subdom_min[dim];} + double getSubdomMax(int dim) const { return subdom_max[dim];} + int isWithinSubdomain(real_t x, real_t y, real_t z); - void fillArrays(int *neighbor_ranks, int *pbc, real_t *subdom); + void copyRuntimeArray(const std::string& name, void *dest, const int size); void communicateSizes(int dim, const int *send_sizes, int *recv_sizes); void communicateData( int dim, int elem_size, const real_t *send_buf, const int *send_offsets, const int *nsend, real_t *recv_buf, const int *recv_offsets, const int *nrecv); + void communicateDataReverse( + int dim, int elem_size, + const real_t *send_buf, const int *send_offsets, const int *nsend, + real_t *recv_buf, const int *recv_offsets, const int *nrecv); + void communicateAllData( int ndims, int elem_size, const real_t *send_buf, const int *send_offsets, const int *nsend, diff --git a/runtime/feature_property.hpp b/runtime/feature_property.hpp index 025a514766a01643c21915e362a24481664af5fd..e3221b4cefcfdc20f7d70d612c2d7be3f90c89ae 100644 --- a/runtime/feature_property.hpp +++ b/runtime/feature_property.hpp @@ -22,13 +22,13 @@ public: nkinds(nkinds_), array_size(array_size_) {} - property_t getId() { return id; } - std::string getName() { return name; } - void *getHostPointer() { return h_ptr; } - void *getDevicePointer() { return d_ptr; } - PropertyType getType() { return type; } - size_t getNumberOfKinds() { return nkinds; } - size_t getArraySize() { return array_size; } + property_t getId() const { return id; } + std::string getName() const { return name; } + void *getHostPointer() const { return h_ptr; } + void *getDevicePointer() const { return d_ptr; } + PropertyType getType() const { return type; } + size_t getNumberOfKinds() const { return nkinds; } + size_t getArraySize() const { return array_size; } }; } diff --git a/runtime/math/MathTrait.h b/runtime/math/MathTrait.h 
new file mode 100644 index 0000000000000000000000000000000000000000..44362c9f53fc0767ba707b39cb14ce3067cca176 --- /dev/null +++ b/runtime/math/MathTrait.h @@ -0,0 +1,633 @@ +#pragma once + +//************************************************************************************************* +// Includes +//************************************************************************************************* + +#include <cstddef> + +namespace pairs { + + +//================================================================================================= +// +// MATHEMATICAL TRAIT +// +//================================================================================================= + +//************************************************************************************************* +/*!\class MathTrait + * \brief Base template for the MathTrait class. + * \ingroup math + * + * \section mathtrait_general General + * + * The MathTrait class template offers the possibility to select the resulting data type + * of a generic mathematical operation. In case of operations between built-in data types, + * the MathTrait class defines the more significant data type as the resulting data type. + * For this selection, signed data types are given a higher significance. In case of + * operations involving user-defined data types, the MathTrait template specifies the + * resulting data type of this operation.\n + * Specifying the resulting data type for a specific operation is done by specializing + * the MathTrait template for this particular type combination. In case a certain type + * combination is not defined in a MathTrait specialization, the base template is selected, + * which defines no resulting types and therefore stops the compilation process. Each + * specialization defines the data types \a HighType that represents the high-order data + * type of the two given data types and \a LowType that represents the low-order data type. 
+ * Additionally, each specialization defines the types \a AddType, \a SubType, \a MultType + * and \a DivType, that represent the type of the resulting data type of the corresponding + * mathematical operation. The following example shows the specialization for operations + * between the double and the integer type: + + \code + template<> + struct MathTrait< double, int > + { + typedef double HighType; + typedef int LowType; + typedef double AddType; + typedef double SubType; + typedef double MultType; + typedef double DivType; + }; + \endcode + + * Per default, the MathTrait template provides specializations for the following built-in + * data types: + * + * <ul> + * <li>integers</li> + * <ul> + * <li>unsigned char, signed char, char, wchar_t</li> + * <li>unsigned short, short</li> + * <li>unsigned int, int</li> + * <li>unsigned long, long</li> + * <li>std::size_t, std::ptrdiff_t (for certain 64-bit compilers)</li> + * </ul> + * <li>floating points</li> + * <ul> + * <li>float</li> + * <li>double</li> + * <li>long double</li> + * </ul> + * </ul> + * + * + * \n \section specializations Creating custom specializations + * + * It is possible to specialize the MathTrait template for additional user-defined data types. + * However, it is possible that a specific mathematical operation is invalid for the particular + * type combination. In this case, the INVALID_NUMERICAL_TYPE can be used to fill the missing + * type definition. The INVALID_NUMERICAL_TYPE represents the resulting data type of an invalid + * numerical operation. It is left undefined to stop the compilation process in case it is + * instantiated. The following example shows the specialization of the MathTrait template for + * Matrix3 and Vector3. In this case, only the multiplication between the matrix and the vector + * is a valid numerical operation. Therefore for all other types the INVALID_NUMERICAL_TYPE is + * used. 
+ + \code + template< typename T1, typename T2 > + struct MathTrait< Matrix3<T1>, Vector3<T2> > + { + typedef INVALID_NUMERICAL_TYPE HighType; // Invalid, no common high data type + typedef INVALID_NUMERICAL_TYPE LowType; // Invalid, no common low data type + typedef INVALID_NUMERICAL_TYPE AddType; // Invalid, cannot add a matrix and a vector + typedef INVALID_NUMERICAL_TYPE SubType; // Invalid, cannot subtract a vector from a matrix + typedef Vector3< typename MathTrait<T1,T2>::MultType > MultType; // Multiplication between a matrix and a vector + typedef INVALID_NUMERICAL_TYPE DivType; // Invalid, cannot divide a matrix by a vector + }; + \endcode + + * \n \section mathtrait_examples Examples + * + * The following example demonstrates the use of the MathTrait template, where depending on + * the two given data types the resulting data type is selected: + + \code + template< typename T1, typename T2 > // The two generic types + typename MathTrait<T1,T2>::HighType // The resulting generic return type + add( T1 t1, T2 t2 ) // + { // The function 'add' returns the sum + return t1 + t2; // of the two given values + } // + \endcode + + * Additionally, the specializations of the MathTrait template enable arithmetic operations + * between any combination of the supported data types: + + \code + typedef Vector3< Matrix3< float > > VectorOfMatrices; // Vector of single-precision matrices + typedef Vector3< Vector3 < double > > VectorOfVectors; // Vector of double-precision vectors + typedef Vector3< double > VectorOfScalars; // Vector of double-precision scalars + + VectorOfMatrices vm; // Setup of a vector of matrices + VectorOfVectors vv; // Setup of a vector of vectors + + // Calculation of the scalar product between the two vectors. The resulting data type + // is a plain 3-dimensional vector of scalar values of type double. 
+ VectorOfScalars res = vm * vv; + \endcode + */ +//************************************************************************************************* + +//strange but needed for compatibility reasons with visual studio compiler +//backward compatibility to old PAIRS code +template< typename T1, typename T2 > +struct MathTrait +{ + using HighType = T1; + using LowType = T2; + using High = T1; + using Low = T2; +}; + +template< typename T> +struct MathTrait< T, T > +{ + using HighType = T; + using LowType = T; + using High = T; + using Low = T; +}; + + +//================================================================================================= +// +// MATHTRAIT SPECIALIZATION MACRO +// +//================================================================================================= + +//************************************************************************************************* +/*! \cond internal */ +/*!\brief Macro for the creation of MathTrait specializations for the built-in data types. + * \ingroup math + * + * This macro is used for the setup of the MathTrait specializations for the built-in data + * types. + */ +#define PAIRS_CREATE_MATHTRAIT_SPECIALIZATION(T1,T2,HIGH,LOW) \ + template<> \ + struct MathTrait< T1, T2 > \ + { \ + typedef HIGH HighType; \ + typedef LOW LowType; \ + typedef HIGH High; \ + typedef LOW Low; \ + typedef HIGH AddType; \ + typedef HIGH SubType; \ + typedef HIGH MultType; \ + typedef HIGH DivType; \ + } +/*! \endcond */ +//************************************************************************************************* + + + + +//================================================================================================= +// +// UNSIGNED CHAR SPECIALIZATIONS +// +//================================================================================================= + +//************************************************************************************************* +/*! 
\cond internal */ +// Type 1 Type 2 High type Low type +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned char , unsigned char , unsigned char , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned char , char , char , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned char , signed char , signed char , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned char , wchar_t , wchar_t , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned char , unsigned short, unsigned short, unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned char , short , short , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned char , unsigned int , unsigned int , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned char , int , int , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned char , unsigned long , unsigned long , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned char , long , long , unsigned char ); +#if defined(_WIN64) +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned char , std::size_t , std::size_t , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned char , std::ptrdiff_t, std::ptrdiff_t, unsigned char ); +#endif +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned char , float , float , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned char , double , double , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned char , long double , long double , unsigned char ); +/*! \endcond */ +//************************************************************************************************* + + + + +//================================================================================================= +// +// CHAR SPECIALIZATIONS +// +//================================================================================================= + +//************************************************************************************************* +/*! 
\cond internal */ +// Type 1 Type 2 High type Low type +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( char , unsigned char , char , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( char , char , char , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( char , signed char , signed char , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( char , wchar_t , wchar_t , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( char , unsigned short, unsigned short, char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( char , short , short , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( char , unsigned int , unsigned int , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( char , int , int , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( char , unsigned long , unsigned long , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( char , long , long , char ); +#if defined(_WIN64) +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( char , std::size_t , std::size_t , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( char , std::ptrdiff_t, std::ptrdiff_t, char ); +#endif +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( char , float , float , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( char , double , double , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( char , long double , long double , char ); +/*! \endcond */ +//************************************************************************************************* + + + + +//================================================================================================= +// +// SIGNED CHAR SPECIALIZATIONS +// +//================================================================================================= + +//************************************************************************************************* +/*! 
\cond internal */ +// Type 1 Type 2 High type Low type +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( signed char , unsigned char , signed char , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( signed char , char , signed char , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( signed char , signed char , signed char , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( signed char , wchar_t , wchar_t , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( signed char , unsigned short, unsigned short, signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( signed char , short , short , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( signed char , unsigned int , unsigned int , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( signed char , int , int , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( signed char , unsigned long , unsigned long , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( signed char , long , long , signed char ); +#if defined(_WIN64) +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( signed char , std::size_t , std::size_t , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( signed char , std::ptrdiff_t, std::ptrdiff_t, signed char ); +#endif +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( signed char , float , float , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( signed char , double , double , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( signed char , long double , long double , signed char ); +/*! \endcond */ +//************************************************************************************************* + + + + +//================================================================================================= +// +// WCHAR_T SPECIALIZATIONS +// +//================================================================================================= + +//************************************************************************************************* +/*! 
\cond internal */ +// Type 1 Type 2 High type Low type +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( wchar_t , unsigned char , wchar_t , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( wchar_t , char , wchar_t , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( wchar_t , signed char , wchar_t , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( wchar_t , wchar_t , wchar_t , wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( wchar_t , unsigned short, unsigned short, wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( wchar_t , short , short , wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( wchar_t , unsigned int , unsigned int , wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( wchar_t , int , int , wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( wchar_t , unsigned long , unsigned long , wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( wchar_t , long , long , wchar_t ); +#if defined(_WIN64) +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( wchar_t , std::size_t , std::size_t , wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( wchar_t , std::ptrdiff_t, std::ptrdiff_t, wchar_t ); +#endif +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( wchar_t , float , float , wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( wchar_t , double , double , wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( wchar_t , long double , long double , wchar_t ); +/*! \endcond */ +//************************************************************************************************* + + + + +//================================================================================================= +// +// UNSIGNED SHORT SPECIALIZATIONS +// +//================================================================================================= + +//************************************************************************************************* +/*! 
\cond internal */ +// Type 1 Type 2 High type Low type +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned short, unsigned char , unsigned short, unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned short, char , unsigned short, char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned short, signed char , unsigned short, signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned short, wchar_t , unsigned short, wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned short, unsigned short, unsigned short, unsigned short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned short, short , short , unsigned short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned short, unsigned int , unsigned int , unsigned short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned short, int , int , unsigned short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned short, unsigned long , unsigned long , unsigned short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned short, long , long , unsigned short ); +#if defined(_WIN64) +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned short, std::size_t , std::size_t , unsigned short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned short, std::ptrdiff_t, std::ptrdiff_t, unsigned short ); +#endif +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned short, float , float , unsigned short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned short, double , double , unsigned short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned short, long double , long double , unsigned short ); +/*! 
\endcond */ +//************************************************************************************************* + + + + +//================================================================================================= +// +// SHORT SPECIALIZATIONS +// +//================================================================================================= + +//************************************************************************************************* +/*! \cond internal */ +// Type 1 Type 2 High type Low type +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( short , unsigned char , short , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( short , char , short , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( short , signed char , short , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( short , wchar_t , short , wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( short , unsigned short, short , unsigned short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( short , short , short , short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( short , unsigned int , unsigned int , short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( short , int , int , short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( short , unsigned long , unsigned long , short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( short , long , long , short ); +#if defined(_WIN64) +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( short , std::size_t , std::size_t , short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( short , std::ptrdiff_t, std::ptrdiff_t, short ); +#endif +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( short , float , float , short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( short , double , double , short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( short , long double , long double , short ); +/*! 
\endcond */ +//************************************************************************************************* + + + + +//================================================================================================= +// +// UNSIGNED INT SPECIALIZATIONS +// +//================================================================================================= + +//************************************************************************************************* +/*! \cond internal */ +// Type 1 Type 2 High type Low type +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned int , unsigned char , unsigned int , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned int , char , unsigned int , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned int , signed char , unsigned int , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned int , wchar_t , unsigned int , wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned int , unsigned short, unsigned int , unsigned short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned int , short , unsigned int , short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned int , unsigned int , unsigned int , unsigned int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned int , int , int , unsigned int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned int , unsigned long , unsigned long , unsigned int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned int , long , long , unsigned int ); +#if defined(_WIN64) +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned int , std::size_t , std::size_t , unsigned int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned int , std::ptrdiff_t, std::ptrdiff_t, unsigned int ); +#endif +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned int , float , float , unsigned int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned int , double , double , unsigned int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned int , long double , long double , unsigned int ); +/*! 
\endcond */ +//************************************************************************************************* + + + + +//================================================================================================= +// +// INT SPECIALIZATIONS +// +//================================================================================================= + +//************************************************************************************************* +/*! \cond internal */ +// Type 1 Type 2 High type Low type +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( int , unsigned char , int , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( int , char , int , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( int , signed char , int , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( int , wchar_t , int , wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( int , unsigned short, int , unsigned short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( int , short , int , short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( int , unsigned int , int , unsigned int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( int , int , int , int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( int , unsigned long , unsigned long , int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( int , long , long , int ); +#if defined(_WIN64) +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( int , std::size_t , std::size_t , int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( int , std::ptrdiff_t, std::ptrdiff_t, int ); +#endif +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( int , float , float , int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( int , double , double , int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( int , long double , long double , int ); +/*! 
\endcond */ +//************************************************************************************************* + + + + +//================================================================================================= +// +// UNSIGNED LONG SPECIALIZATIONS +// +//================================================================================================= + +//************************************************************************************************* +/*! \cond internal */ +// Type 1 Type 2 High type Low type +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned long , unsigned char , unsigned long , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned long , char , unsigned long , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned long , signed char , unsigned long , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned long , wchar_t , unsigned long , wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned long , unsigned short, unsigned long , unsigned short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned long , short , unsigned long , short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned long , unsigned int , unsigned long , unsigned int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned long , int , unsigned long , int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned long , unsigned long , unsigned long , unsigned long ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned long , long , long , unsigned long ); +#if defined(_WIN64) +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned long , std::size_t , std::size_t , unsigned long ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned long , std::ptrdiff_t, std::ptrdiff_t, unsigned long ); +#endif +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned long , float , float , unsigned long ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned long , double , double , unsigned long ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( unsigned long , long double , long double , unsigned 
long ); +/*! \endcond */ +//************************************************************************************************* + + + + +//================================================================================================= +// +// LONG SPECIALIZATIONS +// +//================================================================================================= + +//************************************************************************************************* +/*! \cond internal */ +// Type 1 Type 2 High type Low type +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long , unsigned char , long , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long , char , long , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long , signed char , long , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long , wchar_t , long , wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long , unsigned short, long , unsigned short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long , short , long , short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long , unsigned int , long , unsigned int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long , int , long , int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long , unsigned long , long , unsigned long ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long , long , long , long ); +#if defined(_WIN64) +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long , std::size_t , std::size_t , long ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long , std::ptrdiff_t, std::ptrdiff_t, long ); +#endif +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long , float , float , long ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long , double , double , long ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long , long double , long double , long ); +/*! 
\endcond */ +//************************************************************************************************* + + + + +//================================================================================================= +// +// SIZE_T SPECIALIZATIONS +// +//================================================================================================= + +//************************************************************************************************* +#if defined(_WIN64) +/*! \cond internal */ +// Type 1 Type 2 High type Low type +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( std::size_t , unsigned char , std::size_t , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( std::size_t , char , std::size_t , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( std::size_t , signed char , std::size_t , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( std::size_t , wchar_t , std::size_t , wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( std::size_t , unsigned short, std::size_t , unsigned short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( std::size_t , short , std::size_t , short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( std::size_t , unsigned int , std::size_t , unsigned int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( std::size_t , int , std::size_t , int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( std::size_t , unsigned long , std::size_t , unsigned long ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( std::size_t , long , std::size_t , long ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( std::size_t , std::size_t , std::size_t , std::size_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( std::size_t , float , float , std::size_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( std::size_t , double , double , std::size_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( std::size_t , long double , long double , std::size_t ); +/*! 
\endcond */ +#endif +//************************************************************************************************* + + + + +//================================================================================================= +// +// FLOAT SPECIALIZATIONS +// +//================================================================================================= + +//************************************************************************************************* +/*! \cond internal */ +// Type 1 Type 2 High type Low type +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( float , unsigned char , float , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( float , char , float , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( float , signed char , float , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( float , wchar_t , float , wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( float , unsigned short, float , unsigned short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( float , short , float , short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( float , unsigned int , float , unsigned int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( float , int , float , int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( float , unsigned long , float , unsigned long ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( float , long , float , long ); +#if defined(_WIN64) +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( float , std::size_t , float , std::size_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( float , std::ptrdiff_t, float , std::ptrdiff_t ); +#endif +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( float , float , float , float ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( float , double , double , float ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( float , long double , long double , float ); +/*! 
\endcond */ +//************************************************************************************************* + + + + +//================================================================================================= +// +// DOUBLE SPECIALIZATIONS +// +//================================================================================================= + +//************************************************************************************************* +/*! \cond internal */ +// Type 1 Type 2 High type Low type +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( double , unsigned char , double , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( double , char , double , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( double , signed char , double , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( double , wchar_t , double , wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( double , unsigned short, double , unsigned short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( double , short , double , short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( double , unsigned int , double , unsigned int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( double , int , double , int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( double , unsigned long , double , unsigned long ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( double , long , double , long ); +#if defined(_WIN64) +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( double , std::size_t , double , std::size_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( double , std::ptrdiff_t, double , std::ptrdiff_t ); +#endif +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( double , float , double , float ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( double , double , double , double ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( double , long double , long double , double ); +/*! 
\endcond */ +//************************************************************************************************* + + + + +//================================================================================================= +// +// LONG DOUBLE SPECIALIZATIONS +// +//================================================================================================= + +//************************************************************************************************* +/*! \cond internal */ +// Type 1 Type 2 High type Low type +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long double , unsigned char , long double , unsigned char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long double , char , long double , char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long double , signed char , long double , signed char ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long double , wchar_t , long double , wchar_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long double , unsigned short, long double , unsigned short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long double , short , long double , short ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long double , unsigned int , long double , unsigned int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long double , int , long double , int ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long double , unsigned long , long double , unsigned long ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long double , long , long double , long ); +#if defined(_WIN64) +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long double , std::size_t , long double , std::size_t ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long double , std::ptrdiff_t, long double , std::ptrdiff_t ); +#endif +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long double , float , long double , float ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long double , double , long double , double ); +PAIRS_CREATE_MATHTRAIT_SPECIALIZATION( long double , long double , long double , long double ); +/*! 
\endcond */ +//************************************************************************************************* + +#undef PAIRS_CREATE_MATHTRAIT_SPECIALIZATION + +} diff --git a/runtime/math/Vector3.hpp b/runtime/math/Vector3.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2e5b4f0b984953251d7a9791cc6a7354fc4b0bfb --- /dev/null +++ b/runtime/math/Vector3.hpp @@ -0,0 +1,43 @@ +#pragma once +#include <iostream> + +#include "../pairs_common.hpp" +#include "MathTrait.h" + + +namespace pairs { + +#define HIGH typename MathTrait<Type,Other>::High + +template< typename Type > +class Vector3 { +public: + Vector3() = default; + + // If the constructor is called from device, v_ is automatically allocated on + // device because it's a static array embedded in the object itself + PAIRS_ATTR_HOST_DEVICE Vector3( Type x, Type y, Type z ) { + v_[0] = x; + v_[1] = y; + v_[2] = z; + } + + template< typename Other > // Component-wise sum; both operands are promoted to MathTrait<Type,Other>::High before adding + PAIRS_ATTR_HOST_DEVICE inline Vector3<HIGH> operator+( const Vector3<Other>& rhs ) const{ + return Vector3<HIGH>( static_cast<HIGH>(v_[0])+static_cast<HIGH>(rhs[0]), static_cast<HIGH>(v_[1])+static_cast<HIGH>(rhs[1]), static_cast<HIGH>(v_[2])+static_cast<HIGH>(rhs[2]) ); + } + + PAIRS_ATTR_HOST_DEVICE Type& operator[]( int index ) { + return v_[index]; + } + + PAIRS_ATTR_HOST_DEVICE const Type& operator[] ( int index ) const { + return v_[index]; + } + +private: + Type v_[3] = {Type(), Type(), Type()}; +}; +#undef HIGH + +} diff --git a/runtime/pairs.cpp b/runtime/pairs.cpp index f3d56730ab38e04fd4f9ea975643069d78cfe2db..6efead8d8cb598c1fcd481beb35c12464c235830 100644 --- a/runtime/pairs.cpp +++ b/runtime/pairs.cpp @@ -7,39 +7,61 @@ #include "pairs.hpp" #include "pairs_common.hpp" #include "devices/device.hpp" +#include "domain/block_forest.hpp" #include "domain/regular_6d_stencil.hpp" namespace pairs { -void PairsSimulation::initDomain( +void PairsRuntime::initDomain( int *argc, char ***argv, - real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax) { + real_t xmin, 
real_t ymin, real_t zmin, real_t xmax, real_t ymax, real_t zmax, + bool pbcx, bool pbcy, bool pbcz, + bool balance_workload) { + + int mpi_initialized=0; + MPI_Initialized(&mpi_initialized); + + if(mpi_initialized){ + PAIRS_ERROR("MPI is already initialized!\n"); + exit(-1); + } + if(dom_part){ + PAIRS_ERROR("DomainPartitioner already exists!\n"); + exit(-1); + } - if(dom_part_type == Regular) { + if(dom_part_type == RegularPartitioning) { const int flags[] = {1, 1, 1}; dom_part = new Regular6DStencil(xmin, xmax, ymin, ymax, zmin, zmax, flags); - } else if(dom_part_type == RegularXY) { + } else if(dom_part_type == RegularXYPartitioning) { const int flags[] = {1, 1, 0}; dom_part = new Regular6DStencil(xmin, xmax, ymin, ymax, zmin, zmax, flags); - } else { - PAIRS_EXCEPTION("Domain partitioning type not implemented!\n"); + } + +#ifdef USE_WALBERLA + else if(dom_part_type == BlockForestPartitioning) { + dom_part = new BlockForest(this, xmin, xmax, ymin, ymax, zmin, zmax, pbcx, pbcy, pbcz, balance_workload); + } +#endif + + else { + PAIRS_ERROR("(initDomain) Domain partitioning type not implemented!\n"); + exit(-1); } dom_part->initialize(argc, argv); } -void PairsSimulation::addArray(Array array) { - int id = array.getId(); - auto a = std::find_if( - arrays.begin(), - arrays.end(), - [id](Array _a) { return _a.getId() == id; }); +void PairsRuntime::addArray(Array array) { + PAIRS_ASSERT( + std::find_if(arrays.begin(), arrays.end(), [array](Array _a) { + return _a.getId() == array.getId(); + }) == std::end(arrays)); - PAIRS_ASSERT(a == std::end(arrays)); arrays.push_back(array); } -Array &PairsSimulation::getArray(array_t id) { +Array &PairsRuntime::getArray(array_t id) { auto a = std::find_if( arrays.begin(), arrays.end(), @@ -49,7 +71,7 @@ Array &PairsSimulation::getArray(array_t id) { return *a; } -Array &PairsSimulation::getArrayByName(std::string name) { +Array &PairsRuntime::getArrayByName(std::string name) { auto a = std::find_if( arrays.begin(), arrays.end(), 
@@ -59,7 +81,7 @@ Array &PairsSimulation::getArrayByName(std::string name) { return *a; } -Array &PairsSimulation::getArrayByHostPointer(const void *h_ptr) { +Array &PairsRuntime::getArrayByHostPointer(const void *h_ptr) { auto a = std::find_if( arrays.begin(), arrays.end(), @@ -69,18 +91,16 @@ Array &PairsSimulation::getArrayByHostPointer(const void *h_ptr) { return *a; } -void PairsSimulation::addProperty(Property prop) { - int id = prop.getId(); - auto p = std::find_if( - properties.begin(), - properties.end(), - [id](Property _p) { return _p.getId() == id; }); +void PairsRuntime::addProperty(Property prop) { + PAIRS_ASSERT( + std::find_if(properties.begin(), properties.end(), [prop](Property _p) { + return _p.getId() == prop.getId(); + }) == std::end(properties)); - PAIRS_ASSERT(p == std::end(properties)); properties.push_back(prop); } -Property &PairsSimulation::getProperty(property_t id) { +Property &PairsRuntime::getProperty(property_t id) { auto p = std::find_if( properties.begin(), properties.end(), @@ -90,7 +110,7 @@ Property &PairsSimulation::getProperty(property_t id) { return *p; } -Property &PairsSimulation::getPropertyByName(std::string name) { +Property &PairsRuntime::getPropertyByName(std::string name) { auto p = std::find_if( properties.begin(), properties.end(), @@ -100,18 +120,19 @@ Property &PairsSimulation::getPropertyByName(std::string name) { return *p; } -void PairsSimulation::addContactProperty(ContactProperty contact_prop) { - int id = contact_prop.getId(); - auto cp = std::find_if( - contact_properties.begin(), - contact_properties.end(), - [id](ContactProperty _cp) { return _cp.getId() == id; }); +void PairsRuntime::addContactProperty(ContactProperty contact_prop) { + PAIRS_ASSERT( + std::find_if( + contact_properties.begin(), + contact_properties.end(), + [contact_prop](ContactProperty _cp) { + return _cp.getId() == contact_prop.getId(); + }) == std::end(contact_properties)); - PAIRS_ASSERT(cp == std::end(contact_properties)); 
contact_properties.push_back(contact_prop); } -ContactProperty &PairsSimulation::getContactProperty(property_t id) { +ContactProperty &PairsRuntime::getContactProperty(property_t id) { auto cp = std::find_if( contact_properties.begin(), contact_properties.end(), @@ -121,7 +142,7 @@ ContactProperty &PairsSimulation::getContactProperty(property_t id) { return *cp; } -ContactProperty &PairsSimulation::getContactPropertyByName(std::string name) { +ContactProperty &PairsRuntime::getContactPropertyByName(std::string name) { auto cp = std::find_if( contact_properties.begin(), contact_properties.end(), @@ -131,18 +152,19 @@ ContactProperty &PairsSimulation::getContactPropertyByName(std::string name) { return *cp; } -void PairsSimulation::addFeatureProperty(FeatureProperty feature_prop) { - int id = feature_prop.getId(); - auto fp = std::find_if( - feature_properties.begin(), - feature_properties.end(), - [id](FeatureProperty _fp) { return _fp.getId() == id; }); +void PairsRuntime::addFeatureProperty(FeatureProperty feature_prop) { + PAIRS_ASSERT( + std::find_if( + feature_properties.begin(), + feature_properties.end(), + [feature_prop](FeatureProperty _fp) { + return _fp.getId() == feature_prop.getId(); + }) == std::end(feature_properties)); - PAIRS_ASSERT(fp == std::end(feature_properties)); feature_properties.push_back(feature_prop); } -FeatureProperty &PairsSimulation::getFeatureProperty(property_t id) { +FeatureProperty &PairsRuntime::getFeatureProperty(property_t id) { auto fp = std::find_if(feature_properties.begin(), feature_properties.end(), [id](FeatureProperty _fp) { return _fp.getId() == id; }); @@ -150,7 +172,7 @@ FeatureProperty &PairsSimulation::getFeatureProperty(property_t id) { return *fp; } -FeatureProperty &PairsSimulation::getFeaturePropertyByName(std::string name) { +FeatureProperty &PairsRuntime::getFeaturePropertyByName(std::string name) { auto fp = std::find_if(feature_properties.begin(), feature_properties.end(), [name](FeatureProperty _fp) { 
return _fp.getName() == name; }); @@ -158,7 +180,7 @@ FeatureProperty &PairsSimulation::getFeaturePropertyByName(std::string name) { return *fp; } -void PairsSimulation::copyArraySliceToDevice( +void PairsRuntime::copyArraySliceToDevice( Array &array, action_t action, size_t offset, size_t size) { int array_id = array.getId(); @@ -167,7 +189,7 @@ void PairsSimulation::copyArraySliceToDevice( if(action == Ignore || !array_flags->isDeviceFlagSet(array_id)) { if(!array.isStatic()) { PAIRS_DEBUG( - "Copying array %s to device (offset=%d, n=%d)\n", + "Copying array %s to device (offset=%lu, n=%lu)\n", array.getName().c_str(), offset, size); pairs::copy_slice_to_device( @@ -183,16 +205,23 @@ void PairsSimulation::copyArraySliceToDevice( array_flags->setDeviceFlag(array_id); } -void PairsSimulation::copyArrayToDevice(Array &array, action_t action, size_t size) { +void PairsRuntime::copyArrayToDevice(Array &array, action_t action, size_t size) { int array_id = array.getId(); if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(action == Ignore || !array_flags->isDeviceFlagSet(array_id)) { if(array.isStatic()) { - PAIRS_DEBUG("Copying static array %s to device (n=%d)\n", array.getName().c_str(), size); - pairs::copy_static_symbol_to_device(array.getHostPointer(), array.getDevicePointer(), size); + PAIRS_DEBUG( + "Copying static array %s to device (n=%lu)\n", + array.getName().c_str(), size); + + pairs::copy_static_symbol_to_device( + array.getHostPointer(), array.getDevicePointer(), size); } else { - PAIRS_DEBUG("Copying array %s to device (n=%d)\n", array.getName().c_str(), size); + PAIRS_DEBUG( + "Copying array %s to device (n=%lu)\n", + array.getName().c_str(), size); + pairs::copy_to_device(array.getHostPointer(), array.getDevicePointer(), size); } } @@ -205,14 +234,14 @@ void PairsSimulation::copyArrayToDevice(Array &array, action_t action, size_t si array_flags->setDeviceFlag(array_id); } -void PairsSimulation::copyArraySliceToHost(Array &array, 
action_t action, size_t offset, size_t size) { +void PairsRuntime::copyArraySliceToHost(Array &array, action_t action, size_t offset, size_t size) { int array_id = array.getId(); if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(action == Ignore || !array_flags->isHostFlagSet(array_id)) { if(!array.isStatic()) { PAIRS_DEBUG( - "Copying array %s to host (offset=%d, n=%d)\n", + "Copying array %s to host (offset=%lu, n=%lu)\n", array.getName().c_str(), offset, size); pairs::copy_slice_to_host( @@ -228,16 +257,19 @@ void PairsSimulation::copyArraySliceToHost(Array &array, action_t action, size_t array_flags->setHostFlag(array_id); } -void PairsSimulation::copyArrayToHost(Array &array, action_t action, size_t size) { +void PairsRuntime::copyArrayToHost(Array &array, action_t action, size_t size) { int array_id = array.getId(); if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(action == Ignore || !array_flags->isHostFlagSet(array_id)) { if(array.isStatic()) { - PAIRS_DEBUG("Copying static array %s to host (n=%d)\n", array.getName().c_str(), size); - pairs::copy_static_symbol_to_host(array.getDevicePointer(), array.getHostPointer(), size); + PAIRS_DEBUG( + "Copying static array %s to host (n=%lu)\n", array.getName().c_str(), size); + + pairs::copy_static_symbol_to_host( + array.getDevicePointer(), array.getHostPointer(), size); } else { - PAIRS_DEBUG("Copying array %s to host (n=%d)\n", array.getName().c_str(), size); + PAIRS_DEBUG("Copying array %s to host (n=%lu)\n", array.getName().c_str(), size); pairs::copy_to_host(array.getDevicePointer(), array.getHostPointer(), size); } } @@ -250,12 +282,12 @@ void PairsSimulation::copyArrayToHost(Array &array, action_t action, size_t size array_flags->setHostFlag(array_id); } -void PairsSimulation::copyPropertyToDevice(Property &prop, action_t action, size_t size) { +void PairsRuntime::copyPropertyToDevice(Property &prop, action_t action, size_t size) { int prop_id = 
prop.getId(); if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(action == Ignore || !prop_flags->isDeviceFlagSet(prop_id)) { - PAIRS_DEBUG("Copying property %s to device (n=%d)\n", prop.getName().c_str(), size); + PAIRS_DEBUG("Copying property %s to device (n=%lu)\n", prop.getName().c_str(), size); pairs::copy_to_device(prop.getHostPointer(), prop.getDevicePointer(), size); } } @@ -267,12 +299,12 @@ void PairsSimulation::copyPropertyToDevice(Property &prop, action_t action, size prop_flags->setDeviceFlag(prop_id); } -void PairsSimulation::copyPropertyToHost(Property &prop, action_t action, size_t size) { +void PairsRuntime::copyPropertyToHost(Property &prop, action_t action, size_t size) { int prop_id = prop.getId(); if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(action == Ignore || !prop_flags->isHostFlagSet(prop_id)) { - PAIRS_DEBUG("Copying property %s to host (n=%d)\n", prop.getName().c_str(), size); + PAIRS_DEBUG("Copying property %s to host (n=%lu)\n", prop.getName().c_str(), size); pairs::copy_to_host(prop.getDevicePointer(), prop.getHostPointer(), size); } } @@ -284,15 +316,19 @@ void PairsSimulation::copyPropertyToHost(Property &prop, action_t action, size_t prop_flags->setHostFlag(prop_id); } -void PairsSimulation::copyContactPropertyToDevice( +void PairsRuntime::copyContactPropertyToDevice( ContactProperty &contact_prop, action_t action, size_t size) { int prop_id = contact_prop.getId(); if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(action == Ignore || !contact_prop_flags->isDeviceFlagSet(prop_id)) { - PAIRS_DEBUG("Copying contact property %s to device (n=%d)\n", contact_prop.getName().c_str(), size); - pairs::copy_to_device(contact_prop.getHostPointer(), contact_prop.getDevicePointer(), size); + PAIRS_DEBUG("Copying contact property %s to device (n=%lu)\n", + contact_prop.getName().c_str(), size); + + pairs::copy_to_device( + contact_prop.getHostPointer(), 
contact_prop.getDevicePointer(), size); + contact_prop_flags->setDeviceFlag(prop_id); } } @@ -302,15 +338,19 @@ void PairsSimulation::copyContactPropertyToDevice( } } -void PairsSimulation::copyContactPropertyToHost( +void PairsRuntime::copyContactPropertyToHost( ContactProperty &contact_prop, action_t action, size_t size) { int prop_id = contact_prop.getId(); if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(!contact_prop_flags->isHostFlagSet(contact_prop.getId())) { - PAIRS_DEBUG("Copying contact property %s to host (n=%d)\n", contact_prop.getName().c_str(), size); - pairs::copy_to_host(contact_prop.getDevicePointer(), contact_prop.getHostPointer(), size); + PAIRS_DEBUG("Copying contact property %s to host (n=%lu)\n", + contact_prop.getName().c_str(), size); + + pairs::copy_to_host( + contact_prop.getDevicePointer(), contact_prop.getHostPointer(), size); + contact_prop_flags->setHostFlag(prop_id); } } @@ -320,13 +360,17 @@ void PairsSimulation::copyContactPropertyToHost( } } -void PairsSimulation::copyFeaturePropertyToDevice(FeatureProperty &feature_prop) { +void PairsRuntime::copyFeaturePropertyToDevice(FeatureProperty &feature_prop) { const size_t n = feature_prop.getArraySize(); - PAIRS_DEBUG("Copying feature property %s to device (n=%d)\n", feature_prop.getName().c_str(), n); - pairs::copy_static_symbol_to_device(feature_prop.getHostPointer(), feature_prop.getDevicePointer(), n); + + PAIRS_DEBUG("Copying feature property %s to device (n=%lu)\n", + feature_prop.getName().c_str(), n); + + pairs::copy_static_symbol_to_device( + feature_prop.getHostPointer(), feature_prop.getDevicePointer(), n); } -void PairsSimulation::communicateSizes(int dim, const int *send_sizes, int *recv_sizes) { +void PairsRuntime::communicateSizes(int dim, const int *send_sizes, int *recv_sizes) { auto nsend_id = getArrayByHostPointer(send_sizes).getId(); auto nrecv_id = getArrayByHostPointer(recv_sizes).getId(); @@ -341,7 +385,7 @@ void 
PairsSimulation::communicateSizes(int dim, const int *send_sizes, int *recv this->getTimers()->stop(Communication); } -void PairsSimulation::communicateData( +void PairsRuntime::communicateData( int dim, int elem_size, const real_t *send_buf, const int *send_offsets, const int *nsend, real_t *recv_buf, const int *recv_offsets, const int *nrecv) { @@ -369,13 +413,22 @@ void PairsSimulation::communicateData( #else int nsend_all = 0; int nrecv_all = 0; - for(int d = 0; d <= dim; d++) { - nsend_all += nsend[d * 2 + 0]; - nsend_all += nsend[d * 2 + 1]; - nrecv_all += nrecv[d * 2 + 0]; - nrecv_all += nrecv[d * 2 + 1]; + if(this->dom_part_type == RegularPartitioning || this->dom_part_type == RegularXYPartitioning){ + for(int d = 0; d <= dim; d++) { + nsend_all += nsend[d * 2 + 0]; + nsend_all += nsend[d * 2 + 1]; + nrecv_all += nrecv[d * 2 + 0]; + nrecv_all += nrecv[d * 2 + 1]; + } } - + else if (this->dom_part_type == BlockForestPartitioning){ + int nranks = this->getDomainPartitioner()->getNumberOfNeighborRanks(); + for (int n=0; n<nranks; ++n){ + nsend_all += nsend[n]; + nrecv_all += nrecv[n]; + } + } + copyArrayToHost(send_buf_id, Ignore, nsend_all * elem_size * sizeof(real_t)); array_flags->setHostFlag(recv_buf_id); array_flags->clearDeviceFlag(recv_buf_id); @@ -395,7 +448,70 @@ void PairsSimulation::communicateData( #endif } -void PairsSimulation::communicateAllData( +void PairsRuntime::communicateDataReverse( + int dim, int elem_size, + const real_t *send_buf, const int *send_offsets, const int *nsend, + real_t *recv_buf, const int *recv_offsets, const int *nrecv) { + + const real_t *send_buf_ptr = send_buf; + real_t *recv_buf_ptr = recv_buf; + auto send_buf_array = getArrayByHostPointer(send_buf); + auto recv_buf_array = getArrayByHostPointer(recv_buf); + auto send_buf_id = send_buf_array.getId(); + auto recv_buf_id = recv_buf_array.getId(); + auto send_offsets_id = getArrayByHostPointer(send_offsets).getId(); + auto recv_offsets_id = 
getArrayByHostPointer(recv_offsets).getId(); + auto nsend_id = getArrayByHostPointer(nsend).getId(); + auto nrecv_id = getArrayByHostPointer(nrecv).getId(); + + this->getTimers()->start(DeviceTransfers); + copyArrayToHost(send_offsets_id, ReadOnly); + copyArrayToHost(recv_offsets_id, ReadOnly); + copyArrayToHost(nsend_id, ReadOnly); + copyArrayToHost(nrecv_id, ReadOnly); + + #ifdef ENABLE_CUDA_AWARE_MPI + send_buf_ptr = (real_t *) send_buf_array.getDevicePointer(); + recv_buf_ptr = (real_t *) recv_buf_array.getDevicePointer(); + #else + int nsend_all = 0; + int nrecv_all = 0; + if(this->dom_part_type == RegularPartitioning || this->dom_part_type == RegularXYPartitioning){ + for(int d = 2; d >= dim; d--) { + nsend_all += nsend[d * 2 + 0]; + nsend_all += nsend[d * 2 + 1]; + nrecv_all += nrecv[d * 2 + 0]; + nrecv_all += nrecv[d * 2 + 1]; + } + } + else if (this->dom_part_type == BlockForestPartitioning){ + int nranks = this->getDomainPartitioner()->getNumberOfNeighborRanks(); + for (int n=0; n<nranks; ++n){ // blockforest doesn't need reverse loop + nsend_all += nsend[n]; + nrecv_all += nrecv[n]; + } + } + + copyArrayToHost(send_buf_id, Ignore, nsend_all * elem_size * sizeof(real_t)); + array_flags->setHostFlag(recv_buf_id); + array_flags->clearDeviceFlag(recv_buf_id); + #endif + + this->getTimers()->stop(DeviceTransfers); + + this->getTimers()->start(Communication); + this->getDomainPartitioner()->communicateDataReverse( + dim, elem_size, send_buf_ptr, send_offsets, nsend, recv_buf_ptr, recv_offsets, nrecv); + this->getTimers()->stop(Communication); + + #ifndef ENABLE_CUDA_AWARE_MPI + this->getTimers()->start(DeviceTransfers); + copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * elem_size * sizeof(real_t)); + this->getTimers()->stop(DeviceTransfers); + #endif +} + +void PairsRuntime::communicateAllData( int ndims, int elem_size, const real_t *send_buf, const int *send_offsets, const int *nsend, real_t *recv_buf, const int *recv_offsets, const int *nrecv) { @@ -423,11 
+539,20 @@ void PairsSimulation::communicateAllData( #else int nsend_all = 0; int nrecv_all = 0; - for(int d = 0; d <= ndims; d++) { - nsend_all += nsend[d * 2 + 0]; - nsend_all += nsend[d * 2 + 1]; - nrecv_all += nrecv[d * 2 + 0]; - nrecv_all += nrecv[d * 2 + 1]; + if(this->dom_part_type == RegularPartitioning || this->dom_part_type == RegularXYPartitioning){ + for(int d = 0; d <= ndims; d++) { + nsend_all += nsend[d * 2 + 0]; + nsend_all += nsend[d * 2 + 1]; + nrecv_all += nrecv[d * 2 + 0]; + nrecv_all += nrecv[d * 2 + 1]; + } + } + else if (this->dom_part_type == BlockForestPartitioning){ + int nranks = this->getDomainPartitioner()->getNumberOfNeighborRanks(); + for (int n=0; n<nranks; ++n){ + nsend_all += nsend[n]; + nrecv_all += nrecv[n]; + } } copyArrayToHost(send_buf_id, Ignore, nsend_all * elem_size * sizeof(real_t)); @@ -449,7 +574,7 @@ void PairsSimulation::communicateAllData( #endif } -void PairsSimulation::communicateContactHistoryData( +void PairsRuntime::communicateContactHistoryData( int dim, int nelems_per_contact, const real_t *send_buf, const int *contact_soffsets, const int *nsend_contact, real_t *recv_buf, int *contact_roffsets, int *nrecv_contact) { @@ -515,8 +640,8 @@ void PairsSimulation::communicateContactHistoryData( #endif } -void PairsSimulation::fillCommunicationArrays(int *neighbor_ranks, int *pbc, real_t *subdom) { - this->getDomainPartitioner()->fillArrays(neighbor_ranks, pbc, subdom); +void PairsRuntime::copyRuntimeArray(const std::string& name, void *dest, const int size) { + this->getDomainPartitioner()->copyRuntimeArray(name, dest, size); } } diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp index 8944dfda738602867d7a9fb768cf81fd7e083d90..e87dec06224d830f8f15fdc2e593278c00d100e6 100644 --- a/runtime/pairs.hpp +++ b/runtime/pairs.hpp @@ -12,32 +12,37 @@ #include "property.hpp" #include "runtime_var.hpp" #include "timers.hpp" +#include "tracked_variable.hpp" #include "devices/device.hpp" +#include "domain/block_forest.hpp" #include 
"domain/regular_6d_stencil.hpp" #pragma once -#define FLAGS_INFINITE (1 << 0) -#define FLAGS_GHOST (1 << 1) -#define FLAGS_FIXED (1 << 2) -#define FLAGS_GLOBAL (1 << 3) namespace pairs { -class PairsSimulation { +class PairsRuntime { private: - Regular6DStencil *dom_part; - //DomainPartitioner *dom_part; + DomainPartitioner *dom_part; DomainPartitioners dom_part_type; std::vector<Property> properties; std::vector<ContactProperty> contact_properties; std::vector<FeatureProperty> feature_properties; std::vector<Array> arrays; + std::vector<TrackedVariable> tracked_variables; DeviceFlags *prop_flags, *contact_prop_flags, *array_flags; Timers<double> *timers; + int *nlocal, *nghost; public: - PairsSimulation(int nprops_, int ncontactprops_, int narrays_, DomainPartitioners dom_part_type_) { + PairsRuntime( + int nprops_, + int ncontactprops_, + int narrays_, + DomainPartitioners dom_part_type_) { + + dom_part = nullptr; dom_part_type = dom_part_type_; prop_flags = new DeviceFlags(nprops_); contact_prop_flags = new DeviceFlags(ncontactprops_); @@ -45,7 +50,7 @@ public: timers = new Timers<double>(1e-6); } - ~PairsSimulation() { + ~PairsRuntime() { dom_part->finalize(); delete prop_flags; delete contact_prop_flags; @@ -55,8 +60,40 @@ public: // Variables template<typename T> - RuntimeVar<T> addDeviceVariable(T *h_ptr) { - return RuntimeVar<T>(h_ptr); + RuntimeVar<T> &addDeviceVariable(T *h_ptr) { + // TODO: Proper memory mangement for RuntimeVar variables + RuntimeVar<T> *ret = new RuntimeVar<T>(h_ptr); + return *ret; + } + + void trackVariable(std::string variable_name, void *ptr) { + PAIRS_ASSERT( + std::find_if(tracked_variables.begin(), tracked_variables.end(), + [variable_name](TrackedVariable _v) { + return _v.getName() == variable_name; + }) == std::end(tracked_variables)); + + tracked_variables.push_back(TrackedVariable(variable_name, ptr)); + } + + TrackedVariable &getTrackedVariable(std::string variable_name) { + auto v = std::find_if( + 
tracked_variables.begin(), + tracked_variables.end(), + [variable_name](TrackedVariable _v) { return _v.getName() == variable_name; }); + + PAIRS_ASSERT(v != std::end(tracked_variables)); + return *v; + } + + void setTrackedVariableAsInteger(std::string variable_name, int value) { + auto& tv = getTrackedVariable(variable_name); + *(static_cast<int *>(tv.getPointer())) = value; + } + + int getTrackedVariableAsInteger(std::string variable_name) { + auto& tv = getTrackedVariable(variable_name); + return *(static_cast<int *>(tv.getPointer())); } // Arrays @@ -108,6 +145,7 @@ public: void copyArraySliceToHost(Array &array, action_t action, size_t offset, size_t size); // Properties + std::vector<Property> &getProperties() { return properties; }; Property &getProperty(property_t id); Property &getPropertyByName(std::string name); void addProperty(Property prop); @@ -115,11 +153,11 @@ public: template<typename T_ptr> void addProperty( property_t id, std::string name, T_ptr **h_ptr, std::nullptr_t, - PropertyType type, layout_t layout, size_t sx, size_t sy = 1); + PropertyType type, layout_t layout, int vol, size_t sx, size_t sy = 1); template<typename T_ptr> void addProperty( property_t id, std::string name, T_ptr **h_ptr, T_ptr **d_ptr, - PropertyType type, layout_t layout, size_t sx, size_t sy = 1); + PropertyType type, layout_t layout, int vol, size_t sx, size_t sy = 1); template<typename T_ptr> void reallocProperty(property_t id, T_ptr **h_ptr, std::nullptr_t, size_t sx = 1, size_t sy = 1); @@ -131,6 +169,10 @@ public: return static_cast<IntProperty&>(prop); } + inline UInt64Property &getAsUInt64Property(Property &prop) { + return static_cast<UInt64Property&>(prop); + } + inline FloatProperty &getAsFloatProperty(Property &prop) { return static_cast<FloatProperty&>(prop); } @@ -151,6 +193,10 @@ public: return static_cast<IntProperty&>(getProperty(property)); } + inline UInt64Property &getUInt64Property(property_t property) { + return 
static_cast<UInt64Property&>(getProperty(property)); + } + inline FloatProperty &getFloatProperty(property_t property) { return static_cast<FloatProperty&>(getProperty(property)); } @@ -176,6 +222,10 @@ public: copyPropertyToDevice(getProperty(id), action, size); } + void copyPropertyToDevice(Property &prop, action_t action) { + copyPropertyToDevice(prop, action, prop.getTotalSize()); + } + void copyPropertyToDevice(Property &prop, action_t action, size_t size); void copyPropertyToHost(property_t id, action_t action) { @@ -193,6 +243,14 @@ public: void copyPropertyToHost(Property &prop, action_t action, size_t size); + DeviceFlags* getPropFlags(){ + return prop_flags; + } + + DeviceFlags* getArrayFlags(){ + return array_flags; + } + // Contact properties ContactProperty &getContactProperty(property_t id); ContactProperty &getContactPropertyByName(std::string name); @@ -262,9 +320,15 @@ public: // Communication void initDomain( int *argc, char ***argv, - real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax); + real_t xmin, real_t ymin, real_t zmin, real_t xmax, real_t ymax, real_t zmax, + bool pbcx = 0, bool pbcy = 0, bool pbcz = 0, bool balance_workload = 0); - Regular6DStencil *getDomainPartitioner() { return dom_part; } + template<typename Domain_T> + void useDomain(const std::shared_ptr<Domain_T> &domain_ptr); + + void updateDomain() { dom_part->update(); } + + DomainPartitioner *getDomainPartitioner() { return dom_part; } void communicateSizes(int dim, const int *send_sizes, int *recv_sizes); void communicateData( @@ -272,6 +336,11 @@ public: const real_t *send_buf, const int *send_offsets, const int *nsend, real_t *recv_buf, const int *recv_offsets, const int *nrecv); + void communicateDataReverse( + int dim, int elem_size, + const real_t *send_buf, const int *send_offsets, const int *nsend, + real_t *recv_buf, const int *recv_offsets, const int *nrecv); + void communicateAllData( int ndims, int elem_size, const real_t *send_buf, const 
int *send_offsets, const int *nsend, @@ -282,7 +351,9 @@ public: const real_t *send_buf, const int *contact_soffsets, const int *nsend_contact, real_t *recv_buf, int *contact_roffsets, int *nrecv_contact); - void fillCommunicationArrays(int neighbor_ranks[], int pbc[], real_t subdom[]); + void copyRuntimeArray(const std::string& name, void *dest, const int size); + int getNumberOfNeighborRanks() { return this->getDomainPartitioner()->getNumberOfNeighborRanks(); } + int getNumberOfNeighborAABBs() { return this->getDomainPartitioner()->getNumberOfNeighborAABBs(); } // Device functions void sync() { device_synchronize(); } @@ -296,8 +367,38 @@ public: } }; +template<typename Domain_T> +void PairsRuntime::useDomain(const std::shared_ptr<Domain_T> &domain_ptr){ + + if(dom_part){ + PAIRS_ERROR("DomainPartitioner already exists!\n"); + exit(-1); + } + + if(dom_part_type == RegularPartitioning) { + PAIRS_ERROR("useDomain not implemented for Regular6DStencil!\n"); + exit(-1); + + } else if(dom_part_type == RegularXYPartitioning) { + PAIRS_ERROR("useDomain not implemented for Regular6DStencil!\n"); + exit(-1); + + } + +#ifdef USE_WALBERLA + else if(dom_part_type == BlockForestPartitioning) { + dom_part = new BlockForest(this, domain_ptr); + } +#endif + + else { + PAIRS_ERROR("Domain partitioning type not implemented!\n"); + exit(-1); + } +} + template<typename T_ptr> -void PairsSimulation::addArray(array_t id, std::string name, T_ptr **h_ptr, std::nullptr_t, size_t size) { +void PairsRuntime::addArray(array_t id, std::string name, T_ptr **h_ptr, std::nullptr_t, size_t size) { PAIRS_ASSERT(size > 0); *h_ptr = (T_ptr *) pairs::host_alloc(size); @@ -306,7 +407,7 @@ void PairsSimulation::addArray(array_t id, std::string name, T_ptr **h_ptr, std: } template<typename T_ptr> -void PairsSimulation::addArray(array_t id, std::string name, T_ptr **h_ptr, T_ptr **d_ptr, size_t size) { +void PairsRuntime::addArray(array_t id, std::string name, T_ptr **h_ptr, T_ptr **d_ptr, size_t size) { 
PAIRS_ASSERT(size > 0); *h_ptr = (T_ptr *) pairs::host_alloc(size); @@ -316,19 +417,19 @@ void PairsSimulation::addArray(array_t id, std::string name, T_ptr **h_ptr, T_pt } template<typename T_ptr> -void PairsSimulation::addStaticArray(array_t id, std::string name, T_ptr *h_ptr, std::nullptr_t, size_t size) { +void PairsRuntime::addStaticArray(array_t id, std::string name, T_ptr *h_ptr, std::nullptr_t, size_t size) { addArray(Array(id, name, h_ptr, nullptr, size, true)); } template<typename T_ptr> -void PairsSimulation::addStaticArray(array_t id, std::string name, T_ptr *h_ptr, T_ptr *d_ptr, size_t size) { +void PairsRuntime::addStaticArray(array_t id, std::string name, T_ptr *h_ptr, T_ptr *d_ptr, size_t size) { addArray(Array(id, name, h_ptr, d_ptr, size, true)); } template<typename T_ptr> -void PairsSimulation::reallocArray(array_t id, T_ptr **h_ptr, std::nullptr_t, size_t size) { +void PairsRuntime::reallocArray(array_t id, T_ptr **h_ptr, std::nullptr_t, size_t size) { // This should be a pointer (and not a reference) in order to be modified - auto a = std::find_if(arrays.begin(), arrays.end(), [id](Array a) { return a.getId() == id; }); + auto a = std::find_if(arrays.begin(), arrays.end(), [id](Array _a) { return _a.getId() == id; }); PAIRS_ASSERT(a != std::end(arrays)); PAIRS_ASSERT(size > 0); @@ -341,9 +442,9 @@ void PairsSimulation::reallocArray(array_t id, T_ptr **h_ptr, std::nullptr_t, si } template<typename T_ptr> -void PairsSimulation::reallocArray(array_t id, T_ptr **h_ptr, T_ptr **d_ptr, size_t size) { +void PairsRuntime::reallocArray(array_t id, T_ptr **h_ptr, T_ptr **d_ptr, size_t size) { // This should be a pointer (and not a reference) in order to be modified - auto a = std::find_if(arrays.begin(), arrays.end(), [id](Array a) { return a.getId() == id; }); + auto a = std::find_if(arrays.begin(), arrays.end(), [id](Array _a) { return _a.getId() == id; }); PAIRS_ASSERT(a != std::end(arrays)); PAIRS_ASSERT(size > 0); @@ -363,20 +464,22 @@ void 
PairsSimulation::reallocArray(array_t id, T_ptr **h_ptr, T_ptr **d_ptr, siz } template<typename T_ptr> -void PairsSimulation::addProperty( - property_t id, std::string name, T_ptr **h_ptr, std::nullptr_t, PropertyType type, layout_t layout, size_t sx, size_t sy) { +void PairsRuntime::addProperty( + property_t id, std::string name, T_ptr **h_ptr, std::nullptr_t, + PropertyType type, layout_t layout, int vol, size_t sx, size_t sy) { size_t size = sx * sy * sizeof(T_ptr); PAIRS_ASSERT(size > 0); *h_ptr = (T_ptr *) pairs::host_alloc(size); PAIRS_ASSERT(*h_ptr != nullptr); - addProperty(Property(id, name, *h_ptr, nullptr, type, layout, sx, sy)); + addProperty(Property(id, name, *h_ptr, nullptr, type, layout, vol, sx, sy)); } template<typename T_ptr> -void PairsSimulation::addProperty( - property_t id, std::string name, T_ptr **h_ptr, T_ptr **d_ptr, PropertyType type, layout_t layout, size_t sx, size_t sy) { +void PairsRuntime::addProperty( + property_t id, std::string name, T_ptr **h_ptr, T_ptr **d_ptr, + PropertyType type, layout_t layout, int vol, size_t sx, size_t sy) { size_t size = sx * sy * sizeof(T_ptr); PAIRS_ASSERT(size > 0); @@ -384,11 +487,11 @@ void PairsSimulation::addProperty( *h_ptr = (T_ptr *) pairs::host_alloc(size); *d_ptr = (T_ptr *) pairs::device_alloc(size); PAIRS_ASSERT(*h_ptr != nullptr && *d_ptr != nullptr); - addProperty(Property(id, name, *h_ptr, *d_ptr, type, layout, sx, sy)); + addProperty(Property(id, name, *h_ptr, *d_ptr, type, layout, vol, sx, sy)); } template<typename T_ptr> -void PairsSimulation::reallocProperty(property_t id, T_ptr **h_ptr, std::nullptr_t, size_t sx, size_t sy) { +void PairsRuntime::reallocProperty(property_t id, T_ptr **h_ptr, std::nullptr_t, size_t sx, size_t sy) { // This should be a pointer (and not a reference) in order to be modified auto p = std::find_if(properties.begin(), properties.end(), @@ -407,7 +510,7 @@ void PairsSimulation::reallocProperty(property_t id, T_ptr **h_ptr, std::nullptr } template<typename 
T_ptr> -void PairsSimulation::reallocProperty(property_t id, T_ptr **h_ptr, T_ptr **d_ptr, size_t sx, size_t sy) { +void PairsRuntime::reallocProperty(property_t id, T_ptr **h_ptr, T_ptr **d_ptr, size_t sx, size_t sy) { // This should be a pointer (and not a reference) in order to be modified auto p = std::find_if(properties.begin(), properties.end(), @@ -433,7 +536,7 @@ void PairsSimulation::reallocProperty(property_t id, T_ptr **h_ptr, T_ptr **d_pt } template<typename T_ptr> -void PairsSimulation::addContactProperty( +void PairsRuntime::addContactProperty( property_t id, std::string name, T_ptr **h_ptr, std::nullptr_t, PropertyType type, layout_t layout, size_t sx, size_t sy) { size_t size = sx * sy * sizeof(T_ptr); @@ -445,7 +548,7 @@ void PairsSimulation::addContactProperty( } template<typename T_ptr> -void PairsSimulation::addContactProperty( +void PairsRuntime::addContactProperty( property_t id, std::string name, T_ptr **h_ptr, T_ptr **d_ptr, PropertyType type, layout_t layout, size_t sx, size_t sy) { size_t size = sx * sy * sizeof(T_ptr); @@ -458,7 +561,7 @@ void PairsSimulation::addContactProperty( } template<typename T_ptr> -void PairsSimulation::reallocContactProperty(property_t id, T_ptr **h_ptr, std::nullptr_t, size_t sx, size_t sy) { +void PairsRuntime::reallocContactProperty(property_t id, T_ptr **h_ptr, std::nullptr_t, size_t sx, size_t sy) { // This should be a pointer (and not a reference) in order to be modified auto cp = std::find_if(contact_properties.begin(), contact_properties.end(), @@ -477,7 +580,7 @@ void PairsSimulation::reallocContactProperty(property_t id, T_ptr **h_ptr, std:: } template<typename T_ptr> -void PairsSimulation::reallocContactProperty(property_t id, T_ptr **h_ptr, T_ptr **d_ptr, size_t sx, size_t sy) { +void PairsRuntime::reallocContactProperty(property_t id, T_ptr **h_ptr, T_ptr **d_ptr, size_t sx, size_t sy) { // This should be a pointer (and not a reference) in order to be modified auto cp = 
std::find_if(contact_properties.begin(), contact_properties.end(), @@ -503,14 +606,14 @@ void PairsSimulation::reallocContactProperty(property_t id, T_ptr **h_ptr, T_ptr } template<typename T_ptr> -void PairsSimulation::addFeatureProperty(property_t id, std::string name, T_ptr *h_ptr, std::nullptr_t, PropertyType type, int nkinds, int array_size) { +void PairsRuntime::addFeatureProperty(property_t id, std::string name, T_ptr *h_ptr, std::nullptr_t, PropertyType type, int nkinds, int array_size) { PAIRS_ASSERT(nkinds > 0 && array_size > 0); PAIRS_ASSERT(h_ptr != nullptr); addFeatureProperty(FeatureProperty(id, name, h_ptr, nullptr, type, nkinds, array_size)); } template<typename T_ptr> -void PairsSimulation::addFeatureProperty(property_t id, std::string name, T_ptr *h_ptr, T_ptr *d_ptr, PropertyType type, int nkinds, int array_size) { +void PairsRuntime::addFeatureProperty(property_t id, std::string name, T_ptr *h_ptr, T_ptr *d_ptr, PropertyType type, int nkinds, int array_size) { PAIRS_ASSERT(nkinds > 0 && array_size > 0); PAIRS_ASSERT(h_ptr != nullptr && d_ptr != nullptr); addFeatureProperty(FeatureProperty(id, name, h_ptr, d_ptr, type, nkinds, array_size)); diff --git a/runtime/pairs_common.hpp b/runtime/pairs_common.hpp index c3cf60da1156d9ea6dc5c7e0e1c3bd581a62c8c5..74237423ee5d3de07462bcc40739484ef4fd9781 100644 --- a/runtime/pairs_common.hpp +++ b/runtime/pairs_common.hpp @@ -3,12 +3,39 @@ #pragma once +namespace pairs { + +#ifdef PAIRS_TARGET_CUDA + #define PAIRS_ATTR_HOST __host__ + #define PAIRS_ATTR_DEVICE __device__ + #define PAIRS_ATTR_HOST_DEVICE __host__ __device__ +#else + #define PAIRS_ATTR_HOST + #define PAIRS_ATTR_DEVICE + #define PAIRS_ATTR_HOST_DEVICE +#endif + +namespace flags{ + constexpr int INFINITE = 1 << 0 ; + constexpr int GHOST = 1 << 1 ; + constexpr int FIXED = 1 << 2 ; + constexpr int GLOBAL = 1 << 3 ; +} + +namespace shapes{ + enum Type { + Sphere = 0, + Halfspace = 1, + PointMass = 2 + }; +} //#ifdef USE_DOUBLE_PRECISION typedef 
double real_t; //#else //typedef float real_t; //#endif +typedef uint64_t id_t; typedef int array_t; typedef int property_t; typedef int layout_t; @@ -17,12 +44,25 @@ typedef int action_t; enum PropertyType { Prop_Invalid = -1, Prop_Integer = 0, + Prop_UInt64, Prop_Real, Prop_Vector, Prop_Matrix, Prop_Quaternion }; +constexpr size_t get_proptype_size(PropertyType type){ + switch (type) { + case pairs::Prop_Integer: return sizeof(int); + case pairs::Prop_UInt64: return sizeof(uint64_t); + case pairs::Prop_Real: return sizeof(real_t); + case pairs::Prop_Vector: return 3*sizeof(real_t); + case pairs::Prop_Matrix: return 9*sizeof(real_t); + case pairs::Prop_Quaternion: return 4*sizeof(real_t); + default: return 0; + } +} + enum DataLayout { Invalid = -1, AoS = 0, @@ -38,7 +78,7 @@ enum Actions { Ignore = 5 }; -enum Timers { +enum TimerMarkers { All = 0, Communication = 1, DeviceTransfers = 2, @@ -46,11 +86,28 @@ enum Timers { }; enum DomainPartitioners { - Regular = 0, - RegularXY = 1, - BoxList = 2, + RegularPartitioning = 0, + RegularXYPartitioning = 1, + BlockForestPartitioning = 2 +}; + +enum LoadBalancingAlgorithms { + Morton = 0, + Hilbert = 1, + Metis = 2, + Diffusive = 3 }; +constexpr const char* getAlgorithmName(LoadBalancingAlgorithms alg) { + switch (alg) { + case Morton: return "Morton"; + case Hilbert: return "Hilbert"; + case Metis: return "Metis"; + case Diffusive: return "Diffusive"; + default: return "Invalid"; + } +} + #ifdef DEBUG # include <assert.h> # define PAIRS_DEBUG(...) { \ @@ -78,3 +135,5 @@ enum DomainPartitioners { #define PAIRS_ERROR(...) fprintf(stderr, __VA_ARGS__) #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? 
(a) : (b)) + +} \ No newline at end of file diff --git a/runtime/property.hpp b/runtime/property.hpp index 741594d1745edf923f63bf8eae422aa11218f41a..fd2c5e41c583892318e1d27be91d671abe103793 100644 --- a/runtime/property.hpp +++ b/runtime/property.hpp @@ -11,32 +11,39 @@ protected: void *h_ptr, *d_ptr; PropertyType type; layout_t layout; + int vol; size_t sx, sy; public: - Property(property_t id_, std::string name_, void *h_ptr_, void *d_ptr_, PropertyType type_, layout_t layout_, size_t sx_, size_t sy_=1) : + Property( + property_t id_, std::string name_, void *h_ptr_, void *d_ptr_, + PropertyType type_, layout_t layout_, int vol_, size_t sx_, size_t sy_=1) : + id(id_), name(name_), h_ptr(h_ptr_), d_ptr(d_ptr_), type(type_), layout(layout_), + vol(vol_), sx(sx_), sy(sy_) { PAIRS_ASSERT(type != Prop_Invalid && layout_ != Invalid && sx_ > 0 && sy_ > 0); } - property_t getId() { return id; } - std::string getName() { return name; } - void *getHostPointer() { return h_ptr; } - void *getDevicePointer() { return d_ptr; } + property_t getId() const { return id; } + std::string getName() const { return name; } + void *getHostPointer() const { return h_ptr; } + void *getDevicePointer() const { return d_ptr; } void setPointers(void *h_ptr_, void *d_ptr_) { h_ptr = h_ptr_, d_ptr = d_ptr_; } void setSizes(size_t sx_, size_t sy_) { sx = sx_, sy = sy_; } - size_t getTotalSize() { return sx * sy * getPrimitiveTypeSize(); }; - PropertyType getType() { return type; } - layout_t getLayout() { return layout; } - size_t getPrimitiveTypeSize() { + size_t getTotalSize() const { return sx * sy * getPrimitiveTypeSize(); }; + PropertyType getType() const { return type; } + layout_t getLayout() const { return layout; } + int isVolatile() const { return vol != 0; } + size_t getPrimitiveTypeSize() const { return (type == Prop_Integer) ? sizeof(int) : + (type == Prop_UInt64) ? sizeof(uint64_t) : (type == Prop_Real) ? sizeof(real_t) : (type == Prop_Vector) ? 
sizeof(real_t) : (type == Prop_Matrix) ? sizeof(real_t) : @@ -49,6 +56,11 @@ public: inline int &operator()(int i) { return static_cast<int *>(h_ptr)[i]; } }; +class UInt64Property : public Property { +public: + inline uint64_t &operator()(int i) { return static_cast<uint64_t *>(h_ptr)[i]; } +}; + class FloatProperty : public Property { public: inline real_t &operator()(int i) { return static_cast<real_t *>(h_ptr)[i]; } diff --git a/runtime/read_from_file.cpp b/runtime/read_from_file.cpp new file mode 100644 index 0000000000000000000000000000000000000000..25dcc97c2545474c1ea974824db665fc7e4af414 --- /dev/null +++ b/runtime/read_from_file.cpp @@ -0,0 +1,176 @@ +#include "read_from_file.hpp" + + +namespace pairs { + +void read_grid_data(PairsRuntime *ps, const char *filename, real_t *grid_buffer) { + std::ifstream in_file(filename, std::ifstream::in); + std::string line; + + if(!in_file.is_open()) { + std::cerr << "Error: Could not open file \"" << filename << "\"" << std::endl; + exit(-1); + } + + std::getline(in_file, line); + std::stringstream line_stream(line); + std::string in0; + int i = 0; + + while(std::getline(line_stream, in0, ',')) { + //PAIRS_ASSERT(i < ndims * 2); + grid_buffer[i] = std::stod(in0); + i++; + } + + in_file.close(); +} + +size_t read_particle_data( + PairsRuntime *ps, const char *filename, const property_t properties[], + size_t nprops, int shape_id, int start) { + + std::ifstream in_file(filename, std::ifstream::in); + std::string line; + auto shape_ptr = ps->getAsIntegerProperty(ps->getPropertyByName("shape")); + auto uid_ptr = ps->getAsUInt64Property(ps->getPropertyByName("uid")); + int n = start; + + if(!in_file.is_open()) { + std::cerr << "Error: Could not open file \"" << filename << "\"" << std::endl; + exit(-1); + } + + while(std::getline(in_file, line)) { + std::stringstream line_stream(line); + std::string in0; + int within_domain = 1; + int i = 0; + int flags = 0; + + while(std::getline(line_stream, in0, ',')) { + property_t p_id 
= properties[i]; + auto prop = ps->getProperty(p_id); + auto prop_type = prop.getType(); + + if(prop_type == Prop_Vector) { + auto vector_ptr = ps->getAsVectorProperty(prop); + std::string in1, in2; + std::getline(line_stream, in1, ','); + std::getline(line_stream, in2, ','); + real_t x = std::stod(in0); + real_t y = std::stod(in1); + real_t z = std::stod(in2); + vector_ptr(n, 0) = x; + vector_ptr(n, 1) = y; + vector_ptr(n, 2) = z; + + if(prop.getName() == "position") { + within_domain = ps->getDomainPartitioner()->isWithinSubdomain(x, y, z); + } + } else if(prop_type == Prop_Matrix) { + auto matrix_ptr = ps->getAsMatrixProperty(prop); + constexpr int nelems = 9; + std::string in_buf; + + matrix_ptr(n, 0) = std::stod(in0); + for(int e = 1; e < nelems; e++) { + std::getline(line_stream, in_buf, ','); + matrix_ptr(n, e) = std::stod(in_buf); + } + } else if(prop_type == Prop_Quaternion) { + auto quat_ptr = ps->getAsQuaternionProperty(prop); + constexpr int nelems = 4; + std::string in_buf; + + quat_ptr(n, 0) = std::stod(in0); + for(int e = 1; e < nelems; e++) { + std::getline(line_stream, in_buf, ','); + quat_ptr(n, e) = std::stod(in_buf); + } + } else if(prop_type == Prop_Integer) { + auto int_ptr = ps->getAsIntegerProperty(prop); + int_ptr(n) = std::stoi(in0); + + if(prop.getName() == "flags") { + flags = int_ptr(n); + } + } else if(prop_type == Prop_UInt64) { + auto uint64_ptr = ps->getAsUInt64Property(prop); + uint64_ptr(n) = std::stoi(in0); + + if(prop.getName() == "uid") { + std::cerr << "Can't read uid from file." << std::endl; + exit(-1); + } + } else if(prop_type == Prop_Real) { + auto float_ptr = ps->getAsFloatProperty(prop); + float_ptr(n) = std::stod(in0); + } else { + std::cerr << "read_particle_data(): Invalid property type!" << std::endl; + return 0; + } + + i++; + } + + if(within_domain || flags & (flags::INFINITE | flags::FIXED | flags::GLOBAL)) { + uid_ptr(n) = (flags & flags::GLOBAL) ? 
UniqueID::createGlobal(ps) : UniqueID::create(ps); + shape_ptr(n++) = shape_id; + } + } + + return n; +} + +/* +size_t read_feature_data(PairsRuntime *ps, const char *filename, const int feature_id, const property_t properties[], size_t nprops) { + std::ifstream in_file(filename, std::ifstream::in); + std::string line; + + if(in_file.is_open()) { + while(std::getline(in_file, line)) { + std::stringstream line_stream(line); + std::string istr, jstr, in0; + std::getline(line_stream, istr, ','); + std::getline(line_stream, jstr, ','); + int i = std::stoi(istr); + int j = std::stoi(jstr); + + while(std::getline(line_stream, in0, ',')) { + property_t p_id = properties[i]; + auto prop = ps->getProperty(p_id); + auto prop_type = prop.getType(); + + if(prop_type == Prop_Vector) { + auto vector_ptr = ps->getAsVectorFeatureProperty(prop); + std::string in1, in2; + std::getline(line_stream, in1, ','); + std::getline(line_stream, in2, ','); + real_t x = std::stod(in0); + real_t y = std::stod(in1); + real_t z = std::stod(in2); + vector_ptr(i, j, 0) = x; + vector_ptr(i, j, 1) = y; + vector_ptr(i, j, 2) = z; + } else if(prop_type == Prop_Integer) { + auto int_ptr = ps->getAsIntegerFeatureProperty(prop); + int_ptr(i, j) = std::stoi(in0); + } else if(prop_type == Prop_Real) { + auto float_ptr = ps->getAsFloatFeatureProperty(prop); + float_ptr(i, j) = std::stod(in0); + } else { + std::cerr << "read_feature_data(): Invalid property type!" 
<< std::endl; + return 0; + } + } + } + + in_file.close(); + } + + return n; +} +*/ + +} diff --git a/runtime/read_from_file.hpp b/runtime/read_from_file.hpp index 0173b46778809d13019cfbb42da2a579d2a8c212..abe1acf3edf4133ce2d17bd7c7ea82144279bc48 100644 --- a/runtime/read_from_file.hpp +++ b/runtime/read_from_file.hpp @@ -5,163 +5,16 @@ //--- #include "pairs.hpp" #include "pairs_common.hpp" +#include "unique_id.hpp" #pragma once namespace pairs { -void read_grid_data(PairsSimulation *ps, const char *filename, real_t *grid_buffer) { - std::ifstream in_file(filename, std::ifstream::in); - std::string line; +void read_grid_data(PairsRuntime *ps, const char *filename, real_t *grid_buffer); - if(in_file.is_open()) { - std::getline(in_file, line); - std::stringstream line_stream(line); - std::string in0; - int i = 0; - - while(std::getline(line_stream, in0, ',')) { - //PAIRS_ASSERT(i < ndims * 2); - grid_buffer[i] = std::stod(in0); - i++; - } - - in_file.close(); - } -} - -size_t read_particle_data(PairsSimulation *ps, const char *filename, const property_t properties[], size_t nprops, int shape_id, int start) { - std::ifstream in_file(filename, std::ifstream::in); - std::string line; - auto shape_ptr = ps->getAsIntegerProperty(ps->getPropertyByName("shape")); - size_t n = start; - - if(in_file.is_open()) { - //std::getline(in_file, line); - while(std::getline(in_file, line)) { - std::stringstream line_stream(line); - std::string in0; - int within_domain = 1; - int i = 0; - int flags = 0; - - while(std::getline(line_stream, in0, ',')) { - property_t p_id = properties[i]; - auto prop = ps->getProperty(p_id); - auto prop_type = prop.getType(); - - if(prop_type == Prop_Vector) { - auto vector_ptr = ps->getAsVectorProperty(prop); - std::string in1, in2; - std::getline(line_stream, in1, ','); - std::getline(line_stream, in2, ','); - real_t x = std::stod(in0); - real_t y = std::stod(in1); - real_t z = std::stod(in2); - vector_ptr(n, 0) = x; - vector_ptr(n, 1) = y; - 
vector_ptr(n, 2) = z; - - if(prop.getName() == "position") { - within_domain = ps->getDomainPartitioner()->isWithinSubdomain(x, y, z); - } - } else if(prop_type == Prop_Matrix) { - auto matrix_ptr = ps->getAsMatrixProperty(prop); - constexpr int nelems = 9; - std::string in_buf; - - matrix_ptr(n, 0) = std::stod(in0); - for(int i = 1; i < nelems; i++) { - std::getline(line_stream, in_buf, ','); - matrix_ptr(n, i) = std::stod(in_buf); - } - } else if(prop_type == Prop_Quaternion) { - auto quat_ptr = ps->getAsQuaternionProperty(prop); - constexpr int nelems = 4; - std::string in_buf; - - quat_ptr(n, 0) = std::stod(in0); - for(int i = 1; i < nelems; i++) { - std::getline(line_stream, in_buf, ','); - quat_ptr(n, i) = std::stod(in_buf); - } - } else if(prop_type == Prop_Integer) { - auto int_ptr = ps->getAsIntegerProperty(prop); - int_ptr(n) = std::stoi(in0); - - if(prop.getName() == "flags") { - flags = int_ptr(n); - } - } else if(prop_type == Prop_Real) { - auto float_ptr = ps->getAsFloatProperty(prop); - float_ptr(n) = std::stod(in0); - } else { - std::cerr << "read_particle_data(): Invalid property type!" 
<< std::endl; - return 0; - } - - i++; - } - - if(within_domain || flags & (FLAGS_INFINITE | FLAGS_FIXED | FLAGS_GLOBAL)) { - shape_ptr(n++) = shape_id; - } - } - - in_file.close(); - } - - return n; -} - -/* -size_t read_feature_data(PairsSimulation *ps, const char *filename, const int feature_id, const property_t properties[], size_t nprops) { - std::ifstream in_file(filename, std::ifstream::in); - std::string line; - - if(in_file.is_open()) { - while(std::getline(in_file, line)) { - std::stringstream line_stream(line); - std::string istr, jstr, in0; - std::getline(line_stream, istr, ','); - std::getline(line_stream, jstr, ','); - int i = std::stoi(istr); - int j = std::stoi(jstr); - - while(std::getline(line_stream, in0, ',')) { - property_t p_id = properties[i]; - auto prop = ps->getProperty(p_id); - auto prop_type = prop.getType(); - - if(prop_type == Prop_Vector) { - auto vector_ptr = ps->getAsVectorFeatureProperty(prop); - std::string in1, in2; - std::getline(line_stream, in1, ','); - std::getline(line_stream, in2, ','); - real_t x = std::stod(in0); - real_t y = std::stod(in1); - real_t z = std::stod(in2); - vector_ptr(i, j, 0) = x; - vector_ptr(i, j, 1) = y; - vector_ptr(i, j, 2) = z; - } else if(prop_type == Prop_Integer) { - auto int_ptr = ps->getAsIntegerFeatureProperty(prop); - int_ptr(i, j) = std::stoi(in0); - } else if(prop_type == Prop_Real) { - auto float_ptr = ps->getAsFloatFeatureProperty(prop); - float_ptr(i, j) = std::stod(in0); - } else { - std::cerr << "read_feature_data(): Invalid property type!" 
<< std::endl; - return 0; - } - } - } - - in_file.close(); - } - - return n; -} -*/ +size_t read_particle_data( + PairsRuntime *ps, const char *filename, const property_t properties[], + size_t nprops, int shape_id, int start); } diff --git a/runtime/runtime_var.hpp b/runtime/runtime_var.hpp index 7cf3aeaa9b6c32883299bfcb44044db457f4af48..7599b9771fab009be05df1a13ea6764e2c0d78ba 100644 --- a/runtime/runtime_var.hpp +++ b/runtime/runtime_var.hpp @@ -5,11 +5,12 @@ namespace pairs { template<typename T> -class RuntimeVar{ +class RuntimeVar { protected: T *h_ptr, *d_ptr; public: + RuntimeVar() = default; RuntimeVar(T *ptr) { h_ptr = ptr; d_ptr = (T *) pairs::device_alloc(sizeof(T)); diff --git a/runtime/stats.cpp b/runtime/stats.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d66c34162937c36d2827be8d1bc8c3852461013c --- /dev/null +++ b/runtime/stats.cpp @@ -0,0 +1,29 @@ +#include "pairs.hpp" + +namespace pairs { + +void print_stats(PairsRuntime *ps, int nlocal, int nghost) { + int min_nlocal = nlocal; + int max_nlocal = nlocal; + int min_nghost = nghost; + int max_nghost = nghost; + int nglobal; + + if(ps->getDomainPartitioner()->getWorldSize() > 1) { + MPI_Allreduce(&nlocal, &nglobal, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); + min_nlocal = nglobal; + MPI_Allreduce(&nlocal, &nglobal, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + max_nlocal = nglobal; + MPI_Allreduce(&nghost, &nglobal, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); + min_nghost = nglobal; + MPI_Allreduce(&nghost, &nglobal, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + max_nghost = nglobal; + } + + if(ps->getDomainPartitioner()->getRank() == 0) { + std::cout << "Number of local particles: " << min_nlocal << " / " << max_nlocal << std::endl; + std::cout << "Number of ghost particles: " << min_nghost << " / " << max_nghost << std::endl; + } +} + +} diff --git a/runtime/stats.hpp b/runtime/stats.hpp index 413ffab8425bc31d877e263d29fdfa4f9f343405..e6c51c306f83b347534963aac7c24f7a42f1b58d 100644 --- 
a/runtime/stats.hpp +++ b/runtime/stats.hpp @@ -2,32 +2,8 @@ #pragma once -using namespace std; - namespace pairs { -void print_stats(PairsSimulation *ps, int nlocal, int nghost) { - int min_nlocal = nlocal; - int max_nlocal = nlocal; - int min_nghost = nghost; - int max_nghost = nghost; - int nglobal; - - if(ps->getDomainPartitioner()->getWorldSize() > 1) { - MPI_Allreduce(&nlocal, &nglobal, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); - min_nlocal = nglobal; - MPI_Allreduce(&nlocal, &nglobal, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); - max_nlocal = nglobal; - MPI_Allreduce(&nghost, &nglobal, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); - min_nghost = nglobal; - MPI_Allreduce(&nghost, &nglobal, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); - max_nghost = nglobal; - } - - if(ps->getDomainPartitioner()->getRank() == 0) { - std::cout << "Number of local particles: " << min_nlocal << " / " << max_nlocal << std::endl; - std::cout << "Number of ghost particles: " << min_nghost << " / " << max_nghost << std::endl; - } -} +void print_stats(PairsRuntime *ps, int nlocal, int nghost); } diff --git a/runtime/thermo.cpp b/runtime/thermo.cpp new file mode 100644 index 0000000000000000000000000000000000000000..044f6d3639f8db286099a19b112b67b065f2cfa0 --- /dev/null +++ b/runtime/thermo.cpp @@ -0,0 +1,101 @@ +#include <iostream> +#include <math.h> +#include <mpi.h> +//--- +#include "pairs.hpp" + +namespace pairs { + +double compute_thermo( + PairsRuntime *ps, int nlocal, double xprd, double yprd, double zprd, int print) { + + auto masses = ps->getAsFloatProperty(ps->getPropertyByName("mass")); + auto velocities = ps->getAsVectorProperty(ps->getPropertyByName("linear_velocity")); + int natoms = nlocal; + + if(ps->getDomainPartitioner()->getWorldSize() > 1) { + int global_natoms; + MPI_Allreduce(&natoms, &global_natoms, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + natoms = global_natoms; + } + + const double mvv2e = 1.0; + const double dof_boltz = (natoms * 3 - 3); + const double t_scale = mvv2e / dof_boltz; + 
const double p_scale = 1.0 / 3 / xprd / yprd / zprd; + //const double e_scale = 0.5; + double t = 0.0, p; + + ps->copyPropertyToHost(masses, ReadOnly); + ps->copyPropertyToHost(velocities, ReadOnly); + + for(int i = 0; i < nlocal; i++) { + t += masses(i) * ( velocities(i, 0) * velocities(i, 0) + + velocities(i, 1) * velocities(i, 1) + + velocities(i, 2) * velocities(i, 2) ); + } + + if(ps->getDomainPartitioner()->getWorldSize() > 1) { + double global_t; + MPI_Allreduce(&t, &global_t, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + t = global_t; + } + + t = t * t_scale; + if(print == 1 && ps->getDomainPartitioner()->getRank() == 0) { + p = (t * dof_boltz) * p_scale; + std::cout << t << "\t" << p << std::endl; + } + + return t; +} + +void adjust_thermo( + PairsRuntime *ps, int nlocal, double xprd, double yprd, double zprd, double temp) { + + auto velocities = ps->getAsVectorProperty(ps->getPropertyByName("linear_velocity")); + double vxtot = 0.0; + double vytot = 0.0; + double vztot = 0.0; + double tmp; + int natoms = nlocal; + + for(int i = 0; i < nlocal; i++) { + vxtot += velocities(i, 0); + vytot += velocities(i, 1); + vztot += velocities(i, 2); + } + + if(ps->getDomainPartitioner()->getWorldSize() > 1) { + int global_natoms; + MPI_Allreduce(&natoms, &global_natoms, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + natoms = global_natoms; + MPI_Allreduce(&vxtot, &tmp, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + vxtot = tmp / natoms; + MPI_Allreduce(&vytot, &tmp, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + vytot = tmp / natoms; + MPI_Allreduce(&vztot, &tmp, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + vztot = tmp / natoms; + } else { + vxtot /= natoms; + vytot /= natoms; + vztot /= natoms; + } + + for(int i = 0; i < nlocal; i++) { + velocities(i, 0) -= vxtot; + velocities(i, 1) -= vytot; + velocities(i, 2) -= vztot; + } + + double t = pairs::compute_thermo(ps, nlocal, xprd, yprd, zprd, 0); + double factor = sqrt(temp / t); + + for(int i = 0; i < nlocal; i++) { + velocities(i, 0) *= 
factor; + velocities(i, 1) *= factor; + velocities(i, 2) *= factor; + } +} + +} diff --git a/runtime/thermo.hpp b/runtime/thermo.hpp index b09693ab9ca47ea71c650205f68f30bbb44bcd2b..6902b007603478ec2f4ca23136c58a88b025eef1 100644 --- a/runtime/thermo.hpp +++ b/runtime/thermo.hpp @@ -1,99 +1,13 @@ -#include <iostream> -#include <math.h> -#include <mpi.h> -//--- #include "pairs.hpp" #pragma once namespace pairs { -double compute_thermo(PairsSimulation *ps, int nlocal, double xprd, double yprd, double zprd, int print) { - auto masses = ps->getAsFloatProperty(ps->getPropertyByName("mass")); - auto velocities = ps->getAsVectorProperty(ps->getPropertyByName("linear_velocity")); - int natoms = nlocal; +double compute_thermo( + PairsRuntime *ps, int nlocal, double xprd, double yprd, double zprd, int print); - if(ps->getDomainPartitioner()->getWorldSize() > 1) { - int global_natoms; - MPI_Allreduce(&natoms, &global_natoms, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - natoms = global_natoms; - } - - const double mvv2e = 1.0; - const double dof_boltz = (natoms * 3 - 3); - const double t_scale = mvv2e / dof_boltz; - const double p_scale = 1.0 / 3 / xprd / yprd / zprd; - //const double e_scale = 0.5; - double t = 0.0, p; - - ps->copyPropertyToHost(masses, ReadOnly); - ps->copyPropertyToHost(velocities, ReadOnly); - - for(int i = 0; i < nlocal; i++) { - t += masses(i) * ( velocities(i, 0) * velocities(i, 0) + - velocities(i, 1) * velocities(i, 1) + - velocities(i, 2) * velocities(i, 2) ); - } - - if(ps->getDomainPartitioner()->getWorldSize() > 1) { - double global_t; - MPI_Allreduce(&t, &global_t, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - t = global_t; - } - - t = t * t_scale; - if(print == 1 && ps->getDomainPartitioner()->getRank() == 0) { - p = (t * dof_boltz) * p_scale; - std::cout << t << "\t" << p << std::endl; - } - - return t; -} - -void adjust_thermo(PairsSimulation *ps, int nlocal, double xprd, double yprd, double zprd, double temp) { - auto velocities = 
ps->getAsVectorProperty(ps->getPropertyByName("linear_velocity")); - double vxtot = 0.0; - double vytot = 0.0; - double vztot = 0.0; - double tmp; - int natoms = nlocal; - - for(int i = 0; i < nlocal; i++) { - vxtot += velocities(i, 0); - vytot += velocities(i, 1); - vztot += velocities(i, 2); - } - - if(ps->getDomainPartitioner()->getWorldSize() > 1) { - int global_natoms; - MPI_Allreduce(&natoms, &global_natoms, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - natoms = global_natoms; - MPI_Allreduce(&vxtot, &tmp, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - vxtot = tmp / natoms; - MPI_Allreduce(&vytot, &tmp, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - vytot = tmp / natoms; - MPI_Allreduce(&vztot, &tmp, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - vztot = tmp / natoms; - } else { - vxtot /= natoms; - vytot /= natoms; - vztot /= natoms; - } - - for(int i = 0; i < nlocal; i++) { - velocities(i, 0) -= vxtot; - velocities(i, 1) -= vytot; - velocities(i, 2) -= vztot; - } - - double t = pairs::compute_thermo(ps, nlocal, xprd, yprd, zprd, 0); - double factor = sqrt(temp / t); - - for(int i = 0; i < nlocal; i++) { - velocities(i, 0) *= factor; - velocities(i, 1) *= factor; - velocities(i, 2) *= factor; - } -} +void adjust_thermo( + PairsRuntime *ps, int nlocal, double xprd, double yprd, double zprd, double temp); } diff --git a/runtime/timers.hpp b/runtime/timers.hpp index 02058924ada457213214511a4099a05cf87695ac..c4cdc943aa5faeed57b4971684277e0844d3e7da 100644 --- a/runtime/timers.hpp +++ b/runtime/timers.hpp @@ -3,6 +3,8 @@ #include <iostream> #include <unordered_map> +#pragma once + using namespace std; namespace pairs { diff --git a/runtime/timing.cpp b/runtime/timing.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0068d8117b89d2529e3f8dc10b24e1d2483672e8 --- /dev/null +++ b/runtime/timing.cpp @@ -0,0 +1,23 @@ +#include "pairs.hpp" + +using namespace std; + +namespace pairs { + +void register_timer(PairsRuntime *ps, int id, std::string name) { + 
ps->getTimers()->add(id, name); +} + +void start_timer(PairsRuntime *ps, int id) { + ps->getTimers()->start(id); +} + +void stop_timer(PairsRuntime *ps, int id) { + ps->getTimers()->stop(id); +} + +void print_timers(PairsRuntime *ps) { + ps->printTimers(); +} + +} diff --git a/runtime/timing.hpp b/runtime/timing.hpp index 6c35e222e049fefd9aac991cf7e9e400ef3cc1ce..f7544603549232cce89c00e7071948da32511693 100644 --- a/runtime/timing.hpp +++ b/runtime/timing.hpp @@ -6,20 +6,9 @@ using namespace std; namespace pairs { -void register_timer(PairsSimulation *ps, int id, std::string name) { - ps->getTimers()->add(id, name); -} - -void start_timer(PairsSimulation *ps, int id) { - ps->getTimers()->start(id); -} - -void stop_timer(PairsSimulation *ps, int id) { - ps->getTimers()->stop(id); -} - -void print_timers(PairsSimulation *ps) { - ps->printTimers(); -} +void register_timer(PairsRuntime *ps, int id, std::string name); +void start_timer(PairsRuntime *ps, int id); +void stop_timer(PairsRuntime *ps, int id); +void print_timers(PairsRuntime *ps); } diff --git a/runtime/tracked_variable.hpp b/runtime/tracked_variable.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d9855125818bea7fc72be668c7d01f1af9a5ff3f --- /dev/null +++ b/runtime/tracked_variable.hpp @@ -0,0 +1,18 @@ +#include "pairs_common.hpp" + +#pragma once + +namespace pairs { + +class TrackedVariable { +protected: + std::string name; + void *ptr; + +public: + TrackedVariable(std::string name_, void *ptr_) : name(name_), ptr(ptr_) {} + std::string getName() { return name; } + void *getPointer() { return ptr; } +}; + +} diff --git a/runtime/unique_id.hpp b/runtime/unique_id.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cfc95a7298335a5c10b535a3296340a6d6552a41 --- /dev/null +++ b/runtime/unique_id.hpp @@ -0,0 +1,38 @@ +#include "pairs.hpp" + +#pragma once + +namespace pairs { + +class UniqueID{ +public: + inline static id_t create(PairsRuntime *pr); + inline static 
id_t createGlobal(PairsRuntime *pr); + inline static id_t getNumGlobals(); + +private: + static const id_t capacity = 1000000000; // max number of particles per rank + inline static id_t counter = 1; + inline static id_t globalCounter = 1; + +}; + +inline id_t UniqueID::getNumGlobals(){ + return globalCounter - 1; +} + +inline id_t UniqueID::create(PairsRuntime *pr){ + id_t rank = static_cast<id_t>(pr->getDomainPartitioner()->getRank()); + id_t id = rank*capacity + counter; + ++counter; + return id; +} + +inline id_t UniqueID::createGlobal(PairsRuntime *pr){ + id_t numranks = static_cast<id_t>(pr->getDomainPartitioner()->getWorldSize()); + id_t id = numranks*capacity + globalCounter; + ++globalCounter; + return id; +} + +} diff --git a/runtime/vtk.cpp b/runtime/vtk.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b6235725c9d7d10cb38e4033a1e456b6b50ab32a --- /dev/null +++ b/runtime/vtk.cpp @@ -0,0 +1,198 @@ +#include <iomanip> +#include <iostream> +#include <fstream> +//--- +#include "pairs.hpp" + +namespace pairs { + +void vtk_write_aabb(PairsRuntime *ps, const char *filename, int num, + double xmin, double xmax, + double ymin, double ymax, + double zmin, double zmax){ + + std::string output_filename(filename); + const int prec = 8; + std::ostringstream filename_oss; + + filename_oss << filename << "_" << num; + if(ps->getDomainPartitioner()->getWorldSize() > 1) { + filename_oss << "r" << ps->getDomainPartitioner()->getRank() ; + } + + filename_oss <<".vtk"; + std::ofstream out_file(filename_oss.str()); + + out_file << std::fixed << std::setprecision(prec); + if(out_file.is_open()) { + out_file << "# vtk DataFile Version 2.0\n"; + out_file << "Subdomains\n"; + out_file << "ASCII\n"; + out_file << "DATASET POLYDATA\n"; + out_file << "POINTS 8 double\n"; + + out_file << xmin << " " << ymin << " " << zmin << "\n"; + out_file << xmax << " " << ymin << " " << zmin << "\n"; + out_file << xmax << " " << ymax << " " << zmin << "\n"; + out_file << 
xmin << " " << ymax << " " << zmin << "\n"; + out_file << xmin << " " << ymin << " " << zmax << "\n"; + out_file << xmax << " " << ymin << " " << zmax << "\n"; + out_file << xmax << " " << ymax << " " << zmax << "\n"; + out_file << xmin << " " << ymax << " " << zmax << "\n"; + + out_file << "POLYGONS 6 30\n"; + + out_file << "4 0 1 2 3 \n"; + out_file << "4 4 5 6 7 \n"; + out_file << "4 0 1 5 4 \n"; + out_file << "4 3 2 6 7 \n"; + out_file << "4 0 4 7 3 \n"; + out_file << "4 1 2 6 5 \n"; + + out_file << "\n\n"; + out_file.close(); + } + else { + std::cerr << "vtk_write_aabb: Failed to open " << filename_oss.str() << std::endl; + exit(-1); + } + +} + +void vtk_write_subdom(PairsRuntime *ps, const char *filename, int timestep, int frequency){ + std::string output_filename(filename); + const int prec = 8; + std::ostringstream filename_oss; + + if(frequency != 0 && timestep % frequency != 0) { + return; + } + + filename_oss << filename << "_"; + if(ps->getDomainPartitioner()->getWorldSize() > 1) { + filename_oss << "r" << ps->getDomainPartitioner()->getRank() << "_"; + } + + filename_oss << timestep << ".vtk"; + std::ofstream out_file(filename_oss.str()); + + double aabb[3][3]; + for (int d=0; d<3; ++d){ + aabb[d][0] = ps->getDomainPartitioner()->getSubdomMin(d); + aabb[d][1] = ps->getDomainPartitioner()->getSubdomMax(d); + } + + out_file << std::fixed << std::setprecision(prec); + if(out_file.is_open()) { + out_file << "# vtk DataFile Version 2.0\n"; + out_file << "Subdomains\n"; + out_file << "ASCII\n"; + out_file << "DATASET POLYDATA\n"; + out_file << "POINTS 8 double\n"; + + out_file << aabb[0][0] << " " << aabb[1][0] << " " << aabb[2][0] << "\n"; + out_file << aabb[0][1] << " " << aabb[1][0] << " " << aabb[2][0] << "\n"; + out_file << aabb[0][1] << " " << aabb[1][1] << " " << aabb[2][0] << "\n"; + out_file << aabb[0][0] << " " << aabb[1][1] << " " << aabb[2][0] << "\n"; + out_file << aabb[0][0] << " " << aabb[1][0] << " " << aabb[2][1] << "\n"; + out_file << 
aabb[0][1] << " " << aabb[1][0] << " " << aabb[2][1] << "\n"; + out_file << aabb[0][1] << " " << aabb[1][1] << " " << aabb[2][1] << "\n"; + out_file << aabb[0][0] << " " << aabb[1][1] << " " << aabb[2][1] << "\n"; + + out_file << "POLYGONS 6 30\n"; + + out_file << "4 0 1 2 3 \n"; + out_file << "4 4 5 6 7 \n"; + out_file << "4 0 1 5 4 \n"; + out_file << "4 3 2 6 7 \n"; + out_file << "4 0 4 7 3 \n"; + out_file << "4 1 2 6 5 \n"; + + out_file << "\n\n"; + out_file.close(); + } + else { + std::cerr << "vtk_write_subdoms: Failed to open " << filename_oss.str() << std::endl; + exit(-1); + } +} + +void vtk_write_data( + PairsRuntime *ps, const char *filename, int start, int end, int timestep, int frequency) { + + std::string output_filename(filename); + auto masses = ps->getAsFloatProperty(ps->getPropertyByName("mass")); + auto positions = ps->getAsVectorProperty(ps->getPropertyByName("position")); + auto flags = ps->getAsIntegerProperty(ps->getPropertyByName("flags")); + auto radius = ps->getAsFloatProperty(ps->getPropertyByName("radius")); + const int prec = 8; + int n = end - start; + std::ostringstream filename_oss; + + if(frequency != 0 && timestep % frequency != 0) { + return; + } + + filename_oss << filename << "_"; + if(ps->getDomainPartitioner()->getWorldSize() > 1) { + filename_oss << "r" << ps->getDomainPartitioner()->getRank() << "_"; + } + + filename_oss << timestep << ".vtk"; + std::ofstream out_file(filename_oss.str()); + + ps->copyPropertyToHost(masses, ReadOnly); + ps->copyPropertyToHost(positions, ReadOnly); + ps->copyPropertyToHost(flags, ReadOnly); + ps->copyPropertyToHost(radius, ReadOnly); + + for(int i = start; i < end; i++) { + if(flags(i) & flags::INFINITE) { + n--; + } + } + + if(out_file.is_open()) { + out_file << "# vtk DataFile Version 2.0\n"; + out_file << "Particle data\n"; + out_file << "ASCII\n"; + out_file << "DATASET POLYDATA\n"; + out_file << "POINTS " << n << " double\n"; + + for(int i = start; i < end; i++) { + if(!(flags(i) & 
flags::INFINITE)) { + out_file << std::fixed << std::setprecision(prec) << positions(i, 0) << " "; + out_file << std::fixed << std::setprecision(prec) << positions(i, 1) << " "; + out_file << std::fixed << std::setprecision(prec) << positions(i, 2) << "\n"; + } + } + + out_file << "\n\n"; + out_file << "POINT_DATA " << n << "\n"; + out_file << "SCALARS mass double 1\n"; + out_file << "LOOKUP_TABLE default\n"; + for(int i = start; i < end; i++) { + if(!(flags(i) & flags::INFINITE)) { + out_file << std::fixed << std::setprecision(prec) << masses(i) << "\n"; + } + } + + out_file << "\n\n"; + out_file << "SCALARS radius double 1\n"; + out_file << "LOOKUP_TABLE default\n"; + for(int i = start; i < end; i++) { + if(!(flags(i) & flags::INFINITE)) { + out_file << std::fixed << std::setprecision(prec) << radius(i) << "\n"; + } + } + + out_file << "\n\n"; + out_file.close(); + } + else { + std::cerr << "vtk_write_data: Failed to open " << filename_oss.str() << std::endl; + exit(-1); + } +} + +} diff --git a/runtime/vtk.hpp b/runtime/vtk.hpp index f122565694634594e0733017df8624a280681fc0..dcd97c020f1b49cdf82df548083687e1874f7c51 100644 --- a/runtime/vtk.hpp +++ b/runtime/vtk.hpp @@ -1,88 +1,17 @@ -#include <iomanip> -#include <iostream> -#include <fstream> -//--- #include "pairs.hpp" #pragma once namespace pairs { -void vtk_write_data(PairsSimulation *ps, const char *filename, int start, int end, int timestep, int frequency) { - std::string output_filename(filename); - auto masses = ps->getAsFloatProperty(ps->getPropertyByName("mass")); - auto positions = ps->getAsVectorProperty(ps->getPropertyByName("position")); - auto flags = ps->getAsIntegerProperty(ps->getPropertyByName("flags")); - const int prec = 8; - int n = end - start; - std::ostringstream filename_oss; +void vtk_write_aabb(PairsRuntime *ps, const char *filename, int num, + double xmin, double xmax, + double ymin, double ymax, + double zmin, double zmax); - if(frequency != 0 && timestep % frequency != 0) { - 
return; - } +void vtk_write_subdom(PairsRuntime *ps, const char *filename, int timestep, int frequency=1); - filename_oss << filename << "_"; - if(ps->getDomainPartitioner()->getWorldSize() > 1) { - filename_oss << "r" << ps->getDomainPartitioner()->getRank() << "_"; - } - - filename_oss << timestep << ".vtk"; - std::ofstream out_file(filename_oss.str()); - - ps->copyPropertyToHost(masses, ReadOnly); - ps->copyPropertyToHost(positions, ReadOnly); - ps->copyPropertyToHost(flags, ReadOnly); - - for(int i = start; i < end; i++) { - if(flags(i) & FLAGS_INFINITE) { - n--; - } - } - - if(out_file.is_open()) { - out_file << "# vtk DataFile Version 2.0\n"; - out_file << "Particle data\n"; - out_file << "ASCII\n"; - out_file << "DATASET UNSTRUCTURED_GRID\n"; - out_file << "POINTS " << n << " double\n"; - - for(int i = start; i < end; i++) { - if(!(flags(i) & FLAGS_INFINITE)) { - out_file << std::fixed << std::setprecision(prec) << positions(i, 0) << " "; - out_file << std::fixed << std::setprecision(prec) << positions(i, 1) << " "; - out_file << std::fixed << std::setprecision(prec) << positions(i, 2) << "\n"; - } - } - - out_file << "\n\n"; - out_file << "CELLS " << n << " " << (n * 2) << "\n"; - for(int i = start; i < end; i++) { - if(!(flags(i) & FLAGS_INFINITE)) { - out_file << "1 " << (i - start) << "\n"; - } - } - - out_file << "\n\n"; - out_file << "CELL_TYPES " << n << "\n"; - for(int i = start; i < end; i++) { - if(!(flags(i) & FLAGS_INFINITE)) { - out_file << "1\n"; - } - } - - out_file << "\n\n"; - out_file << "POINT_DATA " << n << "\n"; - out_file << "SCALARS mass double\n"; - out_file << "LOOKUP_TABLE default\n"; - for(int i = start; i < end; i++) { - if(!(flags(i) & FLAGS_INFINITE)) { - out_file << std::fixed << std::setprecision(prec) << masses(i) << "\n"; - } - } - - out_file << "\n\n"; - out_file.close(); - } -} +void vtk_write_data( + PairsRuntime *ps, const char *filename, int start, int end, int timestep, int frequency=1); } diff --git 
a/src/pairs/__init__.py b/src/pairs/__init__.py index e89e0a79cbb8f3e39143dc3461a9734a6dcf8d7b..6525a9814b0fc4729c0cf3e24e5a748c5db3ae4b 100644 --- a/src/pairs/__init__.py +++ b/src/pairs/__init__.py @@ -2,6 +2,7 @@ from pairs.ir.types import Types from pairs.code_gen.cgen import CGen from pairs.code_gen.target import Target from pairs.sim.domain_partitioners import DomainPartitioners +from pairs.sim.load_balancing_algorithms import LoadBalancingAlgorithms from pairs.sim.shapes import Shapes from pairs.sim.simulation import Simulation @@ -15,11 +16,12 @@ def simulation( use_contact_history=False, particle_capacity=800000, neighbor_capacity=100, - debug=False): + debug=False, + generate_whole_program=False): return Simulation( CGen(ref, debug), shapes, dims, timesteps, double_prec, use_contact_history, - particle_capacity, neighbor_capacity) + particle_capacity, neighbor_capacity, generate_whole_program) def target_cpu(parallel=False): if parallel: @@ -65,3 +67,18 @@ def regular_domain_partitioner(): def regular_domain_partitioner_xy(): return DomainPartitioners.RegularXY + +def block_forest(): + return DomainPartitioners.BlockForest + +def morton(): + return LoadBalancingAlgorithms.Morton + +def hilbert(): + return LoadBalancingAlgorithms.Hilbert + +def metis(): + return LoadBalancingAlgorithms.Metis + +def diffusive(): + return LoadBalancingAlgorithms.Diffusive \ No newline at end of file diff --git a/src/pairs/analysis/__init__.py b/src/pairs/analysis/__init__.py index 846843c3c72ee73a6359a74f397195eed453f8f2..7b200b201ef6b1126275c6656c98419b36e2d89a 100644 --- a/src/pairs/analysis/__init__.py +++ b/src/pairs/analysis/__init__.py @@ -2,18 +2,23 @@ import time from pairs.analysis.expressions import DetermineExpressionsTerminals, ResetInPlaceOperations, DetermineInPlaceOperations, ListDeclaredExpressions from pairs.analysis.blocks import DiscoverBlockVariants, DetermineExpressionsOwnership, DetermineParentBlocks from pairs.analysis.devices import 
FetchKernelReferences, MarkCandidateLoops -from pairs.analysis.modules import FetchModulesReferences +from pairs.analysis.modules import FetchModulesReferences, InferModulesReturnTypes class Analysis: + """Compiler analysis performed on P4IRS""" + def __init__(self, ast): - self._ast = ast + self._ast_list = ast if isinstance(ast, list) else [ast] def apply(self, analysis): print(f"Performing analysis: {type(analysis).__name__}... ", end="") start = time.time() - analysis.set_ast(self._ast) - analysis.visit() + + for ast in self._ast_list: + analysis.set_ast(ast) + analysis.visit() + elapsed = time.time() - start print(f"{elapsed:.2f}s elapsed.") @@ -46,3 +51,6 @@ class Analysis: def mark_candidate_loops(self): self.apply(MarkCandidateLoops()) + + def infer_modules_return_types(self): + self.apply(InferModulesReturnTypes()) \ No newline at end of file diff --git a/src/pairs/analysis/devices.py b/src/pairs/analysis/devices.py index d4552eba7183ab9a5371cb69e6bc6e07b22ed592..29e554e4606776693cdfd2dd784fc24d0b6995ea 100644 --- a/src/pairs/analysis/devices.py +++ b/src/pairs/analysis/devices.py @@ -12,30 +12,24 @@ from pairs.ir.vectors import VectorOp class MarkCandidateLoops(Visitor): def __init__(self, ast=None): super().__init__(ast) + self.device_module = False + + def visit_For(self, ast_node): + if self.device_module and not ast_node.not_kernel and (not isinstance(ast_node.min, Lit) or not isinstance(ast_node.max, Lit)): + ast_node.mark_as_kernel_candidate() + else: + ast_node.mark_iter_as_ref_candidate() + self.visit(ast_node.block) + def visit_Module(self, ast_node): - possible_candidates = [] - for stmt in ast_node._block.stmts: - if stmt is not None: - if isinstance(stmt, Branch): - for branch_stmt in stmt.block_if.stmts: - if isinstance(branch_stmt, For): - possible_candidates.append(branch_stmt) - - if stmt.block_else is not None: - for branch_stmt in stmt.block_else.stmts: - if isinstance(branch_stmt, For): - possible_candidates.append(branch_stmt) - - if 
isinstance(stmt, For): - possible_candidates.append(stmt) - - for stmt in possible_candidates: - if not isinstance(stmt.min, Lit) or not isinstance(stmt.max, Lit): - stmt.mark_as_kernel_candidate() + parent_runs_on_device = self.device_module + if ast_node.run_on_device: + self.device_module = True self.visit_children(ast_node) - + self.device_module = parent_runs_on_device + class FetchKernelReferences(Visitor): def __init__(self, ast=None): @@ -205,3 +199,12 @@ class FetchKernelReferences(Visitor): # Variables only have a device version when changed within kernels if self.writing: ast_node.device_flag = True + + def visit_Parameter(self, ast_node): + for k in self.kernel_stack: + k.add_parameter(ast_node, self.writing) + + def visit_Iter(self, ast_node): + for k in self.kernel_stack: + if ast_node.is_ref_candidate(): + k.add_iter(ast_node, self.writing) diff --git a/src/pairs/analysis/modules.py b/src/pairs/analysis/modules.py index 4cf0bf8002af97ad66ff2057301235c7756ab5ad..fd7bd11393525e91f96cd0e3a8baaa451209a7d2 100644 --- a/src/pairs/analysis/modules.py +++ b/src/pairs/analysis/modules.py @@ -1,5 +1,17 @@ from pairs.ir.visitor import Visitor +class InferModulesReturnTypes(Visitor): + def __init__(self, ast=None): + super().__init__(ast) + + def visit_Module(self, ast_node): + self.current_module = ast_node + self.visit_children(ast_node) + + def visit_Return(self, ast_node): + self.current_module._return_type = ast_node.expr.type() + self.visit_children(ast_node) + class FetchModulesReferences(Visitor): def __init__(self, ast=None): @@ -39,8 +51,16 @@ class FetchModulesReferences(Visitor): self.visit(ast_node.capacity) def visit_AtomicInc(self, ast_node): + visit_once = self.visit_nodes_once + self.visit_nodes_once = False + # Force write after read for the same node (visited twice) + self.writing = False + self.visit(ast_node.elem) self.writing = True self.visit(ast_node.elem) + self.visit_nodes_once = visit_once + + self.writing = False 
self.visit(ast_node.value) @@ -115,3 +135,8 @@ class FetchModulesReferences(Visitor): for m in self.module_stack: if not ast_node.temporary(): m.add_variable(ast_node, self.writing) + + def visit_Parameter(self, ast_node): + for m in self.module_stack: + # parameters are restricted to read-only, passed by value + m.add_parameter(ast_node, write=False) \ No newline at end of file diff --git a/src/pairs/code_gen/accessor.py b/src/pairs/code_gen/accessor.py new file mode 100644 index 0000000000000000000000000000000000000000..34421cb64cde53f7a75a9079b39ee90c25b7b235 --- /dev/null +++ b/src/pairs/code_gen/accessor.py @@ -0,0 +1,497 @@ +from pairs.ir.types import Types +from pairs.ir.features import FeatureProperty +from pairs.ir.properties import Property + +class PairsAcessor: + def __init__(self, cgen): + self.sim = cgen.sim + self.target = cgen.target + self.print = cgen.print + self.debug = cgen.debug + self.host_device_attr = "" + self.host_attr = "" + + def generate(self): + self.print("") + + if self.target.is_gpu(): + self.print("namespace pairs::internal{") + self.print.add_indent(4) + self.DeviceProps_struct() + self.HostProps_struct() + self.print.add_indent(-4) + self.print("}") + self.print("") + + if self.target.is_gpu(): + self.host_device_attr = "__host__ __device__ " + self.host_attr = "__host__ " + self.print("#include \"math/Vector3.hpp\"") + # self.print("#include \"math/Quaternion.hpp\"") + # self.print("#include \"math/Matrix3.hpp\"") + self.print("") + + self.print("class PairsAccessor {") + self.print("private:") + self.print.add_indent(4) + self.member_variables() + self.print.add_indent(-4) + self.print("public:") + self.print.add_indent(4) + + self.sync_ctx_enum() + self.update() + self.constructor() + # self.destructor() + + for p in self.sim.properties: + if (p.type()==Types.Vector) or (Types.is_scalar(p.type())): + self.get_property(p) + self.set_property(p) + self.sync_property(p) + + for fp in self.sim.feature_properties: + 
self.get_property(fp) + self.set_property(fp) + self.sync_feature_property(fp) + + self.utility_funcs() + + self.print.add_indent(-4) + self.print("};") + self.print("") + + def DeviceProps_struct(self): + self.print("struct DeviceProps{") + self.print.add_indent(4) + + self.print("int nlocal;") + self.print("int nghost;") + self.print("") + + self.print("//Property device pointers") + for p in self.sim.properties: + pname = p.name() + tkw = Types.c_keyword(self.sim, p.type()) + self.print(f"{tkw} *{pname}_d;") + + self.print("") + self.print("//Property device flag pointers") + for p in self.sim.properties: + pname = p.name() + tkw = Types.c_keyword(self.sim, Types.Boolean) + self.print(f"{tkw} *{pname}_device_flag_d;") + + self.print("") + self.print("//Feature properties on device are global") + + self.print("") + self.print("//Feature properties have no flags on device since they can't be modified on device") + + self.print.add_indent(-4) + self.print("};") + self.print("") + + def HostProps_struct(self): + self.print("// HostProps only contains property flags, since properties themselves can be directly accessed through ps->pobj") + self.print("// TODO: Move properties out of PairsObjects into DeviceProps and HostProps, so that all 3 structs have mutually exclusive members") + self.print("struct HostProps{") + self.print.add_indent(4) + + self.print("") + self.print("//Property host pointers are in PairsObjects") + + self.print("") + self.print("//Property host flags") + for p in self.sim.properties: + pname = p.name() + tkw = Types.c_keyword(self.sim, Types.Boolean) + self.print(f"{tkw} {pname}_host_flag = false;") + + self.print("") + self.print("//Property device flags") + for p in self.sim.properties: + pname = p.name() + tkw = Types.c_keyword(self.sim, Types.Boolean) + self.print(f"{tkw} {pname}_device_flag_h = false;") + + self.print("") + self.print("//Feature property host pointers are in PairsObjects") + + self.print("") + self.print("//Feature 
property host flags") + for fp in self.sim.feature_properties: + fpname = fp.name() + tkw = Types.c_keyword(self.sim, Types.Boolean) + self.print(f"{tkw} {fpname}_host_flag = false;") + + self.print("") + self.print("//Feature properties have no device flags") + + self.print.add_indent(-4) + self.print("};") + self.print("") + + def member_variables(self): + self.print("PairsSimulation *ps;") + if self.target.is_gpu(): + self.print("pairs::internal::HostProps *hp;") + self.print("pairs::internal::DeviceProps *dp_h;") + self.print("pairs::internal::DeviceProps *dp_d;") + + def update(self): + self.print(f"{self.host_attr}void update(){{") + if self.target.is_gpu(): + self.print.add_indent(4) + self.print(f"dp_h->nlocal = ps->pobj->nlocal;") + self.print(f"dp_h->nghost = ps->pobj->nghost;") + + for p in self.sim.properties: + pname = p.name() + self.print(f"dp_h->{pname}_d = ps->pobj->{pname}_d;") + + self.print(f"cudaMemcpy(dp_d, dp_h, sizeof(pairs::internal::DeviceProps), cudaMemcpyHostToDevice);") + self.print.add_indent(-4) + self.print("}") + self.print("") + + def constructor(self): + if self.target.is_gpu(): + self.print(f"{self.host_attr}PairsAccessor(PairsSimulation *ps_): ps(ps_){{") + self.print.add_indent(4) + + self.print(f"hp = new pairs::internal::HostProps;") + self.print(f"dp_h = new pairs::internal::DeviceProps;") + self.print(f"cudaMalloc(&dp_d, sizeof(pairs::internal::DeviceProps));") + + for p in self.sim.properties: + pname = p.name() + tkw = Types.c_keyword(self.sim, Types.Boolean) + self.print(f"cudaMalloc(&(dp_h->{pname}_device_flag_d), sizeof({tkw}));") + + self.print("this->update();") + self.print.add_indent(-4) + self.print("}") + + else: + self.print("PairsAccessor(PairsSimulation *ps_): ps(ps_){}") + + self.print("") + + def destructor(self): + if self.target.is_gpu(): + self.print(f"{self.host_attr}~PairsAccessor(){{") + self.print.add_indent(4) + + for p in self.sim.properties: + pname = p.name() + tkw = Types.c_keyword(self.sim, 
Types.Boolean) + self.print(f"cudaFree(dp_h->{pname}_device_flag_d);") + + self.print(f"delete hp;") + self.print(f"delete dp_h;") + self.print(f"cudaFree(dp_d);") + + self.print.add_indent(-4) + self.print("}") + self.print("") + + def ifdef_else(self, ifdef, func1, args1, func2, args2): + self.print.add_indent(4) + self.print(f"#ifdef {ifdef}") + func1(*args1) + self.print("#else") + func2(*args2) + self.print("#endif") + self.print.add_indent(-4) + + def generate_ref_name(self, prop, device): + pname = prop.name() + + if self.target.is_gpu() and device: + if isinstance(prop, Property): + return f"dp_d->{pname}_d" + + elif isinstance(prop, FeatureProperty): + return f"{pname}_d" + else: + return f"ps->pobj->{pname}" + + def getter_body(self, prop, device=False): + self.print.add_indent(4) + tkw = Types.c_accessor_keyword(self.sim, prop.type()) + ptr = self.generate_ref_name(prop, device) + + if isinstance(prop, Property): + idx = "i" + elif isinstance(prop, FeatureProperty): + fname = prop.feature().name() + idx = f"({prop.feature().nkinds()}*{fname}1 + {fname}2)" + + if Types.is_scalar(prop.type()): + self.print(f"return {ptr}[{idx}];") + else: + nelems = Types.number_of_elements(self.sim, prop.type()) + return_values = [f"{ptr}[{idx}*{nelems} + {n}]" for n in range(nelems)] + self.print(f"return {tkw}(" + ", ".join(rv for rv in return_values) + ");") + self.print.add_indent(-4) + + def get_property(self, prop): + pname = prop.name() + tkw = Types.c_accessor_keyword(self.sim, prop.type()) + + if isinstance(prop, Property): + splitname = pname.split('_') + funcname = ''.join(word.capitalize() for word in splitname) + params = "const size_t i" + + elif isinstance(prop, FeatureProperty): + fname = prop.feature().name() + splitname = fname.split('_') + pname.split('_') + funcname = ''.join(word.capitalize() for word in splitname) + params = f"const size_t {fname}1, const size_t {fname}2" + + self.print(f"{self.host_device_attr}{tkw} get{funcname}({params}) const{{") 
+ + if self.target.is_gpu(): + self.ifdef_else("__CUDA_ARCH__", self.getter_body, [prop, True], self.getter_body, [prop, False]) + else: + self.getter_body(prop, False) + + self.print("}") + self.print("") + + def setter_body(self, prop, device=False): + self.print.add_indent(4) + ptr = self.generate_ref_name(prop, device) + + if isinstance(prop, Property): + idx = "i" + elif isinstance(prop, FeatureProperty): + fname = prop.feature().name() + idx = f"({prop.feature().nkinds()}*{fname}1 + {fname}2)" + + if Types.is_scalar(prop.type()): + self.print(f"{ptr}[{idx}] = value;") + else: + nelems = Types.number_of_elements(self.sim, prop.type()) + for n in range(nelems): + self.print(f"{ptr}[{idx}*{nelems} + {n}] = value[{n}];") + + if self.target.is_gpu(): + pname = prop.name() + flag = f"*(dp_d->{pname}_device_flag_d)" if device else f"hp->{pname}_host_flag" + self.print(f"{flag} = true;") + + self.print.add_indent(-4) + + def set_property(self, prop): + pname = prop.name() + tkw = Types.c_accessor_keyword(self.sim, prop.type()) + + if isinstance(prop, Property): + splitname = pname.split('_') + funcname = ''.join(word.capitalize() for word in splitname) + self.print(f"{self.host_device_attr}void set{funcname}(const size_t i, const {tkw} &value){{") + + elif isinstance(prop, FeatureProperty): + fname = prop.feature().name() + splitname = fname.split('_') + pname.split('_') + funcname = ''.join(word.capitalize() for word in splitname) + # Feature properties can only be set from host + self.print(f"{self.host_attr}void set{funcname}(const size_t {fname}1, const size_t {fname}2, const {tkw} &value){{") + + if self.target.is_gpu(): + if isinstance(prop, Property): + self.ifdef_else("__CUDA_ARCH__", self.setter_body, [prop, True], self.setter_body, [prop, False]) + + elif isinstance(prop, FeatureProperty): + self.setter_body(prop, False) + else: + self.setter_body(prop, False) + + self.print("}") + self.print("") + + def sync_ctx_enum(self): + self.print("enum 
SyncContext{") + self.print(" Host = 0,") + self.print(" Device") + self.print("};") + self.print("") + + def sync_property(self, prop): + pname = prop.name() + pid = prop.id() + splitname = pname.split('_') + funcname = ''.join(word.capitalize() for word in splitname) + + self.print(f"{self.host_attr}void sync{funcname}(SyncContext sync_ctx = Host, bool overwrite = false){{") + + if self.target.is_gpu(): + self.print.add_indent(4) + self.print(f"cudaMemcpy(&(hp->{pname}_device_flag_h), dp_h->{pname}_device_flag_d, sizeof(bool), cudaMemcpyDeviceToHost);") + self.print("") + + ##################################################################################################################### + ##################################################################################################################### + + self.print(f"if (hp->{pname}_host_flag && hp->{pname}_device_flag_h){{") + self.print(f" PAIRS_ERROR(\"OUT OF SYNC 1! Both host and device versions of {pname} are in a modified state.\\n\");") + self.print(" exit(-1);") + self.print("}") + self.print(f"else if(sync_ctx==Host && overwrite==false){{") + self.print(f" if (hp->{pname}_host_flag && !ps->pairs_runtime->getPropFlags()->isHostFlagSet({pid})){{") + self.print(f" PAIRS_ERROR(\"OUT OF SYNC 2! Did you forget to sync{funcname}(Host) before calling set{funcname} from host? Use sync{funcname}(Host,true) if you want to overwrite {pname} values in host.\\n\");") + self.print(" exit(-1);") + self.print(" }") + self.print("}") + self.print(f"else if(sync_ctx==Device && overwrite==false){{") + self.print(f" if (hp->{pname}_device_flag_h && !ps->pairs_runtime->getPropFlags()->isDeviceFlagSet({pid})){{") + self.print(f" PAIRS_ERROR(\"OUT OF SYNC 3! Did you forget to sync{funcname}(Device) before calling set{funcname} from device? 
Use sync{funcname}(Device,true) if you want to overwrite {pname} values in device.\\n\");") + self.print(" exit(-1);") + self.print(" }") + self.print("}") + self.print("") + + ##################################################################################################################### + ##################################################################################################################### + + self.print(f"if (hp->{pname}_host_flag){{") + self.print(f" ps->pairs_runtime->getPropFlags()->setHostFlag({pid});") + self.print(f" ps->pairs_runtime->getPropFlags()->clearDeviceFlag({pid});") + self.print("}") + + self.print(f"else if (hp->{pname}_device_flag_h){{") + self.print(f" ps->pairs_runtime->getPropFlags()->setDeviceFlag({pid});") + self.print(f" ps->pairs_runtime->getPropFlags()->clearHostFlag({pid});") + self.print("}") + self.print("") + + nelems = Types.number_of_elements(self.sim, prop.type()) + tkw = Types.c_keyword(self.sim, prop.type()) + + self.print(f"if (sync_ctx==Device) {{") + self.print(f" ps->pairs_runtime->copyPropertyToDevice({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));") + self.print("}") + self.print("") + + self.print(f"if (sync_ctx==Host) {{") + self.print(f" ps->pairs_runtime->copyPropertyToHost({pid}, ReadOnly, (((ps->pobj->nlocal + ps->pobj->nghost) * {nelems}) * sizeof({tkw})));") + self.print("}") + self.print("") + + self.print(f"hp->{pname}_host_flag = false;") + self.print(f"hp->{pname}_device_flag_h = false;") + self.print(f"cudaMemcpy(dp_h->{pname}_device_flag_d, &(hp->{pname}_device_flag_h), sizeof(bool), cudaMemcpyHostToDevice);") + + self.print.add_indent(-4) + self.print("}") + self.print("") + + def sync_feature_property(self, fp): + fp_id = fp.id() + fp_name = fp.name() + f_name = fp.feature().name() + splitname = f_name.split('_') + fp_name.split('_') + funcname = ''.join(word.capitalize() for word in splitname) + + self.print(f"{self.host_attr}void 
sync{funcname}(SyncContext sync_ctx = Host){{") + + if self.target.is_gpu(): + self.print.add_indent(4) + self.print(f"if (hp->{fp_name}_host_flag && sync_ctx==Device) {{") + self.print(f" ps->pairs_runtime->copyFeaturePropertyToDevice({fp_id});") + self.print("}") + self.print("") + + self.print(f"hp->{fp_name}_host_flag = false;") + self.print.add_indent(-4) + + self.print("}") + self.print("") + + def utility_funcs(self): + nlocal = "ps->pobj->nlocal" + nlocal_d = "dp_d->nlocal" + nghost = "ps->pobj->nghost" + nghost_d = "dp_d->nghost" + + if self.target.is_gpu(): + self.print(f"{self.host_device_attr}int size() const {{") + self.print(f" #ifdef __CUDA_ARCH__") + self.print(f" return {nlocal_d} + {nghost_d};") + self.print(f" #else") + self.print(f" return {nlocal} + {nghost};") + self.print(f" #endif") + self.print("}") + self.print("") + else: + self.print(f"int size() const {{return {nlocal} + {nghost};}}") + + if self.target.is_gpu(): + self.print(f"{self.host_device_attr}int nlocal() const {{") + self.print(f" #ifdef __CUDA_ARCH__") + self.print(f" return {nlocal_d};") + self.print(f" #else") + self.print(f" return {nlocal};") + self.print(f" #endif") + self.print("}") + self.print("") + else: + self.print(f"int nlocal() const {{return {nlocal};}}") + + if self.target.is_gpu(): + self.print(f"{self.host_device_attr}int nghost() const {{") + self.print(f" #ifdef __CUDA_ARCH__") + self.print(f" return {nghost_d};") + self.print(f" #else") + self.print(f" return {nghost};") + self.print(f" #endif") + self.print("}") + self.print("") + else: + self.print(f"int nghost() const {{return {nghost};}}") + + + self.print(f"{self.host_device_attr}int getInvalidIdx(){{return -1;}}") + self.print("") + + self.print(f"{self.host_device_attr}pairs::id_t getInvalidUid(){{return 0;}}") + self.print("") + + self.print(f"{self.host_device_attr}int uidToIdx(pairs::id_t uid){{") + self.print(" int idx = getInvalidIdx();") + self.print(" for(int i=0; i<size(); ++i){") + 
self.print(" if (getUid(i) == uid){") + self.print(" idx = i;") + self.print(" break;") + self.print(" }") + self.print(" }") + self.print(" return idx;") + self.print("}") + self.print("") + + self.print(f"{self.host_device_attr}int uidToIdxLocal(pairs::id_t uid){{") + self.print(" int idx = getInvalidIdx();") + self.print(" for(int i=0; i<nlocal(); ++i){") + self.print(" if (getUid(i) == uid){") + self.print(" idx = i;") + self.print(" break;") + self.print(" }") + self.print(" }") + self.print(" return idx;") + self.print("}") + self.print("") + + self.print(f"{self.host_device_attr}int uidToIdxGhost(pairs::id_t uid){{") + self.print(" int idx = getInvalidIdx();") + self.print(" for(int i=nlocal(); i<size(); ++i){") + self.print(" if (getUid(i) == uid){") + self.print(" idx = i;") + self.print(" break;") + self.print(" }") + self.print(" }") + self.print(" return idx;") + self.print("}") + self.print("") diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py index a053d942e72fabf44dba726c8bd6c4e93a919f5e..76e22283330f531eed2f449a30eee798bf62c00e 100644 --- a/src/pairs/code_gen/cgen.py +++ b/src/pairs/code_gen/cgen.py @@ -9,7 +9,7 @@ from pairs.ir.cast import Cast from pairs.ir.contexts import Contexts from pairs.ir.declaration import Decl from pairs.ir.scalars import ScalarOp -from pairs.ir.device import CopyArray, CopyContactProperty, CopyProperty, CopyVar, DeviceStaticRef, HostRef +from pairs.ir.device import CopyArray, CopyContactProperty, CopyProperty, CopyFeatureProperty, CopyVar, DeviceStaticRef, HostRef from pairs.ir.features import FeatureProperty, FeaturePropertyAccess, RegisterFeatureProperty from pairs.ir.functions import Call from pairs.ir.kernel import KernelLaunch @@ -26,12 +26,15 @@ from pairs.ir.properties import Property, PropertyAccess, RegisterProperty, Real from pairs.ir.select import Select from pairs.ir.sizeof import Sizeof from pairs.ir.types import Types -from pairs.ir.utils import Print +from pairs.ir.print import Print, 
PrintCode from pairs.ir.variables import Var, DeclareVariable, Deref +from pairs.ir.parameters import Parameter from pairs.ir.vectors import Vector, VectorAccess, VectorOp, ZeroVector +from pairs.ir.ret import Return from pairs.sim.domain_partitioners import DomainPartitioners from pairs.sim.timestep import Timestep from pairs.code_gen.printer import Printer +from pairs.code_gen.accessor import PairsAcessor class CGen: @@ -42,6 +45,7 @@ class CGen: self.target = None self.print = None self.kernel_context = False + self.generate_full_object_names = False self.ref = ref self.debug = debug @@ -53,15 +57,104 @@ class CGen: def real_type(self): return Types.c_keyword(self.sim, Types.Real) + + # def generate_cmake_config_file(self): + # self.print = Printer("pairs_cmake_params.txt") + # self.print.start() + # self.print(f"PAIRS_TARGET={self.ref}") + # self.print(f"GENERATE_WHOLE_PROGRAM={'ON' if self.sim._generate_whole_program else 'OFF'}") + # self.print(f"USE_WALBERLA={'ON' if self.sim._partitioner == DomainPartitioners.BlockForest else 'OFF'}") + # # self.print(f"COMPILE_CUDA={'ON' if self.target.is_gpu() else 'OFF'}") + # self.print.end() + + def generate_object_reference(self, obj, device=False, index=None): + if device and (not self.target.is_gpu() or not obj.device_flag): + # Ideally this should never be called + return "nullptr" + + name = obj.name() if not device else f"{obj.name()}_d" + t = obj.type() + if not Types.is_scalar(t) and index is not None: + name += f"_{index}" + + if isinstance(obj, Var): + if self.generate_full_object_names: + if not obj.temporary(): + if obj.device_flag and self.target.is_gpu() and device: + return f"pobj->rv_{obj.name()}" + else: + return f"pobj->{name}" + return name + + if isinstance(obj, FeatureProperty) and device and obj.device_flag: + return name + + if isinstance(obj, Array) and device and obj.device_flag: + if obj.is_static(): + return name + + + if self.generate_full_object_names: + return f"pobj->{name}" + else: + 
return name + - def generate_program(self, ast_node): - ext = ".cu" if self.target.is_gpu() else ".cpp" - self.print = Printer(self.ref + ext) + def generate_object_address(self, obj, device=False, index=None): + if device and (not self.target.is_gpu() or not obj.device_flag): + return "nullptr" + + ref = self.generate_object_reference(obj, device, index) + return f"&({ref})" + + def generate_interfaces(self): + #self.print = Printer(f"interfaces/{self.ref}.hpp") + self.print = Printer("internal_interfaces/last_generated.hpp") self.print.start() + self.print("#pragma once") + self.generate_interface_namespace('pairs_host_interface') if self.target.is_gpu(): - self.print("#define PAIRS_TARGET_CUDA") + self.generate_interface_namespace('pairs_cuda_interface', "__inline__ __device__") + + self.print.end() + + def generate_interface_namespace(self, namespace, prefix=None): + self.print("") + self.print(f"namespace {namespace} {{") + self.print("") + for prop in self.sim.properties.all(): + prop_name = prop.name() + t = prop.type() + tkw = Types.c_keyword(self.sim, t) + func_decl = "" if prefix is None else f"{prefix} " + if Types.is_scalar(t): + func_decl += f"{tkw} get_{prop_name}({tkw} *{prop_name}, int i) {{ return {prop_name}[i]; }}" + + else: + nelems = Types.number_of_elements(self.sim, t) + func_decl += f"{tkw} get_{prop_name}({tkw} *{prop_name}, int i, int j, int capacity) {{ return {prop_name}[" + + if prop.layout() == Layouts.AoS: + func_decl += f"i * {nelems} + j" + + else: + func_decl += f"j * capacity + i" + + func_decl += "]; }" + + self.print(func_decl) + + self.print("") + self.print("}") + + def generate_preamble(self): + # self.print(f"#define APPLICATION_REFERENCE \"{self.ref}\"") + + if self.target.is_gpu(): + self.print("#include <math_constants.h>") + if self.target.is_openmp(): self.print("#define PAIRS_TARGET_OPENMP") self.print("#include <omp.h>") @@ -72,135 +165,346 @@ class CGen: self.print("#include <stdio.h>") self.print("#include 
<stdlib.h>") self.print("//---") - self.print("#include \"runtime/likwid-marker.h\"") - self.print("#include \"runtime/copper_fcc_lattice.hpp\"") - self.print("#include \"runtime/dem_sc_grid.hpp\"") - self.print("#include \"runtime/pairs.hpp\"") - self.print("#include \"runtime/read_from_file.hpp\"") - self.print("#include \"runtime/stats.hpp\"") - self.print("#include \"runtime/timing.hpp\"") - self.print("#include \"runtime/thermo.hpp\"") - self.print("#include \"runtime/vtk.hpp\"") - - #if self.target.is_gpu(): - # self.print("#include \"runtime/devices/cuda.hpp\"") - #else: - # self.print("#include \"runtime/devices/dummy.hpp\"") - + self.print("#include \"likwid-marker.h\"") + self.print("#include \"copper_fcc_lattice.hpp\"") + self.print("#include \"create_body.hpp\"") + self.print("#include \"dem_sc_grid.hpp\"") + self.print("#include \"pairs.hpp\"") + self.print("#include \"read_from_file.hpp\"") + self.print("#include \"stats.hpp\"") + self.print("#include \"timing.hpp\"") + self.print("#include \"thermo.hpp\"") + self.print("#include \"vtk.hpp\"") self.print("") self.print("using namespace pairs;") self.print("") + def generate_module_header(self, module, definition=True): + module_params = [] + + if not module.interface: + module_params += ["PairsRuntime *pairs_runtime", "struct PairsObjects *pobj"] + + if module.name=="initialize" and self.sim.create_domain_at_initialization: + module_params += ["int argc", "char **argv"] + + if module.name=="set_domain": + module_params += ["int argc", "char **argv"] + + module_params += [f"{Types.c_keyword(self.sim, param.type())} {param.name()}" for param in module.parameters()] + + print_params = ", ".join(module_params) + ending = "{" if definition else ";" + tkw = Types.c_keyword(self.sim, module.return_type) + self.print(f"{tkw} {module.name}({print_params}){ending}") + + def generate_module_decls(self): + self.print("") + self.print("namespace pairs::internal {") + self.print.add_indent(4) + + # All modules 
except the interface ones are declared in the pairs::internal scope + for module in self.sim.modules() + self.sim.udf_modules(): + assert not module.interface + self.generate_module_header(module, definition=False) + + self.print.add_indent(-4) + self.print("}") + self.print("") + + def generate_pairs_object_structure(self): + self.print("") + externkw = "" if self.sim._generate_whole_program else "extern " if self.target.is_gpu(): for array in self.sim.arrays.statics(): if array.device_flag: t = array.type() tkw = Types.c_keyword(self.sim, t) size = self.generate_expression(ScalarOp.inline(array.alloc_size())) - self.print(f"__constant__ {tkw} d_{array.name()}[{size}];") + self.print(f"{externkw}__constant__ {tkw} {array.name()}_d[{size}];") for feature_prop in self.sim.feature_properties: if feature_prop.device_flag: t = feature_prop.type() tkw = Types.c_keyword(self.sim, t) size = feature_prop.array_size() - self.print(f"__constant__ {tkw} d_{feature_prop.name()}[{size}];") + self.print(f"{externkw}__constant__ {tkw} {feature_prop.name()}_d[{size}];") self.print("") + self.print("struct PairsObjects {") + self.print.add_indent(4) + + self.print("// Arrays") + for a in self.sim.arrays.all(): + ptr = a.name() + tkw = Types.c_keyword(self.sim, a.type()) + + if a.is_static(): + size = self.generate_expression(ScalarOp.inline(a.alloc_size())) + self.print(f"{tkw} {ptr}[{size}];") + + else: + self.print(f"{tkw} *{ptr};") + + if self.target.is_gpu() and a.device_flag: + if a.is_static(): + continue + else: + self.print(f"{tkw} *{ptr}_d;") + + self.print("// Properties") + for p in self.sim.properties: + ptr = p.name() + tkw = Types.c_keyword(self.sim, p.type()) + self.print(f"{tkw} *{ptr};") + + if self.target.is_gpu() and p.device_flag: + self.print(f"{tkw} *{ptr}_d;") + + self.print("// Contact properties") + for cp in self.sim.contact_properties: + ptr = cp.name() + tkw = Types.c_keyword(self.sim, cp.type()) + self.print(f"{tkw} *{ptr};") + + if self.target.is_gpu() 
and cp.device_flag: + self.print(f"{tkw} *{ptr}_d;") + + self.print("// Feature properties") + for fp in self.sim.feature_properties: + ptr = fp.name() + array_size = fp.array_size() + tkw = Types.c_keyword(self.sim, fp.type()) + self.print(f"{tkw} {ptr}[{array_size}];") + + self.print("// Variables") + for v in self.sim.vars.all(): + vname = v.name() + tkw = Types.c_keyword(self.sim, v.type()) + self.print(f"{tkw} {vname};") + + if self.target.is_gpu() and v.device_flag: + self.print(f"RuntimeVar<{tkw}> rv_{vname};") + + self.print.add_indent(-4) + self.print("};") + self.print("") + + def generate_program(self, ast_node): + self.generate_interfaces() + ext = ".cu" if self.target.is_gpu() else ".cpp" + self.print = Printer(self.ref + ext) + self.print.start() + self.generate_preamble() + self.generate_pairs_object_structure() + self.generate_module_decls() + + self.print("namespace pairs::internal {") + self.print.add_indent(4) for kernel in self.sim.kernels(): self.generate_kernel(kernel) for module in self.sim.modules(): + if module.name!='main': + self.generate_module(module) + + self.print.add_indent(-4) + self.print("}") + + for module in self.sim.modules(): + if module.name=='main': + self.generate_main(module) + + self.print.end() + + def generate_library(self): + self.generate_interfaces() + # Generate CUDA/CPP file with modules + ext = ".cu" if self.target.is_gpu() else ".cpp" + self.print = Printer(self.ref + ext) + self.print.start() + self.generate_preamble() + self.print(f"#include \"{self.ref}.hpp\"") + self.print("") + + if self.target.is_gpu(): + for array in self.sim.arrays.statics(): + if array.device_flag: + t = array.type() + tkw = Types.c_keyword(self.sim, t) + size = self.generate_expression(ScalarOp.inline(array.alloc_size())) + self.print(f"__constant__ {tkw} {array.name()}_d[{size}];") + + for feature_prop in self.sim.feature_properties: + if feature_prop.device_flag: + t = feature_prop.type() + tkw = Types.c_keyword(self.sim, t) + size = 
feature_prop.array_size() + self.print(f"__constant__ {tkw} {feature_prop.name()}_d[{size}];") + + self.print("") + + self.print("namespace pairs::internal {") + self.print.add_indent(4) + + for kernel in self.sim.kernels(): + self.generate_kernel(kernel) + + # All modules except the interface ones are defined in the pairs::internal scope + for module in self.sim.modules() + self.sim.udf_modules(): + assert not module.interface self.generate_module(module) + self.print.add_indent(-4) + self.print("}") + self.print.end() - def generate_module(self, module): - if module.name == 'main': - ndims = module.sim.ndims() - nprops = module.sim.properties.nprops() - ncontactprops = module.sim.contact_properties.nprops() - narrays = module.sim.arrays.narrays() - part = DomainPartitioners.c_keyword(module.sim.partitioner()) + # Generate library header + self.print = Printer(self.ref + ".hpp") + self.print.start() + self.print("#pragma once") + + self.generate_preamble() + self.generate_pairs_object_structure() + self.generate_module_decls() + + self.generate_full_object_names = True + self.print("class PairsSimulation {") + self.print("private:") + self.print(" PairsRuntime *pairs_runtime;") + self.print(" struct PairsObjects *pobj;") + self.print(" friend class PairsAccessor;") + self.print("") + self.print("public:") + self.print.add_indent(4) - self.print("int main(int argc, char **argv) {") - self.print(f" PairsSimulation *pairs = new PairsSimulation({nprops}, {ncontactprops}, {narrays}, {part});") + self.print("PairsRuntime* getPairsRuntime() {") + self.print(" return pairs_runtime;") + self.print("}") - if module.sim._enable_profiler: - self.print(" LIKWID_MARKER_INIT;") + # Only interface modules are generated in the PairsSimulation class + for module in self.sim.interface_modules(): + self.generate_module(module) - self.generate_statement(module.block) + self.print.add_indent(-4) + self.print("};") - if module.sim._enable_profiler: - self.print(" LIKWID_MARKER_CLOSE;") 
+ PairsAcessor(self).generate() + + self.print.end() + self.generate_full_object_names = False - self.print(" pairs::print_timers(pairs);") - self.print(" pairs::print_stats(pairs, nlocal, nghost);") - self.print(" delete pairs;") - self.print(" return 0;") - self.print("}") + def generate_module_declerations(self, module): + device_cond = module.run_on_device and self.target.is_gpu() - else: - module_params = "PairsSimulation *pairs" - for var in module.read_only_variables(): - type_kw = Types.c_keyword(self.sim, var.type()) - decl = f"{type_kw} {var.name()}" - module_params += f", {decl}" - - for var in module.write_variables(): - type_kw = Types.c_keyword(self.sim, var.type()) - decl = f"{type_kw} *{var.name()}" - module_params += f", {decl}" - - for array in module.arrays(): - type_kw = Types.c_keyword(self.sim, array.type()) - decl = f"{type_kw} *{array.name()}" - module_params += f", {decl}" - - if array in module.host_references(): - decl = f"{type_kw} *h_{array.name()}" - module_params += f", {decl}" - - for prop in module.properties(): - type_kw = Types.c_keyword(self.sim, prop.type()) - decl = f"{type_kw} *{prop.name()}" - module_params += f", {decl}" - - if prop in module.host_references(): - decl = f"{type_kw} *h_{prop.name()}" - module_params += f", {decl}" - - for contact_prop in module.contact_properties(): - type_kw = Types.c_keyword(self.sim, contact_prop.type()) - decl = f"{type_kw} *{contact_prop.name()}" - module_params += f", {decl}" - - if contact_prop in module.host_references(): - decl = f"{type_kw} *h_{contact_prop.name()}" - module_params += f", {decl}" - - for feature_prop in module.feature_properties(): - type_kw = Types.c_keyword(self.sim, feature_prop.type()) - decl = f"{type_kw} *{feature_prop.name()}" - module_params += f", {decl}" - - if feature_prop in module.host_references(): - decl = f"{type_kw} *h_{feature_prop.name()}" - module_params += f", {decl}" - - self.print(f"void {module.name}({module_params}) {{") - - if self.debug: - 
self.print.add_indent(4) - self.print(f"PAIRS_DEBUG(\"{module.name}\\n\");") - self.print.add_indent(-4) - - self.generate_statement(module.block) - self.print("}") + for var in module.read_only_variables(): + type_kw = Types.c_keyword(self.sim, var.type()) + self.print(f"{type_kw} {var.name()} = pobj->{var.name()};") + + for var in module.write_variables(): + type_kw = Types.c_keyword(self.sim, var.type()) + + if device_cond and var.device_flag: + self.print(f"{type_kw} *{var.name()} = pobj->rv_{var.name()}.getDevicePointer();") + elif var.force_read: + self.print(f"{type_kw} {var.name()} = pobj->{var.name()};") + else: + self.print(f"{type_kw} *{var.name()} = &(pobj->{var.name()});") + + for array in module.arrays(): + type_kw = Types.c_keyword(self.sim, array.type()) + name = array.name() if not device_cond else f"{array.name()}_d" + if not array.is_static() or (array.is_static() and not device_cond): + self.print(f"{type_kw} *{array.name()} = pobj->{name};") + + if array in module.host_references(): + self.print(f"{type_kw} *{array.name()}_h = pobj->{array.name()};") + + + for prop in module.properties(): + type_kw = Types.c_keyword(self.sim, prop.type()) + name = prop.name() if not device_cond else f"{prop.name()}_d" + self.print(f"{type_kw} *{prop.name()} = pobj->{name};") + + if prop in module.host_references(): + self.print(f"{type_kw} *{prop.name()}_h = pobj->{prop.name()};") + + for contact_prop in module.contact_properties(): + type_kw = Types.c_keyword(self.sim, contact_prop.type()) + name = contact_prop.name() if not device_cond else f"{contact_prop.name()}_d" + self.print(f"{type_kw} *{contact_prop.name()} = pobj->{name};") + + if contact_prop in module.host_references(): + self.print(f"{type_kw} *{contact_prop.name()}_h = pobj->{contact_prop.name()};") + + for feature_prop in module.feature_properties(): + type_kw = Types.c_keyword(self.sim, feature_prop.type()) + name = feature_prop.name() if not device_cond else f"{feature_prop.name()}_d" + + if 
feature_prop.device_flag and device_cond: + # self.print(f"{type_kw} *{feature_prop.name()} = {self.generate_object_reference(feature_prop, device=device_cond)};") + continue + else: + self.print(f"{type_kw} *{feature_prop.name()} = pobj->{name};") + + if feature_prop in module.host_references(): + self.print(f"{type_kw} *{feature_prop.name()}_h = pobj->{feature_prop.name()};") + + def generate_main(self, module): + assert module.name=='main' + + ndims = module.sim.ndims() + nprops = module.sim.properties.nprops() + ncontactprops = module.sim.contact_properties.nprops() + narrays = module.sim.arrays.narrays() + part = DomainPartitioners.c_keyword(module.sim.partitioner()) + + self.generate_full_object_names = True + self.print("int main(int argc, char **argv) {") + self.print(f" PairsRuntime *pairs_runtime = new PairsRuntime({nprops}, {ncontactprops}, {narrays}, {part});") + self.print(f" struct PairsObjects *pobj = new PairsObjects();") + + if module.sim._enable_profiler: + self.print(" LIKWID_MARKER_INIT;") + + self.generate_statement(module.block) + + if module.sim._enable_profiler: + self.print(" LIKWID_MARKER_CLOSE;") + + self.print(" pairs::print_timers(pairs_runtime);") + self.print(" pairs::print_stats(pairs_runtime, pobj->nlocal, pobj->nghost);") + self.print(" delete pobj;") + self.print(" delete pairs_runtime;") + self.print(" return 0;") + self.print("}") + self.generate_full_object_names = False + + def generate_module(self, module): + self.generate_module_header(module, definition=True) + self.print.add_indent(4) + + if self.debug: + self.print(f"PAIRS_DEBUG(\"\\n{module.name}\\n\");") + + if not module.interface: + self.generate_module_declerations(module) + + self.print.add_indent(-4) + self.generate_statement(module.block) + self.print("}") + self.print("") def generate_kernel(self, kernel): kernel_params = "int range_start" + has_resizes = False + for param in kernel.parameters(): + type_kw = Types.c_keyword(self.sim, param.type()) + decl = 
f"{type_kw} {param.name()}" + kernel_params += f", {decl}" + for var in kernel.read_only_variables(): type_kw = Types.c_keyword(self.sim, var.type()) decl = f"{type_kw} {var.name()}" @@ -211,10 +515,19 @@ class CGen: decl = f"{type_kw} *{var.name()}" kernel_params += f", {decl}" + for it in kernel.iters(): + type_kw = Types.c_keyword(self.sim, it.type()) + decl = f"{type_kw} {it.name()}" + kernel_params += f", {decl}" + for array in kernel.arrays(): + if array.is_static(): + continue type_kw = Types.c_keyword(self.sim, array.type()) decl = f"{type_kw} *{array.name()}" kernel_params += f", {decl}" + if array.name() == "resizes": + has_resizes = True for prop in kernel.properties(): type_kw = Types.c_keyword(self.sim, prop.type()) @@ -227,6 +540,8 @@ class CGen: kernel_params += f", {decl}" for feature_prop in kernel.feature_properties(): + if feature_prop.device_flag: + continue type_kw = Types.c_keyword(self.sim, feature_prop.type()) decl = f"{type_kw} *{feature_prop.name()}" kernel_params += f", {decl}" @@ -245,7 +560,9 @@ class CGen: self.print(f" const int {kernel.iterator.name()} = blockIdx.x * blockDim.x + threadIdx.x + range_start;") self.print.add_indent(4) self.kernel_context = True + self.generate_statement(kernel.block) + self.kernel_context = False self.print.add_indent(-4) self.print("}") @@ -255,6 +572,7 @@ class CGen: t = ast_node.array.type() tkw = Types.c_keyword(self.sim, t) size = self.generate_expression(ScalarOp.inline(ast_node.array.alloc_size())) + if ast_node.array.init_value is not None: v_str = str(ast_node.array.init_value) if t == Types.Int64: @@ -262,10 +580,8 @@ class CGen: if t == Types.UInt64: v_str += "ULL" - init_string = v_str + (f", {v_str}" * (size - 1)) - self.print(f"{tkw} {ast_node.array.name()}[{size}] = {{{init_string}}};") - else: - self.print(f"{tkw} {ast_node.array.name()}[{size}];") + for i in range(size): + self.print(f"{ast_node.array.name()}[{i}] = {v_str};") if isinstance(ast_node, Assign): if not 
Types.is_scalar(ast_node._dest.type()): @@ -287,7 +603,10 @@ class CGen: if ast_node.check_for_resize(): resize = self.generate_expression(ast_node.resize) capacity = self.generate_expression(ast_node.capacity) + # self.print(f"printf (\" %d -- before AtomicInc: nsend = %d -- send_capacity = %d -- resizes[0] = %d\\n\", {Printer.line_id}, {elem}, {capacity}, {resize});") self.print(f"pairs::{prefix}atomic_add_resize_check(&({elem}), {value}, &({resize}), {capacity});") + # self.print(f"printf (\" %d -- after AtomicInc: nsend = %d -- send_capacity = %d -- resizes[0] = %d\\n\", {Printer.line_id}, {elem}, {capacity}, {resize});") + else: self.print(f"pairs::{prefix}atomic_add(&({elem}), {value});") @@ -484,10 +803,12 @@ class CGen: size = self.generate_expression(ast_node.size()) if size is not None: - self.print(f"pairs->copyArrayTo{ctx_suffix}({array_id}, {action}, {size}); // {array_name}") + self.print(f"pairs_runtime->copyArrayTo{ctx_suffix}({array_id}, {action}, {size}); // {array_name}") else: - self.print(f"pairs->copyArrayTo{ctx_suffix}({array_id}, {action}); // {array_name}") + # self.print(f"std::cout<< \"{Printer.line_id} -- before {array_name} copyArrayTo{ctx_suffix}({action}) === \" << pobj->{array_name}[0] << \" \" << pobj->{array_name}[1] << \" \" << pobj->{array_name}[2] << std::endl;") + self.print(f"pairs_runtime->copyArrayTo{ctx_suffix}({array_id}, {action}); // {array_name}") + # self.print(f"std::cout<< \"{Printer.line_id} -- after {array_name} copyArrayTo{ctx_suffix}({action}) === \" << pobj->{array_name}[0] << \" \" << pobj->{array_name}[1] << \" \" << pobj->{array_name}[2] << std::endl;") if isinstance(ast_node, CopyContactProperty): prop_id = ast_node.contact_prop().id() @@ -495,7 +816,7 @@ class CGen: action = Actions.c_keyword(ast_node.action()) ctx_suffix = "Device" if ast_node.context() == Contexts.Device else "Host" size = self.generate_expression(ast_node.contact_prop().copy_size()) - 
self.print(f"pairs->copyContactPropertyTo{ctx_suffix}({prop_id}, {action}, {size}); // {prop_name}") + self.print(f"pairs_runtime->copyContactPropertyTo{ctx_suffix}({prop_id}, {action}, {size}); // {prop_name}") if isinstance(ast_node, CopyProperty): prop_id = ast_node.prop().id() @@ -503,12 +824,20 @@ class CGen: action = Actions.c_keyword(ast_node.action()) ctx_suffix = "Device" if ast_node.context() == Contexts.Device else "Host" size = self.generate_expression(ast_node.prop().copy_size()) - self.print(f"pairs->copyPropertyTo{ctx_suffix}({prop_id}, {action}, {size}); // {prop_name}") + self.print(f"pairs_runtime->copyPropertyTo{ctx_suffix}({prop_id}, {action}, {size}); // {prop_name}") + + if isinstance(ast_node, CopyFeatureProperty): + prop_id = ast_node.prop().id() + prop_name = ast_node.prop().name() + if ast_node.context() == Contexts.Device: + assert ast_node.action()==Actions.ReadOnly, "Feature properties can only be read from device." + self.print(f"pairs_runtime->copyFeaturePropertyToDevice({prop_id}); // {prop_name}") if isinstance(ast_node, CopyVar): var_name = ast_node.variable().name() ctx_suffix = "Device" if ast_node.context() == Contexts.Device else "Host" - self.print(f"rv_{var_name}.copyTo{ctx_suffix}();") + ref = self.generate_object_reference(ast_node.variable(), device=True) + self.print(f"{ref}.copyTo{ctx_suffix}();") if isinstance(ast_node, For): iterator = self.generate_expression(ast_node.iterator) @@ -531,24 +860,32 @@ class CGen: if ast_node.decl: self.print(f"{tkw} *{array_name} = ({tkw} *) malloc({size});") if self.target.is_gpu() and ast_node.array.device_flag: - self.print(f"{tkw} *d_{array_name} = ({tkw} *) pairs::device_alloc({size});") + self.print(f"{tkw} *{array_name}_d = ({tkw} *) pairs::device_alloc({size});") else: self.print(f"{array_name} = ({tkw} *) malloc({size});") if self.target.is_gpu() and ast_node.array.device_flag: - self.print(f"d_{array_name} = ({tkw} *) pairs::device_alloc({size});") + 
self.print(f"{array_name}_d = ({tkw} *) pairs::device_alloc({size});") if isinstance(ast_node, KernelLaunch): range_start = self.generate_expression(ScalarOp.inline(ast_node.min)) kernel = ast_node.kernel kernel_params = f"{range_start}" + for param in kernel.parameters(): + kernel_params += f", {param.name()}" + for var in kernel.read_only_variables(): kernel_params += f", {var.name()}" for var in kernel.write_variables(): kernel_params += f", {var.name()}" + for it in kernel.iters(): + kernel_params += f", {it.name()}" + for array in kernel.arrays(): + if array.is_static(): + continue kernel_params += f", {array.name()}" for prop in kernel.properties(): @@ -558,6 +895,8 @@ class CGen: kernel_params += f", {contact_prop.name()}" for feature_prop in kernel.feature_properties(): + if feature_prop.device_flag: + continue kernel_params += f", {feature_prop.name()}" for array_access in kernel.array_accesses(): @@ -571,177 +910,152 @@ class CGen: self.print(f"if({nblocks} > 0 && {threads_per_block} > 0) {{") self.print.add_indent(4) self.print(f"{kernel.name}<<<{nblocks}, {threads_per_block}>>>({kernel_params});") - self.print("pairs->sync();") + self.print("pairs_runtime->sync();") self.print.add_indent(-4) self.print("}") if isinstance(ast_node, ModuleCall): - module = ast_node.module - module_params = "pairs" - device_cond = module.run_on_device and self.target.is_gpu() - - for var in module.read_only_variables(): - decl = var.name() - module_params += f", {decl}" - - for var in module.write_variables(): - decl = f"rv_{var.name()}.getDevicePointer()" if device_cond and var.device_flag else f"&{var.name()}" - module_params += f", {decl}" - - for array in module.arrays(): - decl = f"d_{array.name()}" if device_cond else array.name() - module_params += decl if len(module_params) <= 0 else f", {decl}" - if array in module.host_references(): - decl = array.name() - module_params += f", {decl}" - - for prop in module.properties(): - decl = f"d_{prop.name()}" if device_cond 
else prop.name() - module_params += f", {decl}" - if prop in module.host_references(): - decl = prop.name() - module_params += f", {decl}" - - for contact_prop in module.contact_properties(): - decl = f"d_{contact_prop.name()}" if device_cond else contact_prop.name() - module_params += f", {decl}" - if contact_prop in module.host_references(): - decl = contact_prop.name() - module_params += f", {decl}" - - for feature_prop in module.feature_properties(): - decl = f"d_{feature_prop.name()}" if device_cond else feature_prop.name() - module_params += f", {decl}" - if feature_prop in module.host_references(): - decl = feature_prop.name() - module_params += f", {decl}" - - self.print(f"{module.name}({module_params});") + module_params = ["pairs_runtime", "pobj"] + + if ast_node.module.name=="init_domain": + module_params += ["argc", "argv"] + + module_params += [f"{param.name()}" for param in ast_node.module.parameters()] + + print_params = ", ".join(module_params) + self.print(f"pairs::internal::{ast_node.module.name}({print_params});") if isinstance(ast_node, Print): - self.print(f"PAIRS_DEBUG(\"{ast_node.string}\\n\");") + args = ast_node.args + exprs = [self.generate_expression(arg) for arg in args] + toPrint = "PAIRS_DEBUG(\"" + for arg in args: + if Types.is_real(arg.type()): + format = "%f " + elif Types.is_integer(arg.type()): + format = "%d " + else: + format = "%s " + toPrint += format + + toPrint = toPrint + "\\n\", " + ", ".join(map(str, exprs)) + ");" + self.print(toPrint) + + if isinstance(ast_node, PrintCode): + toPrint = self.generate_expression(ast_node.arg) + self.print(toPrint[1:-1]) if isinstance(ast_node, Realloc): tkw = Types.c_keyword(self.sim, ast_node.array.type()) size = self.generate_expression(ast_node.size) array_name = ast_node.array.name() - self.print(f"{array_name} = ({tkw} *) realloc({array_name}, {size});") + ptr = self.generate_object_reference(ast_node) + self.print(f"{ptr} = ({tkw} *) realloc({ptr}, {size});") + if 
self.target.is_gpu() and ast_node.array.device_flag: - self.print(f"d_{array_name} = ({tkw} *) pairs::device_realloc(d_{array_name}, {size});") + d_ptr = self.generate_object_reference(ast_node, device=True) + self.print(f"{d_ptr} = ({tkw} *) pairs::device_realloc({d_ptr}, {size});") if isinstance(ast_node, RegisterArray): a = ast_node.array() - ptr = a.name() - d_ptr = f"d_{ptr}" if self.target.is_gpu() and a.device_flag else "nullptr" tkw = Types.c_keyword(self.sim, a.type()) size = self.generate_expression(ast_node.size()) if a.is_static(): - self.print(f"pairs->addStaticArray({a.id()}, \"{a.name()}\", {ptr}, {d_ptr}, {size});") + ptr_ref = self.generate_object_reference(a) + d_ptr_ref = self.generate_object_reference(a, device=True) + self.print(f"pairs_runtime->addStaticArray({a.id()}, \"{a.name()}\", {ptr_ref}, {d_ptr_ref}, {size});") else: - if self.target.is_gpu() and a.device_flag: - self.print(f"{tkw} *{ptr}, *{d_ptr};") - d_ptr = f"&{d_ptr}" - else: - self.print(f"{tkw} *{ptr};") - - self.print(f"pairs->addArray({a.id()}, \"{a.name()}\", &{ptr}, {d_ptr}, {size});") + ptr_addr = self.generate_object_address(a) + d_ptr_addr = self.generate_object_address(a, device=True) + self.print(f"pairs_runtime->addArray({a.id()}, \"{a.name()}\", {ptr_addr}, {d_ptr_addr}, {size});") if isinstance(ast_node, RegisterProperty): p = ast_node.property() - ptr = p.name() - d_ptr = f"d_{ptr}" if self.target.is_gpu() and p.device_flag else "nullptr" + ptr_addr = self.generate_object_address(p) + d_ptr_addr = self.generate_object_address(p, device=True) tkw = Types.c_keyword(self.sim, p.type()) ptype = Types.c_property_keyword(p.type()) assert ptype != "Prop_Invalid", "Invalid property type!" 
playout = Layouts.c_keyword(p.layout()) + vol = 1 if p.is_volatile() else 0 sizes = ", ".join([str(self.generate_expression(ScalarOp.inline(size))) for size in ast_node.sizes()]) - - if self.target.is_gpu() and p.device_flag: - self.print(f"{tkw} *{ptr}, *{d_ptr};") - d_ptr = f"&{d_ptr}" - else: - self.print(f"{tkw} *{ptr};") - - self.print(f"pairs->addProperty({p.id()}, \"{p.name()}\", &{ptr}, {d_ptr}, {ptype}, {playout}, {sizes});") + self.print(f"pairs_runtime->addProperty({p.id()}, \"{p.name()}\", {ptr_addr}, {d_ptr_addr}, {ptype}, {playout}, {vol}, {sizes});") if isinstance(ast_node, RegisterContactProperty): p = ast_node.property() - ptr = p.name() - d_ptr = f"d_{ptr}" if self.target.is_gpu() and p.device_flag else "nullptr" + ptr_addr = self.generate_object_address(p) + d_ptr_addr = self.generate_object_address(p, device=True) tkw = Types.c_keyword(self.sim, p.type()) ptype = Types.c_property_keyword(p.type()) assert ptype != "Prop_Invalid", "Invalid property type!" playout = Layouts.c_keyword(p.layout()) sizes = ", ".join([str(self.generate_expression(ScalarOp.inline(size))) for size in ast_node.sizes()]) - - if self.target.is_gpu() and p.device_flag: - self.print(f"{tkw} *{ptr}, *{d_ptr};") - d_ptr = f"&{d_ptr}" - else: - self.print(f"{tkw} *{ptr};") - - self.print(f"pairs->addContactProperty({p.id()}, \"{p.name()}\", &{ptr}, {d_ptr}, {ptype}, {playout}, {sizes});") + self.print(f"pairs_runtime->addContactProperty({p.id()}, \"{p.name()}\", {ptr_addr}, {d_ptr_addr}, {ptype}, {playout}, {sizes});") if isinstance(ast_node, RegisterFeatureProperty): fp = ast_node.feature_property() - ptr = fp.name() - d_ptr = f"&d_{ptr}" if self.target.is_gpu() and fp.device_flag else "nullptr" + ptr = self.generate_object_reference(fp) + ptr_addr = self.generate_object_address(fp) + d_ptr_addr = self.generate_object_address(fp, device=True) array_size = fp.array_size() nkinds = fp.feature().nkinds() tkw = Types.c_keyword(self.sim, fp.type()) fptype = 
Types.c_property_keyword(fp.type()) assert fptype != "Prop_Invalid", "Invalid feature property type!" - self.print(f"{tkw} {ptr}[{array_size}];") - self.print(f"pairs->addFeatureProperty({fp.id()}, \"{fp.name()}\", &{ptr}, {d_ptr}, {fptype}, {nkinds}, {array_size} * sizeof({tkw}));") + self.print(f"pairs_runtime->addFeatureProperty({fp.id()}, \"{fp.name()}\", {ptr_addr}, {d_ptr_addr}, {fptype}, {nkinds}, {array_size} * sizeof({tkw}));") for i in range(array_size): self.print(f"{ptr}[{i}] = {fp.data()[i]};") if self.target.is_gpu() and fp.device_flag: - self.print(f"pairs->copyFeaturePropertyToDevice({fp.id()}); // {fp.name()}") + self.print(f"pairs_runtime->copyFeaturePropertyToDevice({fp.id()}); // {fp.name()}") if isinstance(ast_node, Timestep): self.generate_statement(ast_node.block) if isinstance(ast_node, ReallocProperty): p = ast_node.property() - ptr = p.name() - d_ptr_addr = f"&d_{ptr}" if self.target.is_gpu() and p.device_flag else "nullptr" + ptr_addr = self.generate_object_address(p) + d_ptr_addr = self.generate_object_address(p, device=True) sizes = ", ".join([str(self.generate_expression(ScalarOp.inline(size))) for size in ast_node.sizes()]) - self.print(f"pairs->reallocProperty({p.id()}, &{ptr}, {d_ptr_addr}, {sizes});") - #self.print(f"pairs->reallocProperty({p.id()}, (void **) &{ptr}, (void **) &d_{ptr}, {sizes});") + self.print(f"pairs_runtime->reallocProperty({p.id()}, {ptr_addr}, {d_ptr_addr}, {sizes});") if isinstance(ast_node, ReallocArray): a = ast_node.array() size = self.generate_expression(ast_node.size()) - ptr = a.name() - d_ptr_addr = f"&d_{ptr}" if self.target.is_gpu() and a.device_flag else "nullptr" - self.print(f"pairs->reallocArray({a.id()}, &{ptr}, {d_ptr_addr}, {size});") - #self.print(f"pairs->reallocArray({a.id()}, (void **) &{ptr}, (void **) &d_{ptr}, {size});") + ptr_addr = self.generate_object_address(a) + d_ptr_addr = self.generate_object_address(a, device=True) + self.print(f"pairs_runtime->reallocArray({a.id()}, 
{ptr_addr}, {d_ptr_addr}, {size});") if isinstance(ast_node, DeclareVariable): + var_name = ast_node.var.name() tkw = Types.c_keyword(self.sim, ast_node.var.type()) + prefix_decl = f"{tkw} " if ast_node.var.temporary() else "" if ast_node.var.is_scalar(): var = self.generate_expression(ast_node.var) + addr = self.generate_object_address(ast_node.var) init = self.generate_expression(ast_node.var.init_value()) - self.print(f"{tkw} {var} = {init};") + self.print(f"{prefix_decl}{var} = {init};") + + if ast_node.var.runtime_track(): + self.print(f"pairs_runtime->trackVariable(\"{var_name}\", {addr});") else: for i in range(Types.number_of_elements(self.sim, ast_node.var.type())): var = self.generate_expression(ast_node.var, index=i) init = self.generate_expression(ast_node.var.init_value(), index=i) - self.print(f"{tkw} {var} = {init};") - + self.print(f"{prefix_decl}{var} = {init};") if not self.kernel_context and self.target.is_gpu() and ast_node.var.device_flag: - self.print(f"RuntimeVar<{tkw}> rv_{ast_node.var.name()} = pairs->addDeviceVariable(&({ast_node.var.name()}));") - #self.print(f"{tkw} *d_{ast_node.var.name()} = pairs->addDeviceVariable(&({ast_node.var.name()}));") + addr = self.generate_object_address(ast_node.var) + ref = self.generate_object_reference(ast_node.var, device=True) + self.print(f"{prefix_decl}{ref} = pairs_runtime->addDeviceVariable({addr});") if isinstance(ast_node, While): cond = self.generate_expression(ast_node.cond) @@ -749,9 +1063,13 @@ class CGen: self.generate_statement(ast_node.block) self.print("}") + if isinstance(ast_node, Return): + expr = self.generate_expression(ast_node.expr) + self.print(f"return {expr};") + def generate_expression(self, ast_node, mem=False, index=None): if isinstance(ast_node, Array): - return ast_node.name() + return self.generate_object_reference(ast_node) if isinstance(ast_node, ArrayAccess): if mem or ast_node.inlined is True: @@ -778,9 +1096,9 @@ class CGen: extra_params = [] if 
ast_node.name().startswith("pairs::"): - extra_params += ["pairs"] + extra_params += ["pairs_runtime"] - if ast_node.name() == "pairs->initDomain": + if ast_node.name() == "pairs_runtime->initDomain": extra_params += ["&argc", "&argv"] params = ", ".join(extra_params + [str(self.generate_expression(p)) for p in ast_node.parameters()]) @@ -792,22 +1110,24 @@ class CGen: return f"({tkw})({expr})" if isinstance(ast_node, ContactProperty): - return ast_node.name() + return self.generate_object_reference(ast_node) if isinstance(ast_node, Deref): var = self.generate_expression(ast_node.var) - return f"(*{var})" + # Dereferences are ignored for write variables when full objects + # are generated since they can be directly written into + return var if (self.generate_full_object_names or ast_node.var.force_read) else f"(*{var})" if isinstance(ast_node, DeviceStaticRef): elem = self.generate_expression(ast_node.elem) - return f"d_{elem}" + return f"{elem}_d" if isinstance(ast_node, FeatureProperty): - return ast_node.name() + return self.generate_object_reference(ast_node) if isinstance(ast_node, HostRef): elem = self.generate_expression(ast_node.elem) - return f"h_{elem}" + return f"{elem}_h" if isinstance(ast_node, Iter): assert mem is False, "Iterator is not lvalue!" @@ -817,13 +1137,22 @@ class CGen: assert mem is False, "Literal is not lvalue!" if ast_node.type() == Types.String: return f"\"{ast_node.value}\"" + + if ast_node.type() == Types.Boolean: + if ast_node.value == True: + return "true" + if ast_node.value == False: + return "false" if not ast_node.is_scalar(): assert index is not None, "Index must be set for non-scalar literals." 
return ast_node.value[index] if isinstance(ast_node.value, float) and math.isinf(ast_node.value): - return f"std::numeric_limits<{self.real_type()}>::infinity()" + if self.kernel_context: + return "CUDART_INF" + else: + return f"std::numeric_limits<{self.real_type()}>::infinity()" return ast_node.value @@ -837,7 +1166,7 @@ class CGen: return f"{ast_node.name()}" if isinstance(ast_node, Property): - return ast_node.name() + return self.generate_object_reference(ast_node) if isinstance(ast_node, PropertyAccess): assert ast_node.is_scalar() or index is not None, \ @@ -911,8 +1240,11 @@ class CGen: return f"{ast_node.name()}" if isinstance(ast_node, Var): - return ast_node.name() if ast_node.is_scalar() else f"{ast_node.name()}_{index}" - + return self.generate_object_reference(ast_node, index=index) + + if isinstance(ast_node, Parameter): + return ast_node.name() + if isinstance(ast_node, VectorAccess): return self.generate_expression(ast_node.expr, mem, self.generate_expression(ast_node.index)) diff --git a/src/pairs/code_gen/interface.py b/src/pairs/code_gen/interface.py new file mode 100644 index 0000000000000000000000000000000000000000..6ed3f7f2b511137000e87f8db64c4a70894efd35 --- /dev/null +++ b/src/pairs/code_gen/interface.py @@ -0,0 +1,251 @@ +from pairs.ir.block import Block, pairs_interface_block +from pairs.ir.functions import Call_Void, Call, Call_Int +from pairs.ir.parameters import Parameter +from pairs.ir.ret import Return +from pairs.ir.scalars import ScalarOp +from pairs.sim.domain import UpdateDomain +from pairs.sim.cell_lists import BuildCellListsStencil +from pairs.sim.comm import Synchronize, Borders, Exchange, ReverseComm +from pairs.ir.types import Types +from pairs.ir.branches import Filter, Branch +from pairs.sim.cell_lists import BuildCellLists, BuildCellListsStencil, PartitionCellLists, BuildCellNeighborLists +from pairs.sim.neighbor_lists import BuildNeighborLists +from pairs.sim.variables import DeclareVariables +from pairs.sim.arrays 
import DeclareArrays +from pairs.sim.properties import AllocateProperties, AllocateContactProperties, ResetVolatileProperties +from pairs.sim.features import AllocateFeatureProperties +from pairs.sim.instrumentation import RegisterMarkers, RegisterTimers +from pairs.sim.grid import MutableGrid +from pairs.sim.domain_partitioners import DomainPartitioners +from pairs.ir.print import PrintCode +from pairs.ir.assign import Assign +from pairs.sim.contact_history import BuildContactHistory, ClearUnusedContactHistory, ResetContactHistoryUsageStatus +from pairs.sim.thermo import ComputeThermo + +class InterfaceModules: + def __init__(self, sim): + self.sim = sim + + def create_all(self): + self.initialize() + self.setup_sim() + self.update_domain() + self.update_cells(self.sim.reneighbor_frequency) + self.communicate(self.sim.reneighbor_frequency) + self.reverse_comm() + self.reset_volatiles() + + if self.sim._use_contact_history: + if self.sim.neighbor_lists: # this class only stores `sim`; assumes the simulation exposes neighbor_lists + self.build_contact_history(self.sim.reneighbor_frequency) + self.reset_contact_history() + + if self.sim._compute_thermo != 0: + self.compute_thermo() # compute_thermo(self) declares no extra parameter + + self.rank() + self.nlocal() + self.nghost() + self.size() + self.end() + + @pairs_interface_block + def initialize(self): + self.sim.module_name('initialize') + nprops = self.sim.properties.nprops() + ncontactprops = self.sim.contact_properties.nprops() + narrays = self.sim.arrays.narrays() + part = DomainPartitioners.c_keyword(self.sim.partitioner()) + + PrintCode(self.sim, f"pairs_runtime = new PairsRuntime({nprops}, {ncontactprops}, {narrays}, {part});") + PrintCode(self.sim, f"pobj = new PairsObjects();") + + inits = Block.from_list(self.sim, [ + DeclareVariables(self.sim), + DeclareArrays(self.sim), + AllocateProperties(self.sim), + AllocateContactProperties(self.sim), + AllocateFeatureProperties(self.sim), + RegisterTimers(self.sim), + RegisterMarkers(self.sim) + ]) + + if self.sim.create_domain_at_initialization: + 
self.sim.add_statement(Block.merge_blocks(inits, self.sim.create_domain)) + else: + assert self.sim.grid is None, "A grid already exists" + self.sim.grid = MutableGrid(self.sim, self.sim.dims) + self.sim.add_statement(inits) + + @pairs_interface_block + def setup_sim(self): + self.sim.module_name('setup_sim') + + if self.sim.cell_lists.runtime_spacing: + for d in range(self.sim.dims): + Assign(self.sim, self.sim.cell_lists.spacing[d], Parameter(self.sim, f'cell_spacing_d{d}', Types.Real)) + + if self.sim.cell_lists.runtime_cutoff_radius: + Assign(self.sim, self.sim.cell_lists.cutoff_radius, Parameter(self.sim, 'cutoff_radius', Types.Real)) + + self.sim.add_statement(self.sim.setup_particles) + # This update assumes all particles have been created exactly in the rank that contains them + self.sim.add_statement(UpdateDomain(self.sim)) + self.sim.add_statement(BuildCellListsStencil(self.sim, self.sim.cell_lists)) + + @pairs_interface_block + def update_domain(self): + self.sim.module_name('update_domain') + self.sim.add_statement(Exchange(self.sim._comm)) # Local particles must be contained in their owners before domain update + self.sim.add_statement(UpdateDomain(self.sim)) + # Exchange is not needed after update since all locals are contained in their owners + self.sim.add_statement(Borders(self.sim._comm)) # Ghosts must be recreated after update + self.sim.add_statement(ResetVolatileProperties(self.sim)) # Reset volatile includes the new locals + self.sim.add_statement(BuildCellListsStencil(self.sim, self.sim.cell_lists)) # Rebuild stencil since subdom sizes have changed + self.sim.add_statement(self.sim.update_cells_procedures) + + @pairs_interface_block + def reset_volatiles(self): + self.sim.module_name('reset_volatiles') + self.sim.add_statement(ResetVolatileProperties(self.sim)) + + @pairs_interface_block + def update_cells(self, reneighbor_frequency=1): + self.sim.module_name('update_cells') + timestep = Parameter(self.sim, f'timestep', Types.Int32) + cond = 
ScalarOp.inline(ScalarOp.or_op( + ScalarOp.cmp((timestep + 1) % reneighbor_frequency, 0), + ScalarOp.cmp(timestep, 0) + )) + + self.sim.add_statement(Filter(self.sim, cond, self.sim.update_cells_procedures)) + + @pairs_interface_block + def communicate(self, reneighbor_frequency=1): + self.sim.module_name('communicate') + timestep = Parameter(self.sim, f'timestep', Types.Int32) + cond = ScalarOp.inline(ScalarOp.or_op( + ScalarOp.cmp((timestep + 1) % reneighbor_frequency, 0), + ScalarOp.cmp(timestep, 0) + )) + + exchange = Filter(self.sim, cond, Exchange(self.sim._comm)) + border_sync = Branch(self.sim, cond, + blk_if = Borders(self.sim._comm), + blk_else = Synchronize(self.sim._comm)) + + self.sim.add_statement(exchange) + self.sim.add_statement(border_sync) + + # TODO: Maybe remove this from here, but volatiles must always be reset after exchange + self.sim.add_statement(Filter(self.sim, cond, Block(self.sim, ResetVolatileProperties(self.sim)))) + + @pairs_interface_block + def reverse_comm(self): + self.sim.module_name('reverse_comm') + self.sim.add_statement(ReverseComm(self.sim._comm, reduce=True)) + + @pairs_interface_block + def build_contact_history(self, reneighbor_frequency=1): + self.sim.module_name('build_contact_history') + timestep = Parameter(self.sim, f'timestep', Types.Int32) + cond = ScalarOp.inline(ScalarOp.or_op( + ScalarOp.cmp((timestep + 1) % reneighbor_frequency, 0), + ScalarOp.cmp(timestep, 0) + )) + + self.sim.add_statement( + Filter(self.sim, cond, + BuildContactHistory(self.sim, self.sim._contact_history, self.sim.cell_lists))) + + @pairs_interface_block + def reset_contact_history(self): + self.sim.module_name('reset_contact_history') + self.sim.add_statement(ResetContactHistoryUsageStatus(self.sim, self.sim._contact_history)) + self.sim.add_statement(ClearUnusedContactHistory(self.sim, self.sim._contact_history)) + + @pairs_interface_block + def compute_thermo(self): + self.sim.module_name('compute_thermo') + 
self.sim.add_statement(ComputeThermo(self.sim)) + + @pairs_interface_block + def rank(self): + self.sim.module_name('rank') + Return(self.sim, self.sim.domain_partitioning().rank) + + @pairs_interface_block + def nlocal(self): + self.sim.module_name('nlocal') + Return(self.sim, self.sim.nlocal) + + @pairs_interface_block + def nghost(self): + self.sim.module_name('nghost') + Return(self.sim, self.sim.nghost) + + @pairs_interface_block + def size(self): + self.sim.module_name('size') + Return(self.sim, ScalarOp.inline(self.sim.nlocal + self.sim.nghost)) + + @pairs_interface_block + def create_sphere(self): + self.sim.module_name('create_sphere') + x = Parameter(self.sim, 'x', Types.Real) + y = Parameter(self.sim, 'y', Types.Real) + z = Parameter(self.sim, 'z', Types.Real) + vx = Parameter(self.sim, 'vx', Types.Real) + vy = Parameter(self.sim, 'vy', Types.Real) + vz = Parameter(self.sim, 'vz', Types.Real) + density = Parameter(self.sim, 'density', Types.Real) + radius = Parameter(self.sim, 'radius', Types.Real) + ptype = Parameter(self.sim, 'type', Types.Real) + flag = Parameter(self.sim, 'flag', Types.Real) + + Return(self.sim, Call(self.sim, "pairs::create_sphere", + [x, y, z, vx, vy, vz, + density, radius, ptype, flag], Types.UInt64)) + + @pairs_interface_block + def create_halfspace(self): + self.sim.module_name('create_halfspace') + x = Parameter(self.sim, 'x', Types.Real) + y = Parameter(self.sim, 'y', Types.Real) + z = Parameter(self.sim, 'z', Types.Real) + nx = Parameter(self.sim, 'nx', Types.Real) + ny = Parameter(self.sim, 'ny', Types.Real) + nz = Parameter(self.sim, 'nz', Types.Real) + ptype = Parameter(self.sim, 'type', Types.Real) + flag = Parameter(self.sim, 'flag', Types.Real) + + Return(self.sim, Call(self.sim, "pairs::create_halfspace", + [x, y, z, nx, ny, nz, ptype, flag], Types.UInt64)) + + @pairs_interface_block + def dem_sc_grid(self): + self.sim.module_name('dem_sc_grid') + xmax = Parameter(self.sim, 'xmax', Types.Real) + ymax = 
Parameter(self.sim, 'ymax', Types.Real) + zmax = Parameter(self.sim, 'zmax', Types.Real) + spacing = Parameter(self.sim, 'spacing', Types.Real) + diameter = Parameter(self.sim, 'diameter', Types.Real) + min_diameter = Parameter(self.sim, 'min_diameter', Types.Real) + max_diameter = Parameter(self.sim, 'max_diameter', Types.Real) + initial_velocity = Parameter(self.sim, 'initial_velocity', Types.Real) + particle_density = Parameter(self.sim, 'particle_density', Types.Real) + ntypes = Parameter(self.sim, 'ntypes', Types.Int32) + + Assign(self.sim, self.sim.nlocal, + Call_Int(self.sim, "pairs::dem_sc_grid", + [xmax, ymax, zmax, spacing, diameter, min_diameter, max_diameter, + initial_velocity, particle_density, ntypes])) + Return(self.sim, self.sim.nlocal) + + @pairs_interface_block + def end(self): + self.sim.module_name('end') + Call_Void(self.sim, "pairs::print_timers", []) + Call_Void(self.sim, "pairs::print_stats", [self.sim.nlocal, self.sim.nghost]) + PrintCode(self.sim, "delete pobj;") + PrintCode(self.sim, "delete pairs_runtime;") diff --git a/src/pairs/code_gen/printer.py b/src/pairs/code_gen/printer.py index 4d73b7679f7f8c52ff1878339dbc0479cb9db215..3eabef6f95c22b6d2f16d168a97437cb1fe00d9f 100644 --- a/src/pairs/code_gen/printer.py +++ b/src/pairs/code_gen/printer.py @@ -1,4 +1,6 @@ class Printer: + + line_id = 0 def __init__(self, output): self.output = output self.stream = None @@ -16,4 +18,5 @@ class Printer: def __call__(self, text): assert self.stream is not None, "Invalid stream!" 
+ Printer.line_id += 1 self.stream.write(self.indent * ' ' + text + '\n') diff --git a/src/pairs/ir/block.py b/src/pairs/ir/block.py index 1a0809b811840a6f0d0edbccfd1fb68a9712992f..2a14ea2776dcf3651a8f9d03b397bf9db1bd1fb6 100644 --- a/src/pairs/ir/block.py +++ b/src/pairs/ir/block.py @@ -42,6 +42,21 @@ def pairs_device_block(func): return inner +def pairs_interface_block(func): + def inner(*args, **kwargs): + sim = args[0].sim # self.sim + sim.init_block() + func(*args, **kwargs) + return Module(sim, + name=sim._module_name, + block=Block(sim, sim._block), + resizes_to_check=sim._resizes_to_check, + check_properties_resize=sim._check_properties_resize, + run_on_device=False, + interface=True) + + return inner + class Block(ASTNode): def __init__(self, sim, stmts): super().__init__(sim) diff --git a/src/pairs/ir/declaration.py b/src/pairs/ir/declaration.py index 3e26c57fd82b2e59d24fb1043abe823714a8ae9c..35992d52e2bbebd0c59e0a7502a16939cb3b1788 100644 --- a/src/pairs/ir/declaration.py +++ b/src/pairs/ir/declaration.py @@ -7,7 +7,7 @@ class Decl(ASTNode): self.elem = elem def __str__(self): - return f"Decl<self.elem>" + return f"Decl<{self.elem}>" def children(self): return [self.elem] diff --git a/src/pairs/ir/device.py b/src/pairs/ir/device.py index 952ff1429c9eb45dd47bc40edcb2d3b53fb09f38..0dfd6b9ad0ebf802429cc1cacdf9339bd871a816 100644 --- a/src/pairs/ir/device.py +++ b/src/pairs/ir/device.py @@ -62,6 +62,9 @@ class CopyProperty(ASTNode): self._action = action self.sim.add_statement(self) + def __str__(self): + return f"CopyProperty<{self._prop}>" + def prop(self): return self._prop @@ -72,9 +75,31 @@ class CopyProperty(ASTNode): return self._action def children(self): - return [self._prop] + return [self._prop, self.sim.nghost, self.sim.nlocal] + +class CopyFeatureProperty(ASTNode): + def __init__(self, sim, prop, ctx, action): + super().__init__(sim) + self._prop = prop + self._ctx = ctx + self._action = action + self.sim.add_statement(self) + + def 
__str__(self): + return f"CopyFeatureProperty<{self._prop}>" + + def prop(self): + return self._prop + def context(self): + return self._ctx + def action(self): + return self._action + + def children(self): + return [self._prop] + class CopyContactProperty(ASTNode): def __init__(self, sim, prop, ctx, action): super().__init__(sim) diff --git a/src/pairs/ir/features.py b/src/pairs/ir/features.py index d709336b0971a7361e0cb5159d36bc975605cb28..24bb365f3e951d55d479150c8d5ebb60a9f88f0b 100644 --- a/src/pairs/ir/features.py +++ b/src/pairs/ir/features.py @@ -131,9 +131,7 @@ class FeatureProperty(ASTNode): self.feature_prop_feature.nkinds()] def array_size(self): - nelems = self.feature_prop_feature.nkinds() * \ - Types.number_of_elements(self.sim, self.feature_prop_type) - return nelems * nelems + return self.feature_prop_feature.nkinds()**2 * Types.number_of_elements(self.sim, self.feature_prop_type) def __getitem__(self, expr): return FeaturePropertyAccess(self.sim, self, expr) @@ -161,7 +159,7 @@ class FeaturePropertyAccess(ASTTerm): sizes = feature_prop.sizes() layout = feature_prop.layout() - for elem in range(Types.number_of_elements(feature_prop.type())): + for elem in range(Types.number_of_elements(self.sim, feature_prop.type())): if layout == Layouts.AoS: self.vector_indexes[elem] = self.index * sizes[0] + elem elif layout == Layouts.SoA: diff --git a/src/pairs/ir/functions.py b/src/pairs/ir/functions.py index b18406bf77bc5d94c22387864435fc677680ea8e..ffb49d53717b9a15940c6c9039ddd1ee6c7baf69 100644 --- a/src/pairs/ir/functions.py +++ b/src/pairs/ir/functions.py @@ -11,6 +11,9 @@ class Call(ASTTerm): self.params = [Lit.cvt(sim, p) for p in params] self.return_type = return_type + def __str__(self): + return f"Call<{self.func_name}, {self.params}>" + def name(self): return self.func_name @@ -28,8 +31,13 @@ class Call_Int(Call): def __init__(self, sim, func_name, parameters): super().__init__(sim, func_name, parameters, Types.Int32) + def __str__(self): + return 
f"Call_Int<{self.func_name}, {self.params}>" class Call_Void(Call): def __init__(self, sim, func_name, parameters): - super().__init__(sim, func_name, parameters, Types.Invalid) + super().__init__(sim, func_name, parameters, Types.Void) sim.add_statement(self) + + def __str__(self): + return f"Cal_Void<{self.func_name}, {self.params}>" diff --git a/src/pairs/ir/kernel.py b/src/pairs/ir/kernel.py index 04def29cf9153a8ec7f5048f79579bd062186dd0..4f477ca1b50a7cdce7b9a13894cd96f10e26f769 100644 --- a/src/pairs/ir/kernel.py +++ b/src/pairs/ir/kernel.py @@ -8,7 +8,9 @@ from pairs.ir.matrices import MatrixOp from pairs.ir.properties import Property, ContactProperty from pairs.ir.quaternions import QuaternionOp from pairs.ir.variables import Var +from pairs.ir.parameters import Parameter from pairs.ir.vectors import VectorOp +from pairs.ir.loops import Iter class Kernel(ASTNode): @@ -19,6 +21,8 @@ class Kernel(ASTNode): self._id = Kernel.last_kernel self._name = name if name is not None else "kernel" + str(Kernel.last_kernel) self._variables = {} + self._parameters = {} + self._iters = {} self._arrays = {} self._properties = {} self._contact_properties = {} @@ -50,6 +54,12 @@ class Kernel(ASTNode): def variables(self): return self._variables + def parameters(self): + return self._parameters + + def iters(self): + return self._iters + def read_only_variables(self): return [var for var in self._variables if self._variables[var] == Actions.ReadOnly] @@ -99,6 +109,28 @@ class Kernel(ASTNode): action = Actions.NoAction if var not in self._variables else self._variables[var] self._variables[var] = Actions.update_rule(action, new_op) + + def add_parameter(self, parameter, write=False): + parameter_list = parameter if isinstance(parameter, list) else [parameter] + new_op = 'w' if write else 'r' + + for param in parameter_list: + assert isinstance(param, Parameter), \ + "Module.add_parameter(): given element is not of type Parameter!" 
+ + action = Actions.NoAction if param not in self._parameters else self._parameters[param] + self._parameters[param] = Actions.update_rule(action, new_op) + + def add_iter(self, iter, write=False): + iter_list = iter if isinstance(iter, list) else [iter] + new_op = 'w' if write else 'r' + + for it in iter_list: + assert isinstance(it, Iter), \ + "Kernel.add_iter(): Element is not of type Iter." + + action = Actions.NoAction if it not in self._iters else self._iters[it] + self._iters[it] = Actions.update_rule(action, new_op) def add_property(self, prop, write=False): prop_list = prop if isinstance(prop, list) else [prop] diff --git a/src/pairs/ir/loops.py b/src/pairs/ir/loops.py index 997fda5c6fa73938a74c8fb52f453caa1bd60a21..8842818627c7da514ec9a24ae85c84a5f08cd747 100644 --- a/src/pairs/ir/loops.py +++ b/src/pairs/ir/loops.py @@ -18,6 +18,7 @@ class Iter(ASTTerm): super().__init__(sim, ScalarOp) self.loop = loop self.iter_id = Iter.new_id() + self._ref_candidate = False def id(self): return self.iter_id @@ -27,7 +28,16 @@ class Iter(ASTTerm): def type(self): return Types.Int32 - + + def mark_as_ref_candidate(self): + self._ref_candidate = True + + def is_ref_candidate(self): + return self._ref_candidate + + def __hash__(self): + return hash(self.iter_id) + def __eq__(self, other): return isinstance(other, Iter) and self.iter_id == other.iter_id @@ -39,7 +49,7 @@ class Iter(ASTTerm): class For(ASTNode): - def __init__(self, sim, range_min, range_max, block=None): + def __init__(self, sim, range_min, range_max, block=None, not_kernel=False): super().__init__(sim) self.iterator = Iter(sim, self) self.min = Lit.cvt(sim, range_min) @@ -47,6 +57,7 @@ class For(ASTNode): self.block = Block(sim, []) if block is None else block self.kernel = None self._kernel_candidate = False + self.not_kernel = not_kernel def __str__(self): return f"For<{self.iterator}, {self.min} ... 
{self.max}>" @@ -63,6 +74,9 @@ class For(ASTNode): def mark_as_kernel_candidate(self): self._kernel_candidate = True + def mark_iter_as_ref_candidate(self): + self.iterator.mark_as_ref_candidate() + def is_kernel_candidate(self): return self._kernel_candidate diff --git a/src/pairs/ir/math.py b/src/pairs/ir/math.py index e85aa0678f7fbfc6372665fdfab78f346eb21e97..a6a156a4986a6a8b122dc70d8ca920ad18d29269 100644 --- a/src/pairs/ir/math.py +++ b/src/pairs/ir/math.py @@ -1,6 +1,7 @@ from pairs.ir.ast_term import ASTTerm from pairs.ir.scalars import ScalarOp from pairs.ir.types import Types +from pairs.ir.lit import Lit class MathFunction(ASTTerm): @@ -115,6 +116,7 @@ class Cos(MathFunction): class Ceil(MathFunction): def __init__(self, sim, expr): + expr = Lit.cvt(sim, expr) assert Types.is_real(expr.type()), "Expression must be of real type!" super().__init__(sim) self._params = [expr] diff --git a/src/pairs/ir/module.py b/src/pairs/ir/module.py index ab78942b2f43f946a714513fab4ad7d390442197..ded67ac6f4448590c346f51177b17fe364b729d0 100644 --- a/src/pairs/ir/module.py +++ b/src/pairs/ir/module.py @@ -4,15 +4,25 @@ from pairs.ir.ast_node import ASTNode from pairs.ir.features import FeatureProperty from pairs.ir.properties import Property, ContactProperty from pairs.ir.variables import Var +from pairs.ir.parameters import Parameter +from pairs.ir.types import Types class Module(ASTNode): last_module = 0 - def __init__(self, sim, name=None, block=None, resizes_to_check={}, check_properties_resize=False, run_on_device=False): + def __init__(self, sim, + name=None, + block=None, + resizes_to_check={}, + check_properties_resize=False, + run_on_device=False, + user_defined=False, + interface=False): super().__init__(sim) self._id = Module.last_module self._name = name if name is not None else "module" + str(Module.last_module) + self._parameters = {} self._variables = {} self._arrays = {} self._properties = {} @@ -23,8 +33,21 @@ class Module(ASTNode): self._resizes_to_check = 
resizes_to_check self._check_properties_resize = check_properties_resize self._run_on_device = run_on_device + self._user_defined = user_defined + self._interface = interface + self._return_type = Types.Void self._profile = False - sim.add_module(self) + + if user_defined: + assert not interface, ("User-defined modules can't be part of the interface directly." + "Wrap them inside seperate interface modules.") + sim.add_udf_module(self) + else: + if interface: + sim.add_interface_module(self) + else: + sim.add_module(self) + Module.last_module += 1 def __str__(self): @@ -45,6 +68,18 @@ class Module(ASTNode): @property def run_on_device(self): return self._run_on_device + + @property + def user_defined(self): + return self._user_defined + + @property + def interface(self): + return self._interface + + @property + def return_type(self): + return self._return_type def profile(self): self._profile = True @@ -53,6 +88,9 @@ class Module(ASTNode): def must_profile(self): return self._profile + def parameters(self): + return self._parameters + def variables(self): return self._variables @@ -99,6 +137,17 @@ class Module(ASTNode): action = Actions.NoAction if var not in self._variables else self._variables[var] self._variables[var] = Actions.update_rule(action, new_op) + def add_parameter(self, parameter, write=False): + parameter_list = parameter if isinstance(parameter, list) else [parameter] + new_op = 'w' if write else 'r' + + for param in parameter_list: + assert isinstance(param, Parameter), \ + "Module.add_parameter(): given element is not of type Parameter!" 
+ + action = Actions.NoAction if param not in self._parameters else self._parameters[param] + self._parameters[param] = Actions.update_rule(action, new_op) + def add_property(self, prop, write=False): prop_list = prop if isinstance(prop, list) else [prop] new_op = 'w' if write else 'r' @@ -150,5 +199,8 @@ class ModuleCall(ASTNode): def module(self): return self._module + def __str__(self): + return f"ModuleCall<{self._module}>" + def children(self): return [self._module] diff --git a/src/pairs/ir/mutator.py b/src/pairs/ir/mutator.py index bbe06f9ce371d818111cf597ab36e99f65fe5137..3fb017f80cc517c309075658225a1e6a4d435e03 100644 --- a/src/pairs/ir/mutator.py +++ b/src/pairs/ir/mutator.py @@ -54,6 +54,18 @@ class Mutator: ast_node._reduction_variable = self.mutate(ast_node._reduction_variable) return ast_node + + def mutate_Return(self, ast_node): + ast_node.expr = self.mutate(ast_node.expr) + return ast_node + + def mutate_Print(self, ast_node): + ast_node.args = [self.mutate(arg) for arg in ast_node.args] + return ast_node + + def mutate_PrintCode(self, ast_node): + ast_node.arg = self.mutate(ast_node.arg) + return ast_node def mutate_ArrayAccess(self, ast_node): ast_node.array = self.mutate(ast_node.array) diff --git a/src/pairs/ir/parameters.py b/src/pairs/ir/parameters.py new file mode 100644 index 0000000000000000000000000000000000000000..d6b9ab6457ad53354f07c025fad0b8c71c7cbbbb --- /dev/null +++ b/src/pairs/ir/parameters.py @@ -0,0 +1,18 @@ +from pairs.ir.ast_term import ASTTerm +from pairs.ir.operator_class import OperatorClass + + +class Parameter(ASTTerm): + def __init__(self, sim, param_name, param_type): + super().__init__(sim, OperatorClass.from_type(param_type)) + self.param_name = param_name + self.param_type = param_type + + def __str__(self): + return f"Parameter<{self.param_name}>" + + def name(self): + return self.param_name + + def type(self): + return self.param_type diff --git a/src/pairs/ir/print.py b/src/pairs/ir/print.py new file mode 100644 
index 0000000000000000000000000000000000000000..58c2a6e1d062342e6e2d6511fccc8ee75a7ce55a --- /dev/null +++ b/src/pairs/ir/print.py @@ -0,0 +1,24 @@ +from pairs.ir.ast_node import ASTNode +from pairs.ir.lit import Lit + +class Print(ASTNode): + def __init__(self, sim, *args): + super().__init__(sim) + self.args = [Lit.cvt(sim, a) for a in args] + self.sim.add_statement(self) + + def children(self): + return self.args + + def __str__(self): + return "Print<" + ", ".join(str(arg) for arg in self.args) + ">" + +class PrintCode(ASTNode): + def __init__(self, sim, str): + super().__init__(sim) + self.arg = Lit.cvt(sim, str) + self.sim.add_statement(self) + + def children(self): + return self.arg + \ No newline at end of file diff --git a/src/pairs/ir/properties.py b/src/pairs/ir/properties.py index f10f9424b82f721b3564f6fdc9899f94aca4bc7e..21ed08c918529f509047c0075a2670a5922af5b0 100644 --- a/src/pairs/ir/properties.py +++ b/src/pairs/ir/properties.py @@ -16,8 +16,8 @@ class Properties: self.props = [] self.defs = {} - def add(self, p_name, p_type, p_value, p_volatile, p_layout=Layouts.AoS): - p = Property(self.sim, p_name, p_type, p_value, p_volatile, p_layout) + def add(self, p_name, p_type, p_value, p_volatile, p_layout=Layouts.AoS, p_reduce=False): + p = Property(self.sim, p_name, p_type, p_value, p_volatile, p_layout, p_reduce) self.props.append(p) self.defs[p_name] = p_value return p @@ -27,6 +27,9 @@ class Properties: def all(self): return self.props + + def reduction_props(self): + return [p for p in self.props if p.reduce is True] def volatiles(self): return [p for p in self.props if p.volatile is True] @@ -51,7 +54,7 @@ class Properties: class Property(ASTNode): last_prop_id = 0 - def __init__(self, sim, name, dtype, default, volatile, layout=Layouts.AoS): + def __init__(self, sim, name, dtype, default, volatile, layout=Layouts.AoS, reduce=False): super().__init__(sim) self.prop_id = Property.last_prop_id self.prop_name = name @@ -59,6 +62,7 @@ class 
Property(ASTNode): self.prop_layout = layout self.default_value = default self.volatile = volatile + self.reduce = reduce self.device_flag = False Property.last_prop_id += 1 @@ -83,6 +87,9 @@ class Property(ASTNode): def default(self): return self.default_value + def is_volatile(self): + return self.volatile + def ndims(self): return 1 if Types.is_scalar(self.prop_type) else 2 diff --git a/src/pairs/ir/ret.py b/src/pairs/ir/ret.py new file mode 100644 index 0000000000000000000000000000000000000000..bb235044e87e5ca08f2587b2e406399e796865f8 --- /dev/null +++ b/src/pairs/ir/ret.py @@ -0,0 +1,13 @@ +from pairs.ir.ast_node import ASTNode + +class Return(ASTNode): + def __init__(self, sim, expr): + super().__init__(sim) + self.expr = expr + self.sim.add_statement(self) + + def __str__(self): + return f"Return<{self.expr}>" + + def children(self): + return [self.expr] \ No newline at end of file diff --git a/src/pairs/ir/types.py b/src/pairs/ir/types.py index ea1d40c784552d9f8bd922210e282f5a5904f400..c2548cdd5beb799839d3d88df92a04e28bb92311 100644 --- a/src/pairs/ir/types.py +++ b/src/pairs/ir/types.py @@ -1,5 +1,6 @@ class Types: - Invalid = -1 + Invalid = -2 + Void = -1 Int32 = 0 Int64 = 1 UInt64 = 2 @@ -13,6 +14,23 @@ class Types: Matrix = 10 Quaternion = 11 + def c_accessor_keyword(sim, t): + real_kw = 'double' if sim.use_double_precision() else 'float' + return ( + real_kw if t==Types.Real + else f'pairs::Vector3<{real_kw}>' if t==Types.Vector + else f'pairs::Matrix3<{real_kw}>' if t==Types.Matrix + else f'pairs::Quaternion<{real_kw}>' if t==Types.Quaternion + else 'float' if t == Types.Float + else 'double' if t == Types.Double + else 'int' if t == Types.Int32 + else 'int64_t' if t == Types.Int64 + else 'uint64_t' if t == Types.UInt64 + else 'bool' if t == Types.Boolean + else 'void' if t == Types.Void + else '<invalid type>' + ) + def c_keyword(sim, t): real_kw = 'double' if sim.use_double_precision() else 'float' return ( @@ -20,14 +38,16 @@ class Types: else 
'float' if t == Types.Float else 'double' if t == Types.Double else 'int' if t == Types.Int32 - else 'long long int' if t == Types.Int64 - else 'unsigned long long int' if t == Types.UInt64 + else 'int64_t' if t == Types.Int64 + else 'uint64_t' if t == Types.UInt64 else 'bool' if t == Types.Boolean + else 'void' if t == Types.Void else '<invalid type>' ) def c_property_keyword(t): return "Prop_Integer" if t == Types.Int32 else \ + "Prop_UInt64" if t == Types.UInt64 else \ "Prop_Real" if t == Types.Real else \ "Prop_Vector" if t == Types.Vector else \ "Prop_Matrix" if t == Types.Matrix else \ diff --git a/src/pairs/ir/utils.py b/src/pairs/ir/utils.py index 33495668bc00d74021af97b24689513f86357bd5..2a0a707eebd7546745f6dd4017449ead7beb51cb 100644 --- a/src/pairs/ir/utils.py +++ b/src/pairs/ir/utils.py @@ -12,11 +12,3 @@ def is_terminal(node): terminal_types = (Array, ContactProperty, FeatureProperty, Iter, Neighbor, Property, Symbol, Var) return any([isinstance(node, _type) for _type in terminal_types]) - -class Print(ASTNode): - def __init__(self, sim, string): - super().__init__(sim) - self.string = string - - def __str__(self): - return f"Print<{self.string}>" diff --git a/src/pairs/ir/variables.py b/src/pairs/ir/variables.py index 3d96065c726c838f5de39710a345a8d7ea68a856..00bcc698214a3938b5ae5c3b9b6917114f596c5c 100644 --- a/src/pairs/ir/variables.py +++ b/src/pairs/ir/variables.py @@ -18,8 +18,8 @@ class Variables: self.vars = [] self.nvars = 0 - def add(self, v_name, v_type, v_value=0): - var = Var(self.sim, v_name, v_type, v_value) + def add(self, v_name, v_type, v_value=0, v_runtime_track=False): + var = Var(self.sim, v_name, v_type, v_value, v_runtime_track) self.vars.append(var) return var @@ -39,15 +39,17 @@ class Variables: class Var(ASTTerm): - def __init__(self, sim, var_name, var_type, init_value=0, temp=False): + def __init__(self, sim, var_name, var_type, init_value=0, runtime_track=False, temp=False): super().__init__(sim, 
OperatorClass.from_type(var_type)) self.var_name = var_name self.var_type = var_type self.var_init_value = Lit.cvt(sim, init_value) + self.var_runtime_track = runtime_track self.var_temporary = temp self.mutable = True self.var_bonded_arrays = [] self.device_flag = False + self.force_read = False if temp: DeclareVariable(sim, self) @@ -74,6 +76,9 @@ class Var(ASTTerm): def init_value(self): return self.var_init_value + def runtime_track(self): + return self.var_runtime_track + def add_bonded_array(self, array): self.var_bonded_arrays.append(array) @@ -100,6 +105,10 @@ class Deref(ASTTerm): def var(self): return self._var + def copy(self, deep=False): + # Terminal copies are just themselves + return self + def type(self): return self._var.type() diff --git a/src/pairs/mapping/funcs.py b/src/pairs/mapping/funcs.py index f77410043a102cf72a5719bec54e482ccb336afc..b4ec2f8f5fcb083a92f3eb5a43f6f33cc49093a5 100644 --- a/src/pairs/mapping/funcs.py +++ b/src/pairs/mapping/funcs.py @@ -7,6 +7,7 @@ from pairs.ir.loops import For, ParticleFor from pairs.ir.operators import Operators from pairs.ir.operator_class import OperatorClass from pairs.ir.properties import ContactProperty +from pairs.ir.parameters import Parameter from pairs.ir.scalars import ScalarOp from pairs.ir.types import Types from pairs.mapping.keywords import Keywords @@ -80,16 +81,16 @@ class BuildParticleIR(ast.NodeVisitor): raise Exception("Invalid operator: {}".format(ast.dump(op))) - def __init__(self, sim, ctx_symbols={}): + def __init__(self, sim, ctx_symbols={}, func_params={}): self.sim = sim self.ctx_symbols = ctx_symbols.copy() + self.func_params = func_params.copy() self.keywords = Keywords(sim) def add_symbols(self, symbols): self.ctx_symbols.update(symbols) def visit_Assign(self, node): - #print(ast.dump(node)) assert len(node.targets) == 1, "Only one target is allowed on assignments!" 
lhs = self.visit(node.targets[0]) rhs = self.visit(node.value) @@ -102,15 +103,16 @@ class BuildParticleIR(ast.NodeVisitor): def visit_AugAssign(self, node): lhs = self.visit(node.target) + # We need a copy of the target object so it is properly visited during + # compiler analyses and transformations + lhs_copy = self.visit(node.target) rhs = self.visit(node.value) op_class = OperatorClass.from_type_list([lhs.type(), rhs.type()]) - bin_op = op_class(self.sim, lhs, rhs, BuildParticleIR.get_binary_op(node.op)) + bin_op = op_class(self.sim, lhs_copy, rhs, BuildParticleIR.get_binary_op(node.op)) - if isinstance(lhs, UndefinedSymbol): - self.add_symbols({lhs.symbol_id: bin_op}) - rhs.set_label(lhs.symbol_id) - else: - Assign(self.sim, lhs, bin_op) + assert not isinstance(lhs, UndefinedSymbol), \ + f"Invalid AugAssign: symbol {lhs} not defined yet!" + Assign(self.sim, lhs, bin_op) def visit_BinOp(self, node): #print(ast.dump(node)) @@ -178,7 +180,7 @@ class BuildParticleIR(ast.NodeVisitor): def visit_If(self, node): condition = self.visit(node.test) - one_way = node.orelse is None + one_way = node.orelse is None or len(node.orelse) == 0 if one_way: for _ in Filter(self.sim, condition): @@ -210,6 +212,7 @@ class BuildParticleIR(ast.NodeVisitor): def visit_Name(self, node): symbol_types = [ self.ctx_symbols.get, + self.func_params.get, self.sim.array, self.sim.property, self.sim.feature_property, @@ -282,7 +285,11 @@ class BuildParticleIR(ast.NodeVisitor): return op_class(self.sim, operand, None, BuildParticleIR.get_unary_op(node.op)) -def compute(sim, func, cutoff_radius=None, symbols={}, pre_step=False, skip_first=False): +def compute(sim, func, cutoff_radius=None, symbols={}, parameters={}, pre_step=False, skip_first=False): + if sim._generate_whole_program: + assert not parameters, "Compute functions can't take custom parameters when generating whole program." 
+ + src = inspect.getsource(func) tree = ast.parse(src, mode='exec') #print(ast.dump(ast.parse(src, mode='exec'))) @@ -298,6 +305,7 @@ def compute(sim, func, cutoff_radius=None, symbols={}, pre_step=False, skip_firs # Convert literal symbols symbols = {symbol: Lit.cvt(sim, value) for symbol, value in symbols.items()} + parameters = {pname: Parameter(sim, pname, ptype) for pname, ptype in parameters.items()} sim.init_block() sim.module_name(func.__name__) @@ -305,14 +313,14 @@ def compute(sim, func, cutoff_radius=None, symbols={}, pre_step=False, skip_firs if nparams == 1: for i in ParticleFor(sim): for _ in Filter(sim, ScalarOp.cmp(sim.particle_flags[i] & Flags.Fixed, 0)): - ir = BuildParticleIR(sim, symbols) + ir = BuildParticleIR(sim, symbols, parameters) ir.add_symbols({params[0]: i}) ir.visit(tree) else: for interaction_data in ParticleInteraction(sim, nparams, cutoff_radius): # Start building IR - ir = BuildParticleIR(sim, symbols) + ir = BuildParticleIR(sim, symbols, parameters) ir.add_symbols({ params[0]: interaction_data.i(), params[1]: interaction_data.j(), @@ -327,12 +335,13 @@ def compute(sim, func, cutoff_radius=None, symbols={}, pre_step=False, skip_firs ir.visit(tree) - if pre_step: - sim.build_pre_step_module_with_statements(skip_first=skip_first, profile=True) - + if sim._generate_whole_program: + if pre_step: + sim.build_pre_step_module_with_statements(skip_first=skip_first, profile=True) + else: + sim.build_module_with_statements(skip_first=skip_first, profile=True) else: - sim.build_module_with_statements(skip_first=skip_first, profile=True) - + sim.build_user_defined_function() def setup(sim, func, symbols={}): src = inspect.getsource(func) @@ -358,4 +367,8 @@ def setup(sim, func, symbols={}): ir.add_symbols({params[0]: i}) ir.visit(tree) - sim.build_setup_module_with_statements() + if sim._generate_whole_program: + sim.build_setup_module_with_statements() + else: + sim.build_user_defined_function() + diff --git a/src/pairs/mapping/keywords.py 
b/src/pairs/mapping/keywords.py index ad10c960b8bf87b3f828417f7f8c3049727b23f5..51c255de3767f567182afa8dc5616e5f85151604 100644 --- a/src/pairs/mapping/keywords.py +++ b/src/pairs/mapping/keywords.py @@ -10,6 +10,7 @@ from pairs.ir.quaternions import Quaternion from pairs.ir.scalars import ScalarOp from pairs.ir.select import Select from pairs.ir.types import Types +from pairs.ir.print import Print from pairs.ir.vectors import Vector, ZeroVector from pairs.sim.shapes import Shapes @@ -30,6 +31,9 @@ class Keywords: def exists(self, keyword): method = self.get_method(f"keyword_{keyword}") return method is not None + + def keyword_printf(self, args): + Print(self.sim, *args) def keyword_is_point_mass(self, args): assert len(args) == 1, "is_point_mass() keyword requires one parameter." diff --git a/src/pairs/sim/cell_lists.py b/src/pairs/sim/cell_lists.py index 016ca1a96892e41502d97237a03edafec02d36be..4038f48fb628f6c00c55c5cf535b43e8ce1a9af3 100644 --- a/src/pairs/sim/cell_lists.py +++ b/src/pairs/sim/cell_lists.py @@ -11,21 +11,37 @@ from pairs.ir.math import Ceil from pairs.ir.scalars import ScalarOp from pairs.ir.select import Select from pairs.ir.types import Types -from pairs.ir.utils import Print +from pairs.ir.print import Print from pairs.sim.flags import Flags from pairs.sim.lowerable import Lowerable class CellLists: - def __init__(self, sim, dom_part, spacing, cutoff_radius): + def __init__(self, sim, dom_part, spacing=None, cutoff_radius=None): self.sim = sim self.dom_part = dom_part - self.spacing = spacing if isinstance(spacing, list) else [spacing for d in range(sim.ndims())] - self.cutoff_radius = cutoff_radius - self.nneighbor_cells = [math.ceil(cutoff_radius / self.spacing[d]) for d in range(sim.ndims())] - self.nstencil_max = reduce((lambda x, y: x * y), [self.nneighbor_cells[d] * 2 + 1 for d in range(sim.ndims())]) + + # Cell spacing and cutoff radius can be set at runtime + # only if they haven't been pre-set in the input script + if spacing: + 
self.spacing = spacing if isinstance(spacing, list) else [spacing for d in range(sim.ndims())] + self.runtime_spacing = False + else: + assert self.sim._generate_whole_program==False, "Cell spacing needs to be defined when generating whole program." + self.spacing = self.sim.add_array('spacing', self.sim.ndims(), Types.Real) + self.runtime_spacing = True + + if cutoff_radius: + self.cutoff_radius = cutoff_radius + self.runtime_cutoff_radius = False + else: + assert self.sim._generate_whole_program==False, "cutoff_radius needs to be defined when generating whole program." + self.cutoff_radius = self.sim.add_var('cutoff_radius', Types.Real) + self.runtime_cutoff_radius = True + # Data introduced in the simulation self.nstencil = self.sim.add_var('nstencil', Types.Int32) + self.nstencil_capacity = self.sim.add_var('nstencil_capacity', Types.Int32, 27) self.ncells = self.sim.add_var('ncells', Types.Int32, 1) self.ncells_capacity = self.sim.add_var('ncells_capacity', Types.Int32, 100000) self.cell_capacity = self.sim.add_var('cell_capacity', Types.Int32, 64) @@ -34,7 +50,7 @@ class CellLists: self.cell_particles = self.sim.add_array('cell_particles', [self.ncells_capacity, self.cell_capacity], Types.Int32) self.cell_sizes = self.sim.add_array('cell_sizes', self.ncells_capacity, Types.Int32) self.nshapes = self.sim.add_array('nshapes', [self.ncells_capacity, self.sim.max_shapes()], Types.Int32) - self.stencil = self.sim.add_array('stencil', self.nstencil_max, Types.Int32) + self.stencil = self.sim.add_array('stencil', self.nstencil_capacity, Types.Int32) self.particle_cell = self.sim.add_array('particle_cell', self.sim.particle_capacity, Types.Int32) if sim._store_neighbors_per_cell: @@ -52,8 +68,9 @@ class BuildCellListsStencil(Lowerable): def lower(self): stencil = self.cell_lists.stencil nstencil = self.cell_lists.nstencil + nstencil_capacity = self.cell_lists.nstencil_capacity spacing = self.cell_lists.spacing - nneighbor_cells = self.cell_lists.nneighbor_cells + 
cutoff_radius = self.cell_lists.cutoff_radius dim_ncells = self.cell_lists.dim_ncells ncells = self.cell_lists.ncells ncells_capacity = self.cell_lists.ncells_capacity @@ -63,6 +80,7 @@ class BuildCellListsStencil(Lowerable): self.sim.module_name("build_cell_lists_stencil") self.sim.check_resize(ncells_capacity, ncells) + self.sim.check_resize(nstencil_capacity, nstencil) for s in range(self.sim.max_shapes()): Assign(self.sim, shapes_buffer[s], self.sim.get_shape_id(s)) @@ -79,7 +97,7 @@ class BuildCellListsStencil(Lowerable): Assign(self.sim, nstencil, 0) for dim in range(self.sim.ndims()): - nneigh = nneighbor_cells[dim] + nneigh = Ceil(self.sim,(cutoff_radius / spacing[dim])) for dim_offset in For(self.sim, -nneigh, nneigh + 1): index = dim_offset if index is None else index * dim_ncells[dim] + dim_offset if dim == self.sim.ndims() - 1: diff --git a/src/pairs/sim/comm.py b/src/pairs/sim/comm.py index 3a439d8048517f2b4c8b0cb5023c1942787d71bd..00622afc06347dcd988bb9ec0cc0ddca5b7f4058 100644 --- a/src/pairs/sim/comm.py +++ b/src/pairs/sim/comm.py @@ -9,7 +9,7 @@ from pairs.ir.contexts import Contexts from pairs.ir.device import CopyArray from pairs.ir.functions import Call_Void from pairs.ir.loops import For, ParticleFor, While -from pairs.ir.utils import Print +from pairs.ir.print import Print, PrintCode from pairs.ir.select import Select from pairs.ir.sizeof import Sizeof from pairs.ir.types import Types @@ -23,42 +23,62 @@ class Comm: self.nsend_all = sim.add_var('nsend_all', Types.Int32) self.send_capacity = sim.add_var('send_capacity', Types.Int32, 200000) self.recv_capacity = sim.add_var('recv_capacity', Types.Int32, 200000) - self.elem_capacity = sim.add_var('elem_capacity', Types.Int32, 40) - self.neigh_capacity = sim.add_var('neigh_capacity', Types.Int32, 10) - self.nsend = sim.add_array('nsend', [self.neigh_capacity], Types.Int32) - self.send_offsets = sim.add_array('send_offsets', [self.neigh_capacity], Types.Int32) + self.elem_capacity = 
sim.add_var('elem_capacity', Types.Int32, 100) + self.nsend = sim.add_array('nsend', [dom_part.nranks_capacity], Types.Int32) + self.send_offsets = sim.add_array('send_offsets', [dom_part.nranks_capacity], Types.Int32) self.send_buffer = sim.add_array('send_buffer', [self.send_capacity, self.elem_capacity], Types.Real, arr_sync=False) self.send_map = sim.add_array('send_map', [self.send_capacity], Types.Int32, arr_sync=False) self.exchg_flag = sim.add_array('exchg_flag', [sim.particle_capacity], Types.Int32, arr_sync=False) self.exchg_copy_to = sim.add_array('exchg_copy_to', [self.send_capacity], Types.Int32, arr_sync=False) self.send_mult = sim.add_array('send_mult', [self.send_capacity, sim.ndims()], Types.Int32) - self.nrecv = sim.add_array('nrecv', [self.neigh_capacity], Types.Int32) - self.recv_offsets = sim.add_array('recv_offsets', [self.neigh_capacity], Types.Int32) + self.nrecv = sim.add_array('nrecv', [dom_part.nranks_capacity], Types.Int32) + self.recv_offsets = sim.add_array('recv_offsets', [dom_part.nranks_capacity], Types.Int32) self.recv_buffer = sim.add_array('recv_buffer', [self.recv_capacity, self.elem_capacity], Types.Real, arr_sync=False) self.recv_map = sim.add_array('recv_map', [self.recv_capacity], Types.Int32) self.recv_mult = sim.add_array('recv_mult', [self.recv_capacity, sim.ndims()], Types.Int32) - self.nsend_contact = sim.add_array('nsend_contact', [self.neigh_capacity], Types.Int32) - self.nrecv_contact = sim.add_array('nrecv_contact', [self.neigh_capacity], Types.Int32) - self.contact_soffsets = sim.add_array('contact_soffsets', [self.neigh_capacity], Types.Int32) - self.contact_roffsets = sim.add_array('contact_roffsets', [self.neigh_capacity], Types.Int32) + self.nsend_contact = sim.add_array('nsend_contact', [dom_part.nranks_capacity], Types.Int32) + self.nrecv_contact = sim.add_array('nrecv_contact', [dom_part.nranks_capacity], Types.Int32) + self.contact_soffsets = sim.add_array('contact_soffsets', [dom_part.nranks_capacity], 
Types.Int32) + self.contact_roffsets = sim.add_array('contact_roffsets', [dom_part.nranks_capacity], Types.Int32) + + if self.sim.properties.reduction_props(): + self.nsend_reverse = sim.add_array('nsend_reverse', [dom_part.nranks_capacity], Types.Int32) + self.send_offsets_reverse = sim.add_array('send_offsets_reverse', [dom_part.nranks_capacity], Types.Int32) + self.send_buffer_reverse = sim.add_array('send_buffer_reverse', [self.send_capacity, self.elem_capacity], Types.Real, arr_sync=False) + self.nrecv_reverse = sim.add_array('nrecv_reverse', [dom_part.nranks_capacity], Types.Int32) + self.recv_offsets_reverse = sim.add_array('recv_offsets_reverse', [dom_part.nranks_capacity], Types.Int32) + self.recv_buffer_reverse = sim.add_array('recv_buffer_reverse', [self.recv_capacity, self.elem_capacity], Types.Real, arr_sync=False) + + +class Synchronize(Lowerable): + def __init__(self, comm): + self.sim = comm.sim + self.comm = comm @pairs_inline - def synchronize(self): + def lower(self): # Every property that is not constant across timesteps and have neighbor accesses during any # interaction kernel (i.e. property[j] in force calculation kernel) prop_names = ['position', 'linear_velocity', 'angular_velocity'] prop_list = [self.sim.property(p) for p in prop_names if self.sim.property(p) is not None] - PackAllGhostParticles(self, prop_list) - CommunicateAllData(self, prop_list) - UnpackAllGhostParticles(self, prop_list) + PackAllGhostParticles(self.comm, prop_list) + CommunicateAllData(self.comm, prop_list) + UnpackAllGhostParticles(self.comm, prop_list) + + +class Borders(Lowerable): + def __init__(self, comm): + self.sim = comm.sim + self.comm = comm @pairs_inline - def borders(self): + def lower(self): # Every property that has neighbor accesses during any interaction kernel (i.e. 
property[j] # exists in any force calculation kernel) # We ignore normal because there should be no ghost half-spaces prop_names = [ + 'flags', 'uid', 'type', 'mass', @@ -71,84 +91,128 @@ class Comm: prop_list = [self.sim.property(p) for p in prop_names if self.sim.property(p) is not None] - Assign(self.sim, self.nsend_all, 0) + Assign(self.sim, self.comm.nsend_all, 0) Assign(self.sim, self.sim.nghost, 0) - for step in range(self.dom_part.number_of_steps()): + for step in range(self.comm.dom_part.number_of_steps()): if self.sim._target.is_gpu(): - CopyArray(self.sim, self.nsend, Contexts.Host, Actions.Ignore) - CopyArray(self.sim, self.nrecv, Contexts.Host, Actions.Ignore) + CopyArray(self.sim, self.comm.nsend, Contexts.Host, Actions.Ignore) + CopyArray(self.sim, self.comm.nrecv, Contexts.Host, Actions.Ignore) - for j in self.dom_part.step_indexes(step): - Assign(self.sim, self.nsend[j], 0) - Assign(self.sim, self.nrecv[j], 0) + for j in self.comm.dom_part.step_indexes(step): + Assign(self.sim, self.comm.nsend[j], 0) + Assign(self.sim, self.comm.nrecv[j], 0) if self.sim._target.is_gpu(): - CopyArray(self.sim, self.nsend, Contexts.Device, Actions.Ignore) - CopyArray(self.sim, self.nrecv, Contexts.Device, Actions.Ignore) + CopyArray(self.sim, self.comm.nsend, Contexts.Device, Actions.Ignore) + CopyArray(self.sim, self.comm.nrecv, Contexts.Device, Actions.Ignore) - DetermineGhostParticles(self, step, self.sim.cell_spacing()) - CommunicateSizes(self, step) - SetCommunicationOffsets(self, step) - PackGhostParticles(self, step, prop_list) - CommunicateData(self, step, prop_list) - UnpackGhostParticles(self, step, prop_list) + DetermineGhostParticles(self.comm, step, self.sim.cell_spacing()) + CommunicateSizes(self.comm, step) + SetCommunicationOffsets(self.comm, step) + PackGhostParticles(self.comm, step, prop_list) + CommunicateData(self.comm, step, prop_list) + UnpackGhostParticles(self.comm, step, prop_list) - step_nrecv = sum([self.nrecv[j] for j in 
self.dom_part.step_indexes(step)]) + step_nrecv = self.comm.dom_part.reduce_sum_step_indexes(step, self.comm.nrecv) Assign(self.sim, self.sim.nghost, self.sim.nghost + step_nrecv) + +class Exchange(Lowerable): + def __init__(self, comm): + self.sim = comm.sim + self.comm = comm + @pairs_inline - def exchange(self): + def lower(self): # Every property except volatiles prop_list = self.sim.properties.non_volatiles() - for step in range(self.dom_part.number_of_steps()): - Assign(self.sim, self.nsend_all, 0) - Assign(self.sim, self.sim.nghost, 0) + for step in range(self.comm.dom_part.number_of_steps()): + Assign(self.comm.sim, self.comm.nsend_all, 0) + Assign(self.comm.sim, self.sim.nghost, 0) for s in range(step + 1): - for j in self.dom_part.step_indexes(s): - Assign(self.sim, self.nsend[j], 0) - Assign(self.sim, self.nrecv[j], 0) - Assign(self.sim, self.send_offsets[j], 0) - Assign(self.sim, self.recv_offsets[j], 0) - Assign(self.sim, self.nsend_contact[j], 0) - Assign(self.sim, self.nrecv_contact[j], 0) - Assign(self.sim, self.contact_soffsets[j], 0) - Assign(self.sim, self.contact_soffsets[j], 0) + for j in self.comm.dom_part.step_indexes(s): + Assign(self.comm.sim, self.comm.nsend[j], 0) + Assign(self.comm.sim, self.comm.nrecv[j], 0) + Assign(self.comm.sim, self.comm.send_offsets[j], 0) + Assign(self.comm.sim, self.comm.recv_offsets[j], 0) + Assign(self.comm.sim, self.comm.nsend_contact[j], 0) + Assign(self.comm.sim, self.comm.nrecv_contact[j], 0) + Assign(self.comm.sim, self.comm.contact_soffsets[j], 0) + Assign(self.comm.sim, self.comm.contact_soffsets[j], 0) if self.sim._target.is_gpu(): - CopyArray(self.sim, self.nsend, Contexts.Device, Actions.Ignore) - CopyArray(self.sim, self.nrecv, Contexts.Device, Actions.Ignore) + CopyArray(self.comm.sim, self.comm.nsend, Contexts.Device, Actions.Ignore) + CopyArray(self.comm.sim, self.comm.nrecv, Contexts.Device, Actions.Ignore) - DetermineGhostParticles(self, step, 0.0) - CommunicateSizes(self, step) - 
SetCommunicationOffsets(self, step) - PackGhostParticles(self, step, prop_list) + DetermineGhostParticles(self.comm, step, 0.0) + CommunicateSizes(self.comm, step) + SetCommunicationOffsets(self.comm, step) + PackGhostParticles(self.comm, step, prop_list) if self.sim._target.is_gpu(): - send_map_size = self.nsend_all * Sizeof(self.sim, Types.Int32) - exchg_flag_size = self.sim.nlocal * Sizeof(self.sim, Types.Int32) - CopyArray(self.sim, self.send_map, Contexts.Host, Actions.ReadOnly, send_map_size) - CopyArray(self.sim, self.exchg_flag, Contexts.Host, Actions.ReadOnly, exchg_flag_size) + send_map_size = self.comm.nsend_all * Sizeof(self.comm.sim, Types.Int32) + exchg_flag_size = self.sim.nlocal * Sizeof(self.comm.sim, Types.Int32) + CopyArray(self.comm.sim, self.comm.send_map, Contexts.Host, Actions.ReadOnly, send_map_size) + CopyArray(self.comm.sim, self.comm.exchg_flag, Contexts.Host, Actions.ReadOnly, exchg_flag_size) - RemoveExchangedParticles_part1(self) + RemoveExchangedParticles_part1(self.comm) if self.sim._target.is_gpu(): - exchg_copy_to_size = self.nsend_all * Sizeof(self.sim, Types.Int32) + exchg_copy_to_size = self.comm.nsend_all * Sizeof(self.comm.sim, Types.Int32) CopyArray( - self.sim, self.exchg_copy_to, Contexts.Device, Actions.ReadOnly, exchg_copy_to_size) + self.comm.sim, self.comm.exchg_copy_to, Contexts.Device, Actions.ReadOnly, exchg_copy_to_size) - RemoveExchangedParticles_part2(self, prop_list) - CommunicateData(self, step, prop_list) - UnpackGhostParticles(self, step, prop_list) + RemoveExchangedParticles_part2(self.comm, prop_list) + CommunicateData(self.comm, step, prop_list) + UnpackGhostParticles(self.comm, step, prop_list) if self.sim._use_contact_history: - PackContactHistoryData(self, step) - CommunicateContactHistoryData(self, step) - UnpackContactHistoryData(self, step) + PackContactHistoryData(self.comm, step) + CommunicateContactHistoryData(self.comm, step) + UnpackContactHistoryData(self.comm, step) + + 
ChangeSizeAfterExchange(self.comm, step) + + +class ReverseComm(Lowerable): + def __init__(self, comm, reduce=False): + self.sim = comm.sim + self.comm = comm + self.reduce = reduce + + @pairs_inline + def lower(self): + prop_list = self.sim.properties.reduction_props() + + if prop_list : + for step in range(self.comm.dom_part.number_of_steps() - 1, -1, -1): + if self.sim._target.is_gpu(): + CopyArray(self.sim, self.comm.nsend, Contexts.Host, Actions.ReadOnly) + CopyArray(self.sim, self.comm.nrecv, Contexts.Host, Actions.ReadOnly) + CopyArray(self.sim, self.comm.send_offsets, Contexts.Host, Actions.ReadOnly) + CopyArray(self.sim, self.comm.recv_offsets, Contexts.Host, Actions.ReadOnly) + + CopyArray(self.sim, self.comm.nsend_reverse, Contexts.Host, Actions.WriteOnly) + CopyArray(self.sim, self.comm.nrecv_reverse, Contexts.Host, Actions.WriteOnly) + CopyArray(self.sim, self.comm.send_offsets_reverse, Contexts.Host, Actions.WriteOnly) + CopyArray(self.sim, self.comm.recv_offsets_reverse, Contexts.Host, Actions.WriteOnly) + + for j in self.comm.dom_part.step_indexes(step): + Assign(self.sim, self.comm.nsend_reverse[j], self.comm.nrecv[j]) + Assign(self.sim, self.comm.nrecv_reverse[j], self.comm.nsend[j]) + Assign(self.sim, self.comm.send_offsets_reverse[j], self.comm.recv_offsets[j]) + Assign(self.sim, self.comm.recv_offsets_reverse[j], self.comm.send_offsets[j]) + + PackGhostParticlesReverse(self.comm, step, prop_list) + CommunicateDataReverse(self.comm, step, prop_list) + UnpackGhostParticlesReverse(self.comm, step, prop_list, self.reduce) + + + - ChangeSizeAfterExchange(self, step) class CommunicateSizes(Lowerable): @@ -160,7 +224,7 @@ class CommunicateSizes(Lowerable): @pairs_inline def lower(self): - Call_Void(self.sim, "pairs->communicateSizes", [self.step, self.comm.nsend, self.comm.nrecv]) + Call_Void(self.sim, "pairs_runtime->communicateSizes", [self.step, self.comm.nsend, self.comm.nrecv]) class CommunicateData(Lowerable): @@ -176,12 +240,29 @@ class 
CommunicateData(Lowerable): elem_size = sum([Types.number_of_elements(self.sim, p.type()) for p in self.prop_list]) Call_Void(self.sim, - "pairs->communicateData", + "pairs_runtime->communicateData", [self.step, elem_size, self.comm.send_buffer, self.comm.send_offsets, self.comm.nsend, self.comm.recv_buffer, self.comm.recv_offsets, self.comm.nrecv]) +class CommunicateDataReverse(Lowerable): + def __init__(self, comm, step, prop_list): + super().__init__(comm.sim) + self.comm = comm + self.step = step + self.prop_list = prop_list + self.sim.add_statement(self) + + @pairs_inline + def lower(self): + elem_size = sum([Types.number_of_elements(self.sim, p.type()) for p in self.prop_list]) + Call_Void(self.sim, + "pairs_runtime->communicateDataReverse", + [self.step, elem_size, + self.comm.send_buffer_reverse, self.comm.send_offsets_reverse, self.comm.nsend_reverse, + self.comm.recv_buffer_reverse, self.comm.recv_offsets_reverse, self.comm.nrecv_reverse]) + class CommunicateContactHistoryData(Lowerable): def __init__(self, comm, step): super().__init__(comm.sim) @@ -195,7 +276,7 @@ class CommunicateContactHistoryData(Lowerable): for cp in self.sim.contact_properties]) + 1 Call_Void(self.sim, - "pairs->communicateContactHistoryData", + "pairs_runtime->communicateContactHistoryData", [self.step, nelems_per_contact, self.comm.send_buffer, self.comm.contact_soffsets, self.comm.nsend_contact, self.comm.recv_buffer, self.comm.contact_roffsets, self.comm.nrecv_contact]) @@ -214,7 +295,7 @@ class CommunicateAllData(Lowerable): Call_Void( self.sim, - "pairs->communicateAllData", + "pairs_runtime->communicateAllData", [self.comm.dom_part.number_of_steps(), elem_size, self.comm.send_buffer, self.comm.send_offsets, self.comm.nsend, self.comm.recv_buffer, self.comm.recv_offsets, self.comm.nrecv]) @@ -241,6 +322,7 @@ class DetermineGhostParticles(Lowerable): self.sim.check_resize(self.comm.send_capacity, nsend) #self.sim.check_resize(self.comm.send_capacity, nsend_all) + # 
PrintCode(self.sim, f"std::cout << \"resizes[0] {self.sim._module_name} ========== \" << pobj->resizes[0] << std::endl;") if is_exchange: for i in ParticleFor(self.sim): Assign(self.sim, exchg_flag[i], 0) @@ -274,18 +356,18 @@ class SetCommunicationOffsets(Lowerable): recv_offsets = self.comm.recv_offsets self.sim.module_name(f"set_communication_offsets{self.step}") - isend = 0 - irecv = 0 + isend = self.sim.add_temp_var(0) + irecv = self.sim.add_temp_var(0) for i in range(self.step): for j in self.comm.dom_part.step_indexes(i): - isend += nsend[j] - irecv += nrecv[j] + Assign(self.sim, isend, ScalarOp.inline(isend + nsend[j])) + Assign(self.sim, irecv, ScalarOp.inline(irecv + nrecv[j])) for j in self.comm.dom_part.step_indexes(self.step): Assign(self.sim, send_offsets[j], isend) Assign(self.sim, recv_offsets[j], irecv) - isend += nsend[j] - irecv += nrecv[j] + Assign(self.sim, isend, ScalarOp.inline(isend + nsend[j])) + Assign(self.sim, irecv, ScalarOp.inline(irecv + nrecv[j])) class PackGhostParticles(Lowerable): @@ -307,9 +389,9 @@ class PackGhostParticles(Lowerable): send_mult = self.comm.send_mult self.sim.module_name(f"pack_ghost_particles{self.step}_" + "_".join([str(p.id()) for p in self.prop_list])) - step_indexes = self.comm.dom_part.step_indexes(self.step) - start = self.comm.send_offsets[step_indexes[0]] - for i in For(self.sim, start, ScalarOp.inline(start + sum([self.comm.nsend[j] for j in step_indexes]))): + start = self.comm.send_offsets[self.comm.dom_part.first_step_index(self.step)] + end = ScalarOp.inline(start + self.comm.dom_part.reduce_sum_step_indexes(self.step, self.comm.nsend)) + for i in For(self.sim, start, end): p_offset = 0 m = send_map[i] for p in self.prop_list: @@ -329,6 +411,44 @@ class PackGhostParticles(Lowerable): Assign(self.sim, send_buffer[i][p_offset], cast_fn(p[m])) p_offset += 1 +class PackGhostParticlesReverse(Lowerable): + def __init__(self, comm, step, prop_list): + super().__init__(comm.sim) + self.comm = comm + 
self.step = step + self.prop_list = prop_list + self.sim.add_statement(self) + + def get_elems_per_particle(self): + return sum([Types.number_of_elements(self.sim, p.type()) for p in self.prop_list]) + + @pairs_device_block + def lower(self): + nlocal = self.sim.nlocal + send_buffer_reverse = self.comm.send_buffer_reverse + send_buffer_reverse.set_stride(1, self.get_elems_per_particle()) + + self.sim.module_name(f"pack_ghost_particles_reverse{self.step}_" + "_".join([str(p.id()) for p in self.prop_list])) + + start = self.comm.send_offsets_reverse[self.comm.dom_part.first_step_index(self.step)] + end = ScalarOp.inline(start + self.comm.dom_part.reduce_sum_step_indexes(self.step, self.comm.nsend_reverse)) + for i in For(self.sim, start, end): + p_offset = 0 + m = nlocal + i + for p in self.prop_list: + if not Types.is_scalar(p.type()): + nelems = Types.number_of_elements(self.sim, p.type()) + for e in range(nelems): + src = p[m][e] + Assign(self.sim, send_buffer_reverse[i][p_offset + e], src) + + p_offset += nelems + + else: + cast_fn = lambda x: Cast(self.sim, x, Types.Real) if p.type() != Types.Real else x + Assign(self.sim, send_buffer_reverse[i][p_offset], cast_fn(p[m])) + p_offset += 1 + class UnpackGhostParticles(Lowerable): def __init__(self, comm, step, prop_list): @@ -348,9 +468,9 @@ class UnpackGhostParticles(Lowerable): recv_buffer.set_stride(1, self.get_elems_per_particle()) self.sim.module_name(f"unpack_ghost_particles{self.step}_" + "_".join([str(p.id()) for p in self.prop_list])) - step_indexes = self.comm.dom_part.step_indexes(self.step) - start = self.comm.recv_offsets[step_indexes[0]] - for i in For(self.sim, start, ScalarOp.inline(start + sum([self.comm.nrecv[j] for j in step_indexes]))): + start = self.comm.recv_offsets[self.comm.dom_part.first_step_index(self.step)] + end = ScalarOp.inline(start + self.comm.dom_part.reduce_sum_step_indexes(self.step, self.comm.nrecv)) + for i in For(self.sim, start, end): p_offset = 0 for p in self.prop_list: if 
not Types.is_scalar(p.type()): @@ -365,6 +485,49 @@ class UnpackGhostParticles(Lowerable): Assign(self.sim, p[nlocal + i], cast_fn(recv_buffer[i][p_offset])) p_offset += 1 +class UnpackGhostParticlesReverse(Lowerable): + def __init__(self, comm, step, prop_list, reduce=False): + super().__init__(comm.sim) + self.comm = comm + self.step = step + self.prop_list = prop_list + self.reduce = reduce + self.sim.add_statement(self) + + + def get_elems_per_particle(self): + return sum([Types.number_of_elements(self.sim, p.type()) for p in self.prop_list]) + + @pairs_device_block + def lower(self): + send_map = self.comm.send_map + recv_buffer_reverse = self.comm.recv_buffer_reverse + recv_buffer_reverse.set_stride(1, self.get_elems_per_particle()) + self.sim.module_name(f"unpack_ghost_particles_reverse{self.step}_" + "_".join([str(p.id()) for p in self.prop_list])) + + start = self.comm.recv_offsets_reverse[self.comm.dom_part.first_step_index(self.step)] + end = ScalarOp.inline(start + self.comm.dom_part.reduce_sum_step_indexes(self.step, self.comm.nrecv_reverse)) + for i in For(self.sim, start, end): + p_offset = 0 + m = send_map[i] + for p in self.prop_list: + if not Types.is_scalar(p.type()): + nelems = Types.number_of_elements(self.sim, p.type()) + for e in range(nelems): + if self.reduce: + AtomicInc(self.sim, p[m][e], recv_buffer_reverse[i][p_offset + e]) + else: + Assign(self.sim, p[m][e], recv_buffer_reverse[i][p_offset + e]) + + p_offset += nelems + + else: + cast_fn = lambda x: Cast(self.sim, x, p.type()) if p.type() != Types.Real else x + if self.reduce: + AtomicInc(self.sim, p[m], cast_fn(recv_buffer_reverse[i][p_offset])) + else: + Assign(self.sim, p[m], cast_fn(recv_buffer_reverse[i][p_offset])) + p_offset += 1 class PackAllGhostParticles(Lowerable): def __init__(self, comm, prop_list): @@ -423,9 +586,7 @@ class UnpackAllGhostParticles(Lowerable): recv_buffer.set_stride(1, self.get_elems_per_particle()) self.sim.module_name(f"unpack_all_ghost_particles" + 
"_".join([str(p.id()) for p in self.prop_list])) - nrecv_size = sum([len(dom_part.step_indexes(s)) for s in range(dom_part.number_of_steps())]) - nrecv_all = sum([self.comm.nrecv[j] for j in range(nrecv_size)]) - + nrecv_all = self.comm.dom_part.reduce_sum_all_steps(self.comm.nrecv) for i in For(self.sim, 0, nrecv_all): p_offset = 0 for p in self.prop_list: @@ -517,7 +678,8 @@ class ChangeSizeAfterExchange(Lowerable): def lower(self): self.sim.module_name(f"change_size_after_exchange{self.step}") self.sim.check_resize(self.sim.particle_capacity, self.sim.nlocal) - Assign(self.sim, self.sim.nlocal, self.sim.nlocal + sum([self.comm.nrecv[j] for j in self.comm.dom_part.step_indexes(self.step)])) + nrecv = self.comm.dom_part.reduce_sum_step_indexes(self.step, self.comm.nrecv) + Assign(self.sim, self.sim.nlocal, self.sim.nlocal + nrecv) class PackContactHistoryData(Lowerable): @@ -612,7 +774,6 @@ class UnpackContactHistoryData(Lowerable): contact_used = self.sim._contact_history.contact_used self.sim.module_name(f"unpack_contact_history{self.step}") - step_indexes = self.comm.dom_part.step_indexes(self.step) nelems_per_contact = sum([Types.number_of_elements(self.sim, cp.type()) \ for cp in self.sim.contact_properties]) + 1 diff --git a/src/pairs/sim/contact_history.py b/src/pairs/sim/contact_history.py index 9a1a94d76e41ce8bd045ace9df45fcb45e0e8838..51b76f49d5bd9e2458f1af57d241a9b485a80bdc 100644 --- a/src/pairs/sim/contact_history.py +++ b/src/pairs/sim/contact_history.py @@ -4,7 +4,7 @@ from pairs.ir.branches import Branch, Filter from pairs.ir.loops import ParticleFor, For, While from pairs.ir.scalars import ScalarOp from pairs.ir.types import Types -from pairs.ir.utils import Print +from pairs.ir.print import Print from pairs.sim.interaction import NeighborFor from pairs.sim.lowerable import Lowerable diff --git a/src/pairs/sim/domain.py b/src/pairs/sim/domain.py index a30635397da67196f6d72b8c337d42e7a0062779..2560f1a8759727a4d7c0167da12955a88c67aa63 100644 --- 
a/src/pairs/sim/domain.py +++ b/src/pairs/sim/domain.py @@ -1,17 +1,18 @@ from pairs.ir.block import pairs_inline -from pairs.ir.functions import Call_Void -from pairs.ir.types import Types from pairs.sim.lowerable import Lowerable - class InitializeDomain(Lowerable): def __init__(self, sim): super().__init__(sim) @pairs_inline def lower(self): - dom_part = self.sim.domain_partitioning() - grid_array = [(self.sim.grid.min(d), self.sim.grid.max(d)) for d in range(self.sim.ndims())] - Call_Void(self.sim, "pairs->initDomain", [param for delim in grid_array for param in delim]), - Call_Void(self.sim, "pairs->fillCommunicationArrays", [dom_part.neighbor_ranks, dom_part.pbc, dom_part.subdom]) + self.sim.domain_partitioning().initialize() + +class UpdateDomain(Lowerable): + def __init__(self, sim): + super().__init__(sim) + @pairs_inline + def lower(self): + self.sim.domain_partitioning().update() diff --git a/src/pairs/sim/domain_partitioners.py b/src/pairs/sim/domain_partitioners.py index 6e00ad848f8496fbf8c24aa36ac3a06e813dcc07..d99c6cd8aba45f7e344309fe953cd53585053b97 100644 --- a/src/pairs/sim/domain_partitioners.py +++ b/src/pairs/sim/domain_partitioners.py @@ -2,10 +2,10 @@ class DomainPartitioners: Invalid = -1 Regular = 0 RegularXY = 1 - BoxList = 2 + BlockForest = 2 def c_keyword(layout): - return "Regular" if layout == DomainPartitioners.Regular else \ - "RegularXY" if layout == DomainPartitioners.RegularXY else \ - "BoxList" if layout == DomainPartitioners.BoxList else \ + return "RegularPartitioning" if layout == DomainPartitioners.Regular else \ + "RegularXYPartitioning" if layout == DomainPartitioners.RegularXY else \ + "BlockForestPartitioning" if layout == DomainPartitioners.BlockForest else \ "Invalid" diff --git a/src/pairs/sim/domain_partitioning.py b/src/pairs/sim/domain_partitioning.py index 901df44744426921cb896e9c2781caff73c5521d..485e1a6cbb8c8fe54dabdadd7efd0e7368c3d23b 100644 --- a/src/pairs/sim/domain_partitioning.py +++ 
b/src/pairs/sim/domain_partitioning.py @@ -1,20 +1,27 @@ -from pairs.ir.block import pairs_device_block, pairs_host_block -from pairs.ir.branches import Branch, Filter -from pairs.ir.loops import For, ParticleFor -from pairs.ir.utils import Print +from pairs.ir.assign import Assign +from pairs.ir.branches import Filter +from pairs.ir.loops import For +from pairs.ir.functions import Call_Int, Call_Void, Call from pairs.ir.scalars import ScalarOp from pairs.ir.select import Select from pairs.ir.types import Types from pairs.sim.flags import Flags -from pairs.sim.lowerable import Lowerable - - +from pairs.ir.lit import Lit +from pairs.sim.grid import MutableGrid +from pairs.ir.device import CopyArray +from pairs.ir.contexts import Contexts +from pairs.ir.actions import Actions +from pairs.sim.load_balancing_algorithms import LoadBalancingAlgorithms +from pairs.ir.print import PrintCode class DimensionRanges: def __init__(self, sim): - self.sim = sim - self.neighbor_ranks = sim.add_static_array('neighbor_ranks', [sim.ndims() * 2], Types.Int32) - self.pbc = sim.add_static_array('pbc', [sim.ndims() * 2], Types.Int32) - self.subdom = sim.add_static_array('subdom', [sim.ndims() * 2], Types.Real) + self.sim = sim + self.nranks = 6 + self.nranks_capacity = self.nranks + self.neighbor_ranks = sim.add_static_array('neighbor_ranks', [sim.ndims() * 2], Types.Int32) + self.pbc = sim.add_static_array('pbc', [sim.ndims() * 2], Types.Int32) + self.subdom = sim.add_static_array('subdom', [sim.ndims() * 2], Types.Real) + self.rank = sim.add_var('rank', Types.Int32) def min(self, dim): return self.subdom[dim * 2 + 0] @@ -28,6 +35,33 @@ class DimensionRanges: def step_indexes(self, step): return [step * 2 + 0, step * 2 + 1] + def first_step_index(self, step): + return self.step_indexes(step)[0] + + def reduce_sum_all_steps(self, array): + total_size = sum([len(self.step_indexes(s)) for s in range(self.number_of_steps())]) + return sum([array[i] for i in range(total_size)]) + + def 
reduce_sum_step_indexes(self, step, array): + return sum([array[i] for i in self.step_indexes(step)]) + + def initialize(self): + grid_array = [self.sim.grid.min(d) for d in range(self.sim.ndims())] + [self.sim.grid.max(d) for d in range(self.sim.ndims())] + Call_Void(self.sim, "pairs_runtime->initDomain", grid_array) + + def update(self): + Call_Void(self.sim, "pairs_runtime->updateDomain", []) + Assign(self.sim, self.rank, Call_Int(self.sim, "pairs_runtime->getDomainPartitioner()->getRank", [])) + + Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['neighbor_ranks', self.neighbor_ranks, self.sim.ndims() * 2]) + Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['pbc', self.pbc, self.sim.ndims() * 2]) + Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['subdom', self.subdom, self.sim.ndims() * 2]) + + if isinstance(self.sim.grid, MutableGrid): + for d in range(self.sim.dims): + Assign(self.sim, self.sim.grid.min(d), Call(self.sim, "pairs_runtime->getDomainPartitioner()->getMin", [d], Types.Real)) + Assign(self.sim, self.sim.grid.max(d), Call(self.sim, "pairs_runtime->getDomainPartitioner()->getMax", [d], Types.Real)) + def ghost_particles(self, step, position, offset=0.0): # Particles with one of the following flags are ignored flags_to_exclude = (Flags.Infinite | Flags.Global) @@ -40,11 +74,8 @@ class DimensionRanges: pbc_shifts = [0 if d != step else self.pbc[j] for d in range(self.sim.ndims())] yield i, j, self.neighbor_ranks[j], pbc_shifts - - def prev_neighbor(self, j, step, position, offset, flags_to_exclude): particle_flags = self.sim.particle_flags - j = step * 2 + 1 for i in For(self.sim, 0, self.sim.nlocal + self.sim.nghost): for _ in Filter(self.sim, ScalarOp.cmp(particle_flags[i] & flags_to_exclude, 0)): for _ in Filter(self.sim, position[i][step] > self.subdom[j] - offset): @@ -63,3 +94,160 @@ class DimensionRanges: j = step * 2 + 1 for _ in Filter(self.sim, ScalarOp.inline(ScalarOp.cmp(self.pbc[j], 0))): yield from 
prev_neighbor(self, j, step, position, offset, flags_to_exclude) + + +class BlockForest: + def __init__(self, sim): + self.sim = sim + self.load_balancer = None + self.regrid_min = None + self.regrid_max = None + self.reduce_step = sim.add_var('reduce_step', Types.Int32) # this var is treated as a tmp (workaround for gpu) + self.reduce_step.force_read = True + self.rank = sim.add_var('rank', Types.Int32) + self.nranks = sim.add_var('nranks', Types.Int32) + self.nranks_capacity = sim.add_var('nranks_capacity', Types.Int32, init_value=27) + self.ntotal_aabbs = sim.add_var('ntotal_aabbs', Types.Int32) + self.aabb_capacity = sim.add_var('aabb_capacity', Types.Int32, init_value=27) + self.ranks = sim.add_array('ranks', [self.nranks_capacity], Types.Int32) + self.naabbs = sim.add_array('naabbs', [self.nranks_capacity], Types.Int32) + self.aabb_offsets = sim.add_array('aabb_offsets', [self.nranks_capacity], Types.Int32) + self.aabbs = sim.add_array('aabbs', [self.aabb_capacity, 6], Types.Real) + self.subdom = sim.add_array('subdom', [sim.ndims() * 2], Types.Real) + + def min(self, dim): + return self.subdom[dim * 2 + 0] + + def max(self, dim): + return self.subdom[dim * 2 + 1] + + def number_of_steps(self): + return 1 + + def step_indexes(self, step): + yield from For(self.sim, 0, self.nranks, not_kernel=True) + + def first_step_index(self, step): + return 0 + + def reduce_sum_all_steps(self, array): + return self.reduce_sum_step_indexes(0, array) + + def reduce_sum_step_indexes(self, step, array): + Assign(self.sim, self.reduce_step, 0) + for i in For(self.sim, 0, self.nranks, not_kernel=True): + Assign(self.sim, self.reduce_step, ScalarOp.inline( self.reduce_step + array[i])) + + return self.reduce_step + + def initialize(self): + grid_array = [self.sim.grid.min(d) for d in range(self.sim.ndims())] + [self.sim.grid.max(d) for d in range(self.sim.ndims())] + + Call_Void(self.sim, "pairs_runtime->initDomain", + grid_array + self.sim._pbc + ([True] if self.load_balancer is 
not None else [])) + + if self.load_balancer is not None: + PrintCode(self.sim, "pairs_runtime->getDomainPartitioner()->initWorkloadBalancer" + f"({LoadBalancingAlgorithms.c_keyword(self.load_balancer)}, {self.regrid_min}, {self.regrid_max});") + + # Call_Void(self.sim, "pairs_runtime->getDomainPartitioner()->initWorkloadBalancer", + # [self.load_balancer, self.regrid_min, self.regrid_max]) + + def update(self): + Call_Void(self.sim, "pairs_runtime->updateDomain", []) + Assign(self.sim, self.rank, Call_Int(self.sim, "pairs_runtime->getDomainPartitioner()->getRank", [])) + Assign(self.sim, self.nranks, Call_Int(self.sim, "pairs_runtime->getNumberOfNeighborRanks", [])) + + for _ in Filter(self.sim, ScalarOp.neq(self.nranks, 0)): + Assign(self.sim, self.ntotal_aabbs, Call_Int(self.sim, "pairs_runtime->getNumberOfNeighborAABBs", [])) + + for _ in Filter(self.sim, self.nranks_capacity < self.nranks): + Assign(self.sim, self.nranks_capacity, self.nranks + 10) + self.ranks.realloc() + self.naabbs.realloc() + self.aabb_offsets.realloc() + + for _ in Filter(self.sim, self.aabb_capacity < self.ntotal_aabbs): + Assign(self.sim, self.aabb_capacity, self.ntotal_aabbs + 20) + self.aabbs.realloc() + + CopyArray(self.sim, self.ranks, Contexts.Host, Actions.WriteOnly, self.nranks) + CopyArray(self.sim, self.naabbs, Contexts.Host, Actions.WriteOnly, self.nranks) + CopyArray(self.sim, self.aabb_offsets, Contexts.Host, Actions.WriteOnly, self.nranks) + CopyArray(self.sim, self.aabbs, Contexts.Host, Actions.WriteOnly, self.ntotal_aabbs * 6) + CopyArray(self.sim, self.subdom, Contexts.Host, Actions.WriteOnly) + + Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['ranks', self.ranks, self.nranks]) + Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['naabbs', self.naabbs, self.nranks]) + Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['aabb_offsets', self.aabb_offsets, self.nranks]) + Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['aabbs', self.aabbs, 
self.ntotal_aabbs * 6]) + Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['subdom', self.subdom, self.sim.ndims() * 2]) + + if isinstance(self.sim.grid, MutableGrid): + for d in range(self.sim.dims): + Assign(self.sim, self.sim.grid.min(d), Call(self.sim, "pairs_runtime->getDomainPartitioner()->getMin", [d], Types.Real)) + Assign(self.sim, self.sim.grid.max(d), Call(self.sim, "pairs_runtime->getDomainPartitioner()->getMax", [d], Types.Real)) + + def ghost_particles(self, step, position, offset=0.0): + ''' TODO : If we have pbc, a sinlge particle can be a ghost particle multiple times (at different locations) for the same neighbor block, + so this function should have the capability to yield more than one particle for every neighbor. + But currently it doesn't have that capability, so we need at least 2 blocks in the dimensions that we have pbc. + (eg: a particle in a 1x1x1 block config with pbc <ture, true, true> can be ghost at 7 other locations) + ''' + # Particles with one of the following flags are ignored + flags_to_exclude = (Flags.Infinite | Flags.Global) + + for r in self.step_indexes(0): # for every neighbor rank + for i in For(self.sim, 0, self.sim.nlocal): # for every local particle in this rank + particle_flags = self.sim.particle_flags + + for _ in Filter(self.sim, ScalarOp.cmp(particle_flags[i] & flags_to_exclude, 0)): + for aabb_id in For(self.sim, self.aabb_offsets[r], self.aabb_offsets[r] + self.naabbs[r]): # for every aabb of this neighbor + for _ in Filter(self.sim, ScalarOp.neq(self.ranks[r] , self.rank)): # if my neighobr is not my own rank + full_cond = None + pbc_shifts = [] + + for d in range(self.sim.ndims()): + aabb_min = self.aabbs[aabb_id][d * 2 + 0] + aabb_max = self.aabbs[aabb_id][d * 2 + 1] + d_pbc = 0 + d_length = self.sim.grid.length(d) + + if self.sim._pbc[d]: + center = aabb_min + (aabb_max - aabb_min) * 0.5 # center of neighbor block + dist = position[i][d] - center # distance of our particle from center of neighbor + 
cond_pbc_neg = dist > (d_length * 0.5) + cond_pbc_pos = dist < -(d_length * 0.5) + + d_pbc = Select(self.sim, cond_pbc_neg, -1, Select(self.sim, cond_pbc_pos, 1, 0)) + + adj_pos = position[i][d] + d_pbc * d_length + d_cond = ScalarOp.and_op(adj_pos > aabb_min - offset, adj_pos < aabb_max + offset) + full_cond = d_cond if full_cond is None else ScalarOp.and_op(full_cond, d_cond) + pbc_shifts.append(d_pbc) + + for _ in Filter(self.sim, full_cond): + yield i, r, self.ranks[r], pbc_shifts + + for _ in Filter(self.sim, ScalarOp.cmp(self.ranks[r] , self.rank)): # if my neighbor is me (cuz I'm the only rank in a dimension that has pbc) + pbc_shifts = [] + isghost = Lit(self.sim, 0) + + for d in range(self.sim.ndims()): + aabb_min = self.aabbs[aabb_id][d * 2 + 0] + aabb_max = self.aabbs[aabb_id][d * 2 + 1] + center = aabb_min + (aabb_max - aabb_min) * 0.5 # center of neighbor block + dist = position[i][d] - center # distance of our particle from center of neighbor + d_pbc = 0 + d_length = self.sim.grid.length(d) + + if self.sim._pbc[d]: + cond_pbc_neg = dist > (d_length*0.5 - offset) + cond_pbc_pos = dist < -(d_length*0.5 - offset) + d_pbc = Select(self.sim, cond_pbc_neg, -1, Select(self.sim, cond_pbc_pos, 1, 0)) + isghost = ScalarOp.or_op(isghost, d_pbc) + + pbc_shifts.append(d_pbc) + + for _ in Filter(self.sim, isghost): + yield i, r, self.ranks[r], pbc_shifts diff --git a/src/pairs/sim/instrumentation.py b/src/pairs/sim/instrumentation.py index dedc7c18e940796e035aa97ccd3ff527e370438f..7281f13fc9f848038c4df34248d65db7e8e13c8c 100644 --- a/src/pairs/sim/instrumentation.py +++ b/src/pairs/sim/instrumentation.py @@ -13,7 +13,7 @@ class RegisterTimers(FinalLowerable): Call_Void(self.sim, "pairs::register_timer", [t, Timers.name(t)]) for m in self.sim.module_list: - if m.name != 'main': + if m.name != 'main' and m.name != 'initialize': Call_Void(self.sim, "pairs::register_timer", [m.module_id + Timers.Offset, m.name]) @@ -25,5 +25,5 @@ class RegisterMarkers(FinalLowerable): 
def lower(self): if self.sim._enable_profiler: for m in self.sim.module_list: - if m.name != 'main' and m.must_profile(): + if m.name != 'main' and m.name != 'initialize' and m.must_profile(): Call_Void(self.sim, "LIKWID_MARKER_REGISTER", [m.name])
diff --git a/src/pairs/sim/load_balancing_algorithms.py b/src/pairs/sim/load_balancing_algorithms.py
new file mode 100644
index 0000000000000000000000000000000000000000..165d151cf4c936ef2184ab9dabbb98b0e634df2f
--- /dev/null
+++ b/src/pairs/sim/load_balancing_algorithms.py
@@ -0,0 +1,19 @@
+class LoadBalancingAlgorithms:
+    """Integer identifiers for load-balancing algorithms."""
+    Morton = 0
+    Hilbert = 1
+    Diffusive = 3
+    Metis = 2
+
+    @staticmethod
+    def c_keyword(algorithm):
+        """Return the name of `algorithm` as a string.
+
+        Yields "Invalid" when `algorithm` matches none of the identifiers above.
+        Declared static so it also works when looked up on an instance.
+        """
+        return "Hilbert" if algorithm == LoadBalancingAlgorithms.Hilbert else \
+               "Morton" if algorithm == LoadBalancingAlgorithms.Morton else \
+               "Diffusive" if algorithm == LoadBalancingAlgorithms.Diffusive else \
+               "Metis" if algorithm == LoadBalancingAlgorithms.Metis else \
+               "Invalid"
diff --git a/src/pairs/sim/neighbor_lists.py b/src/pairs/sim/neighbor_lists.py index 5662522b2c2d178319b6aea8f0ad12d92d0960f9..bc50e7e796355f3d7163a8b8b9080e83a58810f1 100644 --- a/src/pairs/sim/neighbor_lists.py +++ b/src/pairs/sim/neighbor_lists.py @@ -4,7 +4,7 @@ from pairs.ir.branches import Branch, Filter from pairs.ir.layouts import Layouts from pairs.ir.loops import ParticleFor from pairs.ir.types import Types -from pairs.ir.utils import Print +from pairs.ir.print import Print from pairs.sim.interaction import ParticleInteraction from pairs.sim.lowerable import Lowerable diff --git a/src/pairs/sim/properties.py b/src/pairs/sim/properties.py index 775fe19a69a39c0c8278a3aa4a77228b3073e897..85eb027763f203a9fdfea9b0d31db826944b46c4 100644 --- a/src/pairs/sim/properties.py +++ b/src/pairs/sim/properties.py @@ -4,7 +4,7 @@ from pairs.ir.loops import ParticleFor from pairs.ir.memory import Malloc, Realloc from pairs.ir.properties import RegisterProperty, RegisterContactProperty from pairs.ir.types import Types
-from pairs.ir.utils import Print +from pairs.ir.print import Print from pairs.sim.lowerable import Lowerable, FinalLowerable from functools import reduce import operator diff --git a/src/pairs/sim/simulation.py b/src/pairs/sim/simulation.py index 291f085666fdb6cdac4e4f3d3b3a28969ab76c78..f7360b4e4e8d28c6f4a5407f5af9162188c0a890 100644 --- a/src/pairs/sim/simulation.py +++ b/src/pairs/sim/simulation.py @@ -4,7 +4,7 @@ from pairs.ir.branches import Filter from pairs.ir.features import Features, FeatureProperties from pairs.ir.kernel import Kernel from pairs.ir.layouts import Layouts -from pairs.ir.module import Module +from pairs.ir.module import Module, ModuleCall from pairs.ir.properties import Properties, ContactProperties from pairs.ir.symbols import Symbol from pairs.ir.types import Types @@ -13,15 +13,16 @@ from pairs.ir.variables import Variables from pairs.mapping.funcs import compute, setup from pairs.sim.arrays import DeclareArrays from pairs.sim.cell_lists import CellLists, BuildCellLists, BuildCellListsStencil, PartitionCellLists, BuildCellNeighborLists -from pairs.sim.comm import Comm +from pairs.sim.comm import Comm, Synchronize, Borders, Exchange, ReverseComm from pairs.sim.contact_history import ContactHistory, BuildContactHistory, ClearUnusedContactHistory, ResetContactHistoryUsageStatus from pairs.sim.copper_fcc_lattice import CopperFCCLattice from pairs.sim.dem_sc_grid import DEMSCGrid -from pairs.sim.domain import InitializeDomain +from pairs.sim.domain import InitializeDomain, UpdateDomain from pairs.sim.domain_partitioners import DomainPartitioners -from pairs.sim.domain_partitioning import DimensionRanges +from pairs.sim.domain_partitioning import BlockForest, DimensionRanges +from pairs.sim.load_balancing_algorithms import LoadBalancingAlgorithms from pairs.sim.features import AllocateFeatureProperties -from pairs.sim.grid import Grid2D, Grid3D +from pairs.sim.grid import Grid2D, Grid3D, MutableGrid from pairs.sim.instrumentation import 
RegisterMarkers, RegisterTimers from pairs.sim.lattice import ParticleLattice from pairs.sim.neighbor_lists import NeighborLists, BuildNeighborLists @@ -32,9 +33,12 @@ from pairs.sim.timestep import Timestep from pairs.sim.variables import DeclareVariables from pairs.sim.vtk import VTKWrite from pairs.transformations import Transformations +from pairs.code_gen.interface import InterfaceModules class Simulation: + """P4IRS Simulation class, this class is the center of kernel simulations which contains all + fundamental data structures to generate a P4IRS simulation code""" def __init__( self, code_gen, @@ -44,10 +48,15 @@ class Simulation: double_prec=False, use_contact_history=False, particle_capacity=800000, - neighbor_capacity=100): + neighbor_capacity=100, + generate_whole_program=False): + # Code generator for the simulation self.code_gen = code_gen self.code_gen.assign_simulation(self) + self._generate_whole_program = generate_whole_program + + # Data structures to be generated self.position_prop = None self.properties = Properties(self) self.vars = Variables(self) @@ -55,60 +64,104 @@ class Simulation: self.features = Features(self) self.feature_properties = FeatureProperties(self) self.contact_properties = ContactProperties(self) - self.particle_capacity = self.add_var('particle_capacity', Types.Int32, particle_capacity) + + # General capacities, sizes and particle properties + self.sim_timestep = self.add_var('sim_timestep', Types.Int32, runtime=True) + self.particle_capacity = \ + self.add_var('particle_capacity', Types.Int32, particle_capacity, runtime=True) self.neighbor_capacity = self.add_var('neighbor_capacity', Types.Int32, neighbor_capacity) - self.nlocal = self.add_var('nlocal', Types.Int32) - self.nghost = self.add_var('nghost', Types.Int32) + self.nlocal = self.add_var('nlocal', Types.Int32, runtime=True) + self.nghost = self.add_var('nghost', Types.Int32, runtime=True) self.resizes = self.add_array('resizes', 3, Types.Int32, arr_sync=False) - 
self.particle_uid = self.add_property('uid', Types.Int32, 0) + self.particle_uid = self.add_property('uid', Types.UInt64, 0) self.particle_shape = self.add_property('shape', Types.Int32, 0) self.particle_flags = self.add_property('flags', Types.Int32, 0) + + # Grid for the simulation self.grid = None + + # Acceleration structures self.cell_lists = None self._store_neighbors_per_cell = False self.neighbor_lists = None + self.update_cells_procedures = Block(self, []) + + # Context information used to partially build the program AST self.scope = [] self.nested_count = 0 self.nest = False self._capture_statements = True self._block = Block(self, []) - self.setups = Block(self, []) + + # Different segments of particle code/functions + self.create_domain = Block(self, []) + self.create_domain_at_initialization = False + + self.setup_particles = Block(self, []) + self.module_list = [] + self.kernel_list = [] + + # Individual user-defined and interface modules are created only when generate_whole_program is False + self.udf_module_list = [] + self.interface_module_list = [] + + # User-defined functions to be called by other subroutines (used only when generate_whole_program is True) self.setup_functions = [] self.pre_step_functions = [] self.functions = [] - self.module_list = [] - self.kernel_list = [] + + # Structures to generated resize code for capacities self._check_properties_resize = False self._resizes_to_check = {} - self._module_name = None - self._double_prec = double_prec - self.dims = dims - self.ntimesteps = timesteps - self.expr_id = 0 - self.iter_id = 0 - self.reneighbor_frequency = 1 + + # VTK data self.vtk_file = None self.vtk_frequency = 0 + + # Domain partitioning self._dom_part = None self._partitioner = None - self._target = None - self._pbc = [True for _ in range(dims)] + self._comm = None + + # Contact history self._use_contact_history = use_contact_history self._contact_history = ContactHistory(self) if use_contact_history else None - self._shapes 
= shapes - self._compute_half = False - self._apply_list = None - self._enable_profiler = False - self._compute_thermo = 0 + + + self._module_name = None # Current module name + self._double_prec = double_prec # Use double-precision FP arithmetic + self.dims = dims # Number of dimensions + self.ntimesteps = timesteps # Number of time-steps + self.reneighbor_frequency = 1 # Re-neighbor frequency + self.rebalance_frequency = 0 # Re-balance frequency for dynamic load balancing + self._target = None # Hardware target info + self._pbc = [True for _ in range(dims)] # PBC flags for each dimension + self._shapes = shapes # List of shapes used in the simulation + self._compute_half = False # Compute half of interactions (Newton 3D Law) + self._apply_list = None # Context elements when using apply() directive + self._enable_profiler = False # Enable/disable profiler + self._compute_thermo = 0 # Compute thermo information def set_domain_partitioner(self, partitioner): + """Selects domain-partitioner used and create its object for this simulation instance""" self._partitioner = partitioner if partitioner in (DomainPartitioners.Regular, DomainPartitioners.RegularXY): self._dom_part = DimensionRanges(self) + elif partitioner == DomainPartitioners.BlockForest: + self._dom_part = BlockForest(self) + else: raise Exception("Invalid domain partitioner.") + + def set_workload_balancer(self, algorithm=LoadBalancingAlgorithms.Morton, + regrid_min=100, regrid_max=1000, rebalance_frequency=0): + assert self._partitioner == DomainPartitioners.BlockForest, "Load balancing is only supported by BlockForest." 
+ self.rebalance_frequency = rebalance_frequency + self._dom_part.load_balancer = algorithm + self._dom_part.regrid_min = regrid_min + self._dom_part.regrid_max = regrid_max def partitioner(self): return self._partitioner @@ -128,12 +181,33 @@ class Simulation: def max_shapes(self): return len(self._shapes) + def add_udf_module(self, module): + assert isinstance(module, Module), "add_udf_module(): Given parameter is not of type Module!" + assert module.user_defined and not module.interface + if module.name not in [m.name for m in self.udf_module_list]: + self.udf_module_list.append(module) + + def add_interface_module(self, module): + assert isinstance(module, Module), "add_interface_module(): Given parameter is not of type Module!" + assert module.interface and not module.user_defined + if module.name not in [m.name for m in self.interface_module_list]: + self.interface_module_list.append(module) + def add_module(self, module): assert isinstance(module, Module), "add_module(): Given parameter is not of type Module!" + assert not module.interface and not module.user_defined if module.name not in [m.name for m in self.module_list]: self.module_list.append(module) + def interface_modules(self): + return self.interface_module_list + + def udf_modules(self): + return self.udf_module_list + def modules(self): + """List simulation modules, with main always in the last position""" + sorted_mods = [] main_mod = None for m in self.module_list: @@ -142,7 +216,10 @@ class Simulation: else: main_mod = m - return sorted_mods + [main_mod] + if main_mod is not None: + sorted_mods += [main_mod] + + return sorted_mods def add_kernel(self, kernel): assert isinstance(kernel, Kernel), "add_kernel(): Given parameter is not of type Kernel!" @@ -163,9 +240,9 @@ class Simulation: assert len(pbc_config) == self.dims, "PBC must be specified for each dimension." 
self._pbc = pbc_config - def add_property(self, prop_name, prop_type, value=0.0, volatile=False): + def add_property(self, prop_name, prop_type, value=0.0, volatile=False, reduce=False): assert self.property(prop_name) is None, f"Property already defined: {prop_name}" - return self.properties.add(prop_name, prop_type, value, volatile) + return self.properties.add(prop_name, prop_type, value, volatile, p_reduce=reduce) def add_position(self, prop_name, value=[0.0, 0.0, 0.0], volatile=False, layout=Layouts.AoS): assert self.property(prop_name) is None, f"Property already defined: {prop_name}" @@ -176,10 +253,18 @@ class Simulation: assert self.feature(feature_name) is None, f"Feature already defined: {feature_name}" return self.features.add(feature_name, nkinds) - def add_feature_property(self, feature_name, prop_name, prop_type, prop_data): + def add_feature_property(self, feature_name, prop_name, prop_type, prop_data=None): feature = self.feature(feature_name) assert feature is not None, f"Feature not found: {feature_name}" assert self.property(prop_name) is None, f"Property already defined: {prop_name}" + + array_size = feature.nkinds()**2 * Types.number_of_elements(self, prop_type) + + if not prop_data: + prop_data = [0 for i in range(array_size)] + else: + assert len(prop_data) == array_size, f"Incorrect array size for {prop_name}: Expected array size = {array_size}" + return self.feature_properties.add(feature, prop_name, prop_type, prop_data) def add_contact_property(self, prop_name, prop_type, prop_default, layout=Layouts.AoS): @@ -212,9 +297,9 @@ class Simulation: def array(self, arr_name): return self.arrays.find(arr_name) - def add_var(self, var_name, var_type, init_value=0): + def add_var(self, var_name, var_type, init_value=0, runtime=False): assert self.var(var_name) is None, f"Variable already defined: {var_name}" - return self.vars.add(var_name, var_type, init_value) + return self.vars.add(var_name, var_type, init_value, runtime) def 
add_temp_var(self, init_value): return self.vars.add_temp(init_value) @@ -226,33 +311,46 @@ class Simulation: return self.vars.find(var_name) def set_domain(self, grid): + """Set domain bounds. + If the domain is set through this function, the 'set_domain' module won't be generated in the modular version. + Use this function only if you do not need to set domain at runtime. + This function is required only for whole-program generation.""" + self.create_domain_at_initialization = True self.grid = Grid3D(self, grid[0], grid[1], grid[2], grid[3], grid[4], grid[5]) - self.setups.add_statement(InitializeDomain(self)) + self.create_domain.add_statement(InitializeDomain(self)) def reneighbor_every(self, frequency): self.reneighbor_frequency = frequency def create_particle_lattice(self, grid, spacing, props={}): - self.setups.add_statement(ParticleLattice(self, grid, spacing, props, self.position())) + self.setup_particles.add_statement(ParticleLattice(self, grid, spacing, props, self.position())) def read_particle_data(self, filename, prop_names, shape_id): + """Generate statement to read particle data from file""" props = [self.property(prop_name) for prop_name in prop_names] - self.setups.add_statement(ReadParticleData(self, filename, props, shape_id)) + self.setup_particles.add_statement(ReadParticleData(self, filename, props, shape_id)) def copper_fcc_lattice(self, nx, ny, nz, rho, temperature, ntypes): - self.setups.add_statement(CopperFCCLattice(self, nx, ny, nz, rho, temperature, ntypes)) + """Specific initialization for MD Copper FCC lattice case""" + self.setup_particles.add_statement(CopperFCCLattice(self, nx, ny, nz, rho, temperature, ntypes)) def dem_sc_grid(self, xmax, ymax, zmax, spacing, diameter, min_diameter, max_diameter, initial_velocity, particle_density, ntypes): - self.setups.add_statement( + """Specific initialization for DEM grid""" + self.setup_particles.add_statement( DEMSCGrid(self, xmax, ymax, zmax, spacing, diameter, min_diameter, 
max_diameter, initial_velocity, particle_density, ntypes)) - def build_cell_lists(self, spacing, store_neighbors_per_cell=False): + def build_cell_lists(self, spacing=None, store_neighbors_per_cell=False): + """Add routines to build the linked-cells acceleration structure. + Leave spacing as None so it can be set at runtime.""" self._store_neighbors_per_cell = store_neighbors_per_cell self.cell_lists = CellLists(self, self._dom_part, spacing, spacing) return self.cell_lists - def build_neighbor_lists(self, spacing): + def build_neighbor_lists(self, spacing=None): + """Add routines to build the Verlet Lists acceleration structure. + Leave spacing as None so it can be set at runtime.""" + assert self._store_neighbors_per_cell is False, \ "Using neighbor-lists with store_neighbors_per_cell option is invalid." @@ -260,13 +358,14 @@ class Simulation: self.neighbor_lists = NeighborLists(self, self.cell_lists) return self.neighbor_lists - def compute(self, func, cutoff_radius=None, symbols={}, pre_step=False, skip_first=False): - return compute(self, func, cutoff_radius, symbols, pre_step, skip_first) + def compute(self, func, cutoff_radius=None, symbols={}, parameters={}, pre_step=False, skip_first=False): + return compute(self, func, cutoff_radius, symbols, parameters, pre_step, skip_first) def setup(self, func, symbols={}): return setup(self, func, symbols) def init_block(self): + """Initialize new block in this simulation instance""" self._block = Block(self, []) self._check_properties_resize = False self._resizes_to_check = {} @@ -276,24 +375,30 @@ class Simulation: self._module_name = name def check_properties_resize(self): + """Enable checking properties for resizing""" self._check_properties_resize = True def check_resize(self, capacity, size): + """Determine that capacity must always be checked with respect to size in a block/module""" + if capacity not in self._resizes_to_check: self._resizes_to_check[capacity] = size else: raise Exception("Two sizes assigned to 
same capacity!") def build_setup_module_with_statements(self): + """Build a Module in the setup part of the program using the last initialized block""" + self.setup_functions.append( Module(self, name=self._module_name, block=Block(self, self._block), resizes_to_check=self._resizes_to_check, check_properties_resize=self._check_properties_resize, - run_on_device=False)) + run_on_device=True)) def build_pre_step_module_with_statements(self, run_on_device=True, skip_first=False, profile=False): + """Build a Module in the pre-step part of the program using the last initialized block""" module = Module(self, name=self._module_name, block=Block(self, self._block), resizes_to_check=self._resizes_to_check, @@ -310,6 +415,7 @@ class Simulation: self.pre_step_functions.append(module) def build_module_with_statements(self, run_on_device=True, skip_first=False, profile=False): + """Build a Module in the compute part of the program using the last initialized block""" module = Module(self, name=self._module_name, block=Block(self, self._block), resizes_to_check=self._resizes_to_check, @@ -324,10 +430,22 @@ class Simulation: else: self.functions.append(module) + def build_user_defined_function(self, run_on_device=True): + """Build a user-defined Module that will be callable seperately as part of the interface""" + Module(self, name=self._module_name, + block=Block(self, self._block), + resizes_to_check=self._resizes_to_check, + check_properties_resize=self._check_properties_resize, + run_on_device=run_on_device, + user_defined=True) + + def capture_statements(self, capture=True): + """When toggled, all constructed statements are captured and automatically added to the last initialized block""" self._capture_statements = capture def add_statement(self, stmt): + """Add captured statements to the last block when _capture_statements is toggled""" if self._capture_statements: if not self.scope: self._block.add_statement(stmt) @@ -337,6 +455,7 @@ class Simulation: return stmt def 
nest_mode(self): + """When explicitly constructing loops in P4IRS, make them nested""" self.nested_count = 0 self.nest = True yield @@ -345,9 +464,11 @@ class Simulation: self.scope.pop() def enter(self, scope): + """Enter a new scope, used for tracking scopes when building P4IRS AST elements""" self.scope.append(scope) def leave(self): + """Leave last scope, used for tracking scopes when building P4IRS AST elements""" if not self.nest: self.scope.pop() else: @@ -379,26 +500,86 @@ class Simulation: def compute_thermo(self, every=0): self._compute_thermo = every + def create_update_cells_block(self): + subroutines = [ + BuildCellLists(self, self.cell_lists), + PartitionCellLists(self, self.cell_lists) + ] + + # Add routine to build neighbor-lists per cell + if self._store_neighbors_per_cell: + subroutines.append(BuildCellNeighborLists(self, self.cell_lists)) + + # Add routine to build neighbor-lists per particle (standard Verlet Lists) + if self.neighbor_lists is not None: + subroutines.append(BuildNeighborLists(self, self.neighbor_lists)) + + self.update_cells_procedures.add_statement(subroutines) + def generate(self): + """Generate the code for the simulation""" assert self._target is not None, "Target not specified!" - comm = Comm(self, self._dom_part) + + # Initialize communication instance with the specified domain-partitioner + self._comm = Comm(self, self._dom_part) + self.create_update_cells_block() + + if self._generate_whole_program: + self.generate_program() + else: + self.generate_library() + + def generate_library(self): + InterfaceModules(self).create_all() + + # User defined functions are wrapped inside seperate interface modules here. + # The udf's have the same name as their interface module but they get implemented in the pairs::internal scope. 
+ for m in self.udf_module_list: + module = Module(self, name=m.name, block=Block(self, m), interface=True) + module._id = m._id + + Transformations(self.interface_modules(), self._target).apply_all() + + # Generate library + self.code_gen.generate_library() + + # Generate getters for the runtime functions + self.code_gen.generate_interfaces() + + def generate_program(self): + assert self.grid, "No domain is created. Set domain bounds with 'set_domain'." + + reverse_comm_module = ReverseComm(self._comm, reduce=True) + + # Params that determine when a method must be called only when reneighboring every_reneighbor_params = {'every': self.reneighbor_frequency} - timestep_procedures = self.pre_step_functions + [ - (comm.exchange(), every_reneighbor_params), - (comm.borders(), comm.synchronize(), every_reneighbor_params), - (BuildCellLists(self, self.cell_lists), every_reneighbor_params), - (PartitionCellLists(self, self.cell_lists), every_reneighbor_params) - ] + timestep_procedures = [] - if self._store_neighbors_per_cell: - timestep_procedures.append( - (BuildCellNeighborLists(self, self.cell_lists), every_reneighbor_params)) + # First steps executed during each time-step in the simulation + timestep_procedures += self.pre_step_functions - if self.neighbor_lists is not None: - timestep_procedures.append( - (BuildNeighborLists(self, self.neighbor_lists), every_reneighbor_params)) + # Rebalancing routines + if self.rebalance_frequency: + update_domain_procedures = Block.from_list(self, [ + Exchange(self._comm), + UpdateDomain(self), + Borders(self._comm), + ResetVolatileProperties(self), + BuildCellListsStencil(self, self.cell_lists), + self.update_cells_procedures + ]) + + timestep_procedures.append((update_domain_procedures, {'every': self.rebalance_frequency})) + # Communication routines + timestep_procedures += [(Exchange(self._comm), every_reneighbor_params), + (Borders(self._comm), Synchronize(self._comm), every_reneighbor_params)] + + # Update acceleration data 
structures + timestep_procedures += [(self.update_cells_procedures, every_reneighbor_params)] + + # Add routines for contact history management if self._use_contact_history: if self.neighbor_lists is not None: timestep_procedures.append( @@ -407,46 +588,66 @@ class Simulation: timestep_procedures.append(ResetContactHistoryUsageStatus(self, self._contact_history)) - timestep_procedures += [ResetVolatileProperties(self)] + self.functions + # Reset volatile properties + timestep_procedures += [ResetVolatileProperties(self)] + + # Add computational kernels + timestep_procedures += self.functions + # For whole-program-generation, add reverse_comm wherever needed in the timestep loop (eg: after computational kernels) like this: + timestep_procedures += [reverse_comm_module] + + # Clear unused contact history if self._use_contact_history: timestep_procedures.append(ClearUnusedContactHistory(self, self._contact_history)) + # Add routine to calculate thermal data if self._compute_thermo != 0: timestep_procedures.append( (ComputeThermo(self), {'every': self._compute_thermo})) + + # Data structures and timer/markers initialization + inits = Block.from_list(self, [ + DeclareVariables(self), + DeclareArrays(self), + AllocateProperties(self), + AllocateContactProperties(self), + AllocateFeatureProperties(self), + RegisterTimers(self), + RegisterMarkers(self) + ]) + + # Construct the time-step loop timestep = Timestep(self, self.ntimesteps, timestep_procedures) self.enter(timestep.block) + # Add routine to write VTK data when set if self.vtk_file is not None: timestep.add(VTKWrite(self, self.vtk_file, timestep.timestep(), self.vtk_frequency)) self.leave() + # Combine everything into a whole program + # Initialization and setup functions, together with time-step loop + # UpdateDomain is added after setup_particles because particles must be already present in the simulation body = Block.from_list(self, [ - self.setups, + self.create_domain, + self.setup_particles, + 
UpdateDomain(self), self.setup_functions, BuildCellListsStencil(self, self.cell_lists), timestep.as_block() ]) - inits = Block.from_list(self, [ - DeclareVariables(self), - DeclareArrays(self), - AllocateProperties(self), - AllocateContactProperties(self), - AllocateFeatureProperties(self), - RegisterTimers(self), - RegisterMarkers(self) - ]) - program = Module(self, name='main', block=Block.merge_blocks(inits, body)) # Apply transformations transformations = Transformations(program, self._target) transformations.apply_all() - # Generate program - #ASTGraph(self.functions, "functions.dot").render() + # Generate whole program self.code_gen.generate_program(program) + + # Generate getters for the runtime functions + self.code_gen.generate_interfaces() diff --git a/src/pairs/sim/timestep.py b/src/pairs/sim/timestep.py index 1281a4d60e4a75272f34b3087cf3ad5c4e772661..abef09a055507f431af554bf7163603e35f8e3cc 100644 --- a/src/pairs/sim/timestep.py +++ b/src/pairs/sim/timestep.py @@ -10,7 +10,7 @@ class Timestep: def __init__(self, sim, nsteps, item_list=None): self.sim = sim self.block = Block(sim, []) - self.timestep_loop = For(sim, 0, nsteps + 1, self.block) + self.timestep_loop = For(sim, 0, nsteps + 1, self.block) if self.sim._generate_whole_program else None if item_list is not None: for item in item_list: @@ -31,13 +31,13 @@ class Timestep: self.add(item) def timestep(self): - return self.timestep_loop.iter() + return self.timestep_loop.iter() if self.sim._generate_whole_program else self.sim.sim_timestep def add(self, item, exec_every=0, item_else=None, skip_first=False): assert exec_every >= 0, "exec_every parameter must be higher or equal than zero!" 
stmts = item if not isinstance(item, Block) else item.statements() stmts_else = None - ts = self.timestep_loop.iter() + ts = self.timestep() self.sim.enter(self.block) if item_else is not None: @@ -65,7 +65,7 @@ class Timestep: self.sim.capture_statements(False) block = Block(self.sim, [Call_Void(self.sim, "pairs::start_timer", [Timers.All]), - self.timestep_loop, + self.timestep_loop if self.sim._generate_whole_program else self.block, Call_Void(self.sim, "pairs::stop_timer", [Timers.All])]) self.sim.capture_statements(_capture) diff --git a/src/pairs/transformations/__init__.py b/src/pairs/transformations/__init__.py index 7d5cab5efcdede5a7d5dea3926014d2d9571360b..733d5c10fbaec621d37db6a7009e64493f752719 100644 --- a/src/pairs/transformations/__init__.py +++ b/src/pairs/transformations/__init__.py @@ -2,7 +2,7 @@ import time from pairs.analysis import Analysis from pairs.transformations.blocks import LiftDeclarations, MergeAdjacentBlocks from pairs.transformations.devices import AddDeviceCopies, AddDeviceKernels, AddHostReferencesToModules, AddDeviceReferencesToModules -from pairs.transformations.expressions import ReplaceSymbols, LowerNeighborIndexes, SimplifyExpressions, PruneUnusedVectorIndexes, AddExpressionDeclarations +from pairs.transformations.expressions import ReplaceSymbols, LowerNeighborIndexes, ConstantPropagation, SimplifyExpressions, PruneUnusedVectorIndexes, AddExpressionDeclarations from pairs.transformations.instrumentation import AddModulesInstrumentation from pairs.transformations.loops import LICM from pairs.transformations.lower import Lower @@ -10,24 +10,29 @@ from pairs.transformations.modules import DereferenceWriteVariables, AddResizeLo class Transformations: - def __init__(self, ast, target): - self._ast = ast + def __init__(self, ast_list, target): + self._ast_list = ast_list if isinstance(ast_list, list) else [ast_list] self._target = target self._module_resizes = None def apply(self, transformation, data=None): print(f"Applying 
transformation: {type(transformation).__name__}... ", end="") start = time.time() - transformation.set_ast(self._ast) - if data is not None: - transformation.set_data(data) - self._ast = transformation.mutate() + new_ast_list = [] + for ast in self._ast_list: + transformation.set_ast(ast) + if data is not None: + transformation.set_data(data) + + new_ast_list.append(transformation.mutate()) + + self._ast_list = new_ast_list elapsed = time.time() - start print(f"{elapsed:.2f}s elapsed.") def analysis(self): - return Analysis(self._ast) + return Analysis(self._ast_list) def lower(self, lower_finals=False): nlowered = 1 @@ -42,6 +47,7 @@ class Transformations: self.apply(PruneUnusedVectorIndexes()) self.apply(LowerNeighborIndexes()) self.apply(ReplaceSymbols()) + self.apply(ConstantPropagation()) self.apply(SimplifyExpressions()) def lift_declarations_to_owner_blocks(self): @@ -61,12 +67,14 @@ class Transformations: self._module_resizes = add_resize_logic.module_resizes self.analysis().fetch_modules_references() self.apply(DereferenceWriteVariables()) + self.analysis().infer_modules_return_types() self.apply(ReplaceModulesByCalls(), [self._module_resizes]) self.apply(MergeAdjacentBlocks()) def add_device_copies(self): if self._target.is_gpu(): self.apply(AddDeviceCopies(), [self._module_resizes]) + self.analysis().fetch_modules_references() def add_device_kernels(self): if self._target.is_gpu(): @@ -97,8 +105,13 @@ class Transformations: self.modularize() self.add_device_kernels() self.add_device_copies() - self.add_instrumentation() self.lower(True) self.add_expression_declarations() self.add_host_references_to_modules() self.add_device_references_to_modules() + + # TODO: Place stop timers before the function returns + # or simply don't instrument modules that have a non-void return type + # to avoid having to deal with returns within conditional blocks + # self.add_instrumentation() + diff --git a/src/pairs/transformations/devices.py 
b/src/pairs/transformations/devices.py index e050f9ed52c82b253970ec35c17097b0f710fc93..d33f30ef174ba06b07d34908aff49926ea982fe2 100644 --- a/src/pairs/transformations/devices.py +++ b/src/pairs/transformations/devices.py @@ -4,7 +4,7 @@ from pairs.ir.block import Block from pairs.ir.branches import Branch, Filter from pairs.ir.cast import Cast from pairs.ir.contexts import Contexts -from pairs.ir.device import CopyArray, CopyContactProperty, CopyProperty, CopyVar, DeviceStaticRef, HostRef +from pairs.ir.device import CopyArray, CopyContactProperty, CopyProperty, CopyFeatureProperty, CopyVar, DeviceStaticRef, HostRef from pairs.ir.functions import Call_Void from pairs.ir.kernel import Kernel, KernelLaunch from pairs.ir.lit import Lit @@ -46,6 +46,9 @@ class AddDeviceCopies(Mutator): for prop, action in s.module.properties().items(): new_stmts += [CopyProperty(s.sim, prop, copy_context, action)] + for fp, action in s.module.feature_properties().items(): + new_stmts += [CopyFeatureProperty(s.sim, fp, copy_context, action)] + for contact_prop, action in s.module.contact_properties().items(): new_stmts += [CopyContactProperty(s.sim, contact_prop, copy_context, action)] @@ -88,6 +91,7 @@ class AddDeviceKernels(Mutator): super().__init__(ast) self._module_name = None self._kernel_id = 0 + self._device_module = False def create_kernel(self, sim, iterator, rmax, block): kernel_name = f"{self._module_name}_kernel{self._kernel_id}" @@ -99,62 +103,28 @@ class AddDeviceKernels(Mutator): self._kernel_id += 1 return kernel + + def mutate_For(self, ast_node): + if ast_node.is_kernel_candidate() and self._device_module: + kernel = self.create_kernel(ast_node.sim, ast_node.iterator, ast_node.max, ast_node.block) + ast_node = KernelLaunch(ast_node.sim, kernel, ast_node.iterator, ast_node.min, ast_node.max) + + else: + ast_node.block = self.mutate(ast_node.block) + + return ast_node def mutate_Module(self, ast_node): + parent_runs_on_device = self._device_module if 
ast_node.run_on_device: + self._device_module = True self._module_name = ast_node.name self._kernel_id = 0 - new_stmts = [] - for stmt in ast_node._block.stmts: - if stmt is not None: - if isinstance(stmt, For) and stmt.is_kernel_candidate(): - kernel = self.create_kernel(ast_node.sim, stmt.iterator, stmt.max, stmt.block) - new_stmts.append( - KernelLaunch(ast_node.sim, kernel, stmt.iterator, stmt.min, stmt.max)) - - else: - if isinstance(stmt, Branch): - stmt = self.check_and_mutate_branch(stmt) - - new_stmts.append(stmt) - - ast_node._block.stmts = new_stmts - ast_node._block = self.mutate(ast_node._block) + self._device_module = parent_runs_on_device return ast_node - def check_and_mutate_branch(self, ast_node): - new_stmts = [] - for stmt in ast_node.block_if.stmts: - if stmt is not None: - if isinstance(stmt, For) and stmt.is_kernel_candidate(): - kernel = self.create_kernel(ast_node.sim, stmt.iterator, stmt.max, stmt.block) - new_stmts.append( - KernelLaunch(ast_node.sim, kernel, stmt.iterator, stmt.min, stmt.max)) - - else: - new_stmts.append(stmt) - - ast_node.block_if.stmts = new_stmts - - if ast_node.block_else is not None: - new_stmts = [] - for stmt in ast_node.block_else.stmts: - if stmt is not None: - if isinstance(stmt, For) and stmt.is_kernel_candidate(): - kernel = self.create_kernel(ast_node.sim, stmt.iterator, stmt.max, stmt.block) - new_stmts.append( - KernelLaunch(ast_node.sim, kernel, stmt.iterator, stmt.min, stmt.max)) - - else: - new_stmts.append(stmt) - - ast_node.block_else.stmts = new_stmts - - return ast_node - - class AddHostReferencesToModules(Mutator): def __init__(self, ast=None): super().__init__(ast) diff --git a/src/pairs/transformations/expressions.py b/src/pairs/transformations/expressions.py index bd85bf81c65608dd40bb5ced66a360fdad42720e..750bce6b0d8cb579c59ea542aac297c8f1dc9c76 100644 --- a/src/pairs/transformations/expressions.py +++ b/src/pairs/transformations/expressions.py @@ -5,6 +5,51 @@ from pairs.ir.operators import 
Operators from pairs.ir.types import Types +class ConstantPropagation(Mutator): + def __init__(self, ast=None): + super().__init__(ast) + + def mutate_ScalarOp(self, ast_node): + sim = ast_node.lhs.sim + ast_node.lhs = self.mutate(ast_node.lhs) + if not ast_node.operator().is_unary(): + ast_node.rhs = self.mutate(ast_node.rhs) + + if (not ast_node.operator().is_unary() and + isinstance(ast_node.lhs, Lit) and isinstance(ast_node.rhs, Lit)): + + if ast_node.op == Operators.Add: + return Lit(sim, ast_node.lhs.value + ast_node.rhs.value) + + if ast_node.op == Operators.Sub: + return Lit(sim, ast_node.lhs.value - ast_node.rhs.value) + + if ast_node.op == Operators.Mul: + return Lit(sim, ast_node.lhs.value * ast_node.rhs.value) + + if ast_node.op == Operators.Div: + return Lit(sim, ast_node.lhs.value / ast_node.rhs.value) + + if ast_node.op == Operators.Gt: + return Lit(sim, 1) if Lit(sim, ast_node.lhs.value > ast_node.rhs.value) else Lit(sim, 0) + + if ast_node.op == Operators.Lt: + return Lit(sim, 1) if Lit(sim, ast_node.lhs.value < ast_node.rhs.value) else Lit(sim, 0) + + if ast_node.op == Operators.Geq: + return Lit(sim, 1) if Lit(sim, ast_node.lhs.value >= ast_node.rhs.value) else Lit(sim, 0) + + if ast_node.op == Operators.Leq: + return Lit(sim, 1) if Lit(sim, ast_node.lhs.value <= ast_node.rhs.value) else Lit(sim, 0) + + if ast_node.op == Operators.Eq: + return Lit(sim, 1) if Lit(sim, ast_node.lhs.value == ast_node.rhs.value) else Lit(sim, 0) + + if ast_node.op == Operators.Neq: + return Lit(sim, 1) if Lit(sim, ast_node.lhs.value != ast_node.rhs.value) else Lit(sim, 0) + + return ast_node + class ReplaceSymbols(Mutator): def __init__(self, ast=None): super().__init__(ast) @@ -148,7 +193,7 @@ class AddExpressionDeclarations(Mutator): self.declared_exprs.append(atomic_add_id) return ast_node - + def mutate_Block(self, ast_node): block_id = id(ast_node) self.decls[block_id] = [] diff --git a/src/pairs/transformations/instrumentation.py 
b/src/pairs/transformations/instrumentation.py index 1e70bdb7ebe2753d3031cf3a1ed87dd047b69650..88b73c0d8406b97392d267ace3b1e1bbbb3ca068 100644 --- a/src/pairs/transformations/instrumentation.py +++ b/src/pairs/transformations/instrumentation.py @@ -12,16 +12,17 @@ class AddModulesInstrumentation(Mutator): def mutate_ModuleCall(self, ast_node): ast_node._module = self.mutate(ast_node._module) module = ast_node._module - if module.name == 'main': + if module.name == 'main' or module.name == 'initialize': return ast_node - timer_id = module.module_id + Timers.Offset - start_timer = Call_Void(ast_node.sim, "pairs::start_timer", [timer_id]) - stop_timer = Call_Void(ast_node.sim, "pairs::stop_timer", [timer_id]) - if module.must_profile(): start_marker = Call_Void(ast_node.sim, "LIKWID_MARKER_START", [module.name]) stop_marker = Call_Void(ast_node.sim, "LIKWID_MARKER_STOP", [module.name]) - return Block(ast_node.sim, [start_timer, start_marker, ast_node, stop_marker, stop_timer]) + module._block = Block.from_list(ast_node.sim, [start_marker, module._block, stop_marker]) + + timer_id = module.module_id + Timers.Offset + start_timer = Call_Void(ast_node.sim, "pairs::start_timer", [timer_id]) + stop_timer = Call_Void(ast_node.sim, "pairs::stop_timer", [timer_id]) + module._block = Block.from_list(ast_node.sim, [start_timer, module._block, stop_timer]) - return Block(ast_node.sim, [start_timer, ast_node, stop_timer]) + return ast_node diff --git a/src/pairs/transformations/modules.py b/src/pairs/transformations/modules.py index 1ee2c9b15f09caa2e51dafc02cdf3405f7722e5c..4b538305087bed716f778b9095d7ff24619f5ea1 100644 --- a/src/pairs/transformations/modules.py +++ b/src/pairs/transformations/modules.py @@ -9,7 +9,7 @@ from pairs.ir.module import Module, ModuleCall from pairs.ir.mutator import Mutator from pairs.ir.properties import ReallocProperty from pairs.ir.types import Types -from pairs.ir.utils import Print +from pairs.ir.print import Print from pairs.ir.variables import 
Var, Deref from functools import reduce import operator @@ -195,7 +195,7 @@ class ReplaceModulesByCalls(Mutator): resize_stmts.append( Filter(sim, sim.resizes[resize_id] > 0, Block(sim, - [Print(sim, f"resizes[{resize_id}] -> {capacity.name()}")] + + # [Print(sim, f"resizes[{resize_id}] = " , sim.resizes[resize_id], f" {capacity.name()} = ", capacity)] + [Assign(sim, capacity, self.grow_fn(sim.resizes[resize_id]))] + [a.realloc() for a in capacity.bonded_arrays()] + props_realloc)))