diff --git a/CMakeLists.txt b/CMakeLists.txt index 27c9afc4b893be98110b0073e151c2b70e523a32..0ecd4d90006027630096f1c57c268bb5d494e7da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,5 @@ cmake_minimum_required(VERSION 3.18 FATAL_ERROR) project(pairs CXX) - # Set default build type if none is specified if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build (Debug, Release, etc.)" FORCE) @@ -9,30 +8,27 @@ endif() set(CMAKE_CXX_FLAGS_RELEASE "-O3") set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -DDEBUG") -option(USE_WALBERLA "Enable waLBerla support for using BlockForest partitioning" ON) -option(USE_MPI "USE_MPI" ON) -option(COMPILE_CUDA "COMPILE_CUDA" ON) -option(ENABLE_GPU_DIRECT "ENABLE_GPU_DIRECT" ON) -option(GENERATE_WHOLE_PROGRAM "Generate the whole program (i.e. including the 'main' function). No additional source files are needed." OFF) -option(BUILD_APP "Build a stand-alone app which uses the P4IRS modular interface. Provide your source files with -DUSER_SOURCE_FILES" OFF) +option(USE_MPI "USE_MPI" ON) +option(COMPILE_CUDA "COMPILE_CUDA" OFF) +option(GENERATE_WHOLE_PROGRAM "Generate the whole program (i.e. including the 'main' function). No additional source files are needed." OFF) +option(BUILD_APP "Build a stand-alone app which uses the P4IRS modular interface. Provide your source files with -DUSER_SOURCE_FILES" OFF) if(GENERATE_WHOLE_PROGRAM AND BUILD_APP) message(FATAL_ERROR "You must choose either GENERATE_WHOLE_PROGRAM or BUILD_APP or neither.\n - Choose neither if you only want to use the generated libraries in your project (in a seperate build system).") + Choose neither if you only want to use the P4IRS library in your project (in a seperate build system).") endif() -set(PYTHON_SCRIPT ${PYTHON_SCRIPT} CACHE PATH "The python script triggering code generation") -if(NOT EXISTS ${PYTHON_SCRIPT}) - message(FATAL_ERROR "PYTHON_SCRIPT doesn't exist! Specify it with -DPYTHON_SCRIPT=/path/to/script.py") +set(INPUT_SCRIPT ${INPUT_SCRIPT} CACHE PATH "The input python script triggering code generation") +if(NOT EXISTS ${INPUT_SCRIPT}) + message(FATAL_ERROR "INPUT_SCRIPT doesn't exist! Specify it with -DINPUT_SCRIPT=/path/to/script.py") endif() -get_filename_component(PYTHON_SCRIPT_NAME ${PYTHON_SCRIPT} NAME_WE) +get_filename_component(INPUT_SCRIPT_NAME ${INPUT_SCRIPT} NAME_WE) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") #================================================================================ # Setup directories ============================================================= #================================================================================ -file(COPY runtime DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) file(COPY data DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) set(OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/output) @@ -44,8 +40,10 @@ file(MAKE_DIRECTORY ${OUTPUT_DIR}) #================================================================================ # Generated header (internally used by runtime files) =========================== #================================================================================ -set(GEN_INTERFACE_HEADER ${CMAKE_CURRENT_BINARY_DIR}/runtime/interfaces/last_generated.hpp) -set(GEN_INTERFACE_DIR ${CMAKE_CURRENT_BINARY_DIR}/runtime/interfaces) +# TODO: Unify all interfaces +set(GEN_INTERFACE_DIR ${CMAKE_CURRENT_BINARY_DIR}/internal_interfaces) +set(GEN_INTERFACE_HEADER ${CMAKE_CURRENT_BINARY_DIR}/last_generated.hpp) +file(MAKE_DIRECTORY ${GEN_INTERFACE_DIR}) #================================================================================ # RUNTIME_COMMON_FILES ========================================================== @@ -80,6 +78,10 @@ else() list(APPEND PAIRS_LINK_LIBRARIES ${PAIRS_TARGET}) endif() +# Include P4IRS 'runtime' dir +target_include_directories(${PAIRS_TARGET} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/runtime) +list(APPEND PAIRS_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/runtime) + set_target_properties(${PAIRS_TARGET} PROPERTIES CXX_STANDARD_REQUIRED ON CXX_STANDARD 17 @@ -106,7 +108,15 @@ endif() #================================================================================ # waLBerla ====================================================================== #================================================================================ -if(USE_WALBERLA) +set(WALBERLA_DIR ${WALBERLA_DIR} CACHE PATH "Path to waLBerla source directory (required only when using BlockForest partitioning).") + +if(WALBERLA_DIR) + if(EXISTS "${WALBERLA_DIR}") + target_compile_definitions(${PAIRS_TARGET} PUBLIC USE_WALBERLA) + else() + message(FATAL_ERROR "Invalid WALBERLA_DIR: '${WALBERLA_DIR}' does not exist.") + endif() + set(RUNTIME_WALBERLA_FILES runtime/domain/block_forest.cpp ) @@ -119,7 +129,6 @@ if(USE_WALBERLA) endif() target_sources(${PAIRS_TARGET} PRIVATE ${RUNTIME_WALBERLA_FILES}) - target_compile_definitions(${PAIRS_TARGET} PUBLIC USE_WALBERLA) ## Linking walberla modules set(PAIRS_WALBERLA_DEPENDENCIES blockforest core pe) @@ -157,7 +166,7 @@ execute_process( if(COMPILE_CUDA) find_package(CUDA REQUIRED) enable_language(CUDA) - set(GEN_SOURCES "${CMAKE_CURRENT_BINARY_DIR}/${PYTHON_SCRIPT_NAME}.cu") + set(GEN_SOURCES "${CMAKE_CURRENT_BINARY_DIR}/${INPUT_SCRIPT_NAME}.cu") set(CUDA_ARCH ${CUDA_ARCH} CACHE STRING "CUDA_ARCH environment variable must be set.") set(TARGET_ARG "gpu") @@ -174,6 +183,13 @@ if(COMPILE_CUDA) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3") endif() + if(NOT DEFINED ENABLE_GPU_DIRECT) + set(ENABLE_GPU_DIRECT ON "Enable GPU Direct (default: ON when COMPILE_CUDA is ON)" FORCE) + else() + # User choice is respected here if they opt to COMPILE_CUDA without GPU Direct + set(ENABLE_GPU_DIRECT ${ENABLE_GPU_DIRECT} "Enable GPU Direct (user-defined)" FORCE) + endif() + if(ENABLE_GPU_DIRECT) target_compile_definitions(${PAIRS_TARGET} PRIVATE ENABLE_CUDA_AWARE_MPI) endif() @@ -192,7 +208,11 @@ if(COMPILE_CUDA) target_link_libraries(${PAIRS_TARGET} PUBLIC ${CUDA_LIBRARIES}) list(APPEND PAIRS_LINK_LIBRARIES ${CUDA_LIBRARIES}) else() - set(GEN_SOURCES "${CMAKE_CURRENT_BINARY_DIR}/${PYTHON_SCRIPT_NAME}.cpp") + if(ENABLE_GPU_DIRECT) + message(FATAL_ERROR "Invalid combination: ENABLE_GPU_DIRECT requires COMPILE_CUDA to be ON") + endif() + + set(GEN_SOURCES "${CMAKE_CURRENT_BINARY_DIR}/${INPUT_SCRIPT_NAME}.cpp") set(TARGET_ARG "cpu") target_sources(${PAIRS_TARGET} PRIVATE runtime/devices/dummy.cpp) endif() @@ -202,9 +222,9 @@ endif() #================================================================================ add_custom_command( OUTPUT ${GEN_SOURCES} ${GEN_INTERFACE_HEADER} - COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_SCRIPT} ${TARGET_ARG} + COMMAND ${PYTHON_EXECUTABLE} ${INPUT_SCRIPT} ${TARGET_ARG} COMMENT "Generating code with P4IRS" - DEPENDS ${PYTHON_SCRIPT}) + DEPENDS ${INPUT_SCRIPT}) add_custom_target(generated_code DEPENDS ${GEN_SOURCES} ${GEN_INTERFACE_HEADER}) add_dependencies(${PAIRS_TARGET} generated_code) @@ -230,7 +250,11 @@ endif() # LIKWID ======================================================================== #================================================================================ if(LIKWID_DIR) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLIKWID_PERFMON -pthread -L${LIKWID_DIR}/lib -llikwid ") + target_compile_options(${PAIRS_TARGET} PRIVATE -DLIKWID_PERFMON -pthread) + + target_link_libraries(${PAIRS_TARGET} PRIVATE ${LIKWID_DIR}/lib/liblikwid.a) + list(APPEND PAIRS_LINK_LIBRARIES ${LIKWID_DIR}/lib/liblikwid.a) + include_directories(${LIKWID_DIR}/include) list(APPEND PAIRS_INCLUDE_DIRS "${LIKWID_DIR}/include") endif() diff --git a/Makefile b/Makefile deleted file mode 100644 index 967a1a9f117d174c758b82a6af3c41c296ded89c..0000000000000000000000000000000000000000 --- a/Makefile +++ /dev/null @@ -1,87 +0,0 @@ -.PHONY: all build clean - -# General settings -TESTCASE=md -PYCMD=python3 - -# C/C++ compiler settings -CC=mpic++ -CFLAGS=-O3 -mavx2 -mfma -fopenmp ${MPI_FLAGS} ${LIKWID_FLAGS} -#CFLAGS=-Ofast -xHost -qopt-zmm-usage=high ${MPI_FLAGS} ${LIKWID_FLAGS} -#CFLAGS=-Ofast -xCORE-AVX512 -qopt-zmm-usage=high ${MPI_FLAGS} ${LIKWID_FLAGS} -DEBUG_FLAGS= -#DEBUG_FLAGS=-DDEBUG - -# CUDA settings -NVCC=nvcc -#NVCC_FLAGS=-O3 -mavx2 -mfma -NVCC_FLAGS=-O3 -arch=sm_80 -mavx2 -mfma -ffast-math -funroll-loops --forward-unknown-to-host-compiler -#NVCC_FLAGS=-O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler -NVCC_PATH:="$(shell which ${NVCC})" -CUDA_FLAGS=-DENABLE_CUDA_AWARE_MPI -CUDART_FLAGS=-lcudart -L /apps/SPACK/0.19.1/opt/linux-almalinux8-zen/gcc-8.5.0/nvhpc-23.7-bzxcokzjvx4stynglo4u2ffpljajzlam/Linux_x86_64/23.7/cuda/12.2/targets/x86_64-linux/lib - -# MPI settings -MPI_PATH=/apps/SPACK/0.19.1/opt/linux-almalinux8-zen/intel-2021.10.0/openmpi-4.1.6-ijsnjhq77rjc256wlrp52m37rsq6miff -MPI_FLAGS=-I${MPI_PATH}/include - -# Likwid settings -LIKWID_INC ?= -I/usr/local/include -LIKWID_DEFINES ?= -DLIKWID_PERFMON -LIKWID_LIB ?= -L/usr/local/lib -LIKWID_FLAGS = -llikwid ${LIKWID_INC} ${LIKWID_DEFINES} ${LIKWID_LIB} - -# Other -CPU_OBJ_PATH=obj_cpu -CPU_SRC="$(TESTCASE).cpp" -CPU_BIN="$(TESTCASE)_cpu" -GPU_OBJ_PATH=obj_gpu -GPU_SRC="$(TESTCASE).cu" -GPU_BIN="$(TESTCASE)_gpu" - -all: clean build $(CPU_BIN) $(GPU_BIN) - @echo "Everything was done!" - -build: - @echo "Building pairs package..." - $(PYCMD) setup.py build && $(PYCMD) setup.py install --user - -$(CPU_SRC): - @echo "Generating and compiling $(TESTCASE) example for CPU..." - @mkdir -p $(CPU_OBJ_PATH) - $(PYCMD) examples/$(TESTCASE).py cpu - -$(GPU_SRC): - @echo "Generating and compiling $(TESTCASE) example for GPU..." - @mkdir -p $(GPU_OBJ_PATH) - $(PYCMD) examples/$(TESTCASE).py gpu - -$(CPU_OBJ_PATH)/pairs.o: runtime/pairs.cpp - $(CC) -c -o $@ $< $(DEBUG_FLAGS) $(CFLAGS) - -$(CPU_OBJ_PATH)/regular_6d_stencil.o: runtime/domain/regular_6d_stencil.cpp - $(CC) -c -o $@ $< $(DEBUG_FLAGS) $(CFLAGS) - -$(CPU_OBJ_PATH)/dummy.o: runtime/devices/dummy.cpp - $(CC) -c -o $@ $< $(DEBUG_FLAGS) $(CFLAGS) - -$(GPU_OBJ_PATH)/pairs.o: runtime/pairs.cpp - $(CC) -c -o $@ $< $(DEBUG_FLAGS) $(MPI_FLAGS) $(CFLAGS) $(CUDA_FLAGS) - -$(GPU_OBJ_PATH)/regular_6d_stencil.o: runtime/domain/regular_6d_stencil.cpp - $(CC) -c -o $@ $< $(DEBUG_FLAGS) $(MPI_FLAGS) $(CFLAGS) $(CUDA_FLAGS) - -$(GPU_OBJ_PATH)/cuda_runtime.o: runtime/devices/cuda.cu - $(NVCC) $(NVCC_FLAGS) -c -o $@ $< $(DEBUG_FLAGS) $(MPI_FLAGS) $(CUDA_FLAGS) - -# Targets -$(CPU_BIN): $(CPU_SRC) $(CPU_OBJ_PATH)/pairs.o $(CPU_OBJ_PATH)/regular_6d_stencil.o $(CPU_OBJ_PATH)/dummy.o - $(CC) $(CFLAGS) -o $(CPU_BIN) $(CPU_SRC) $(CPU_OBJ_PATH)/pairs.o $(CPU_OBJ_PATH)/regular_6d_stencil.o $(CPU_OBJ_PATH)/dummy.o $(DEBUG_FLAGS) - -$(GPU_BIN): $(GPU_SRC) $(GPU_OBJ_PATH)/pairs.o $(GPU_OBJ_PATH)/regular_6d_stencil.o $(GPU_OBJ_PATH)/cuda_runtime.o - $(NVCC) $(NVCC_FLAGS) -c -o $(GPU_OBJ_PATH)/$(GPU_BIN).o $(GPU_SRC) $(DEBUG_FLAGS) $(MPI_FLAGS) $(CUDA_FLAGS) - $(CC) -o $(GPU_BIN) $(GPU_OBJ_PATH)/$(GPU_BIN).o $(GPU_OBJ_PATH)/cuda_runtime.o $(GPU_OBJ_PATH)/pairs.o $(GPU_OBJ_PATH)/regular_6d_stencil.o $(CUDART_FLAGS) $(CUDA_FLAGS) $(CFLAGS) - -clean: - @echo "Cleaning..." - rm -rf build $(CPU_BIN) $(GPU_BIN) $(CPU_SRC) $(GPU_SRC) dist pairs.egg-info functions functions.pdf $(CPU_OBJ_PATH) $(GPU_OBJ_PATH) diff --git a/README.md b/README.md index bcef6a27a6d2ebb070f8098c59934bcc9fc73a43..882644c488aa6495080216615ba203927e6d0dd6 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,6 @@ P4IRS is an open-source, stand-alone compiler and domain-specific language for particle simulations which aims at generating optimized code for different target hardwares. It is released as a Python package and allows users to define kernels, integrators and other particle routines in a high-level and straightforward fashion without the need to implement any backend code. -## Build instructions - -There is a Makefile which contains configurable environment variables such as `TESTCASE` compiler parameters evaluate P4IRS performance on different scenarios. -`TESTCASE` refers to any of the files within the `examples` directory, such as `md` and `dem`. ## Usage @@ -104,6 +100,64 @@ else: psim.generate() ``` +## Build instructions + +P4IRS can be built in 3 different modes using the CMake build system. Before we demostrate each mode, ensure you have CMake, MPI and CUDA (if targeting GPU execution) available in your environment. + +In the following, we assume we have created and navigated to a build directory: `mkdir build; cd build` + +**General CMake flags (applicable to all 3 modes):** +* Pass your input script to CMake using `-DINPUT_SCRIPT=path/to/script.py` +* Enable CUDA with `-DCOMPILE_CUDA=ON` +* Enable support for BlockForest domain partitioning and dynamic load balancing by providing the path to waLBerla source directory `-DWALBERLA_DIR=path/to/walberla` (TODO: waLBerla as a submodule) + + +### 1. Whole-program generation: +--------------------- +In this mode, everything including the `main` function is generated by P4IRS. + +1. Set `generate_whole_program=True` in the input script +2. Set the CMake flag `-DGENERATE_WHOLE_PROGRAM=ON` + +Example: Build [md.py](examples/whole-program-generation/md.py) +``` +cmake -DINPUT_SCRIPT=../examples/whole-program-generation/md.py -DGENERATE_WHOLE_PROGRAM=ON .. +``` +Now call `make` and an **executable** is built. + + +### 2. Modular stand-alone app +--------------------- +You can build a stand-alone C++ app which uses the P4IRS modular interface. + +1. Set `generate_whole_program=False` in the input script +2. Set the CMake flag `-DBUILD_APP=ON` +3. Provide the list of your source files to CMake (semicolon-seperated):`-DUSER_SOURCE_FILES=path/to/main.cpp;path/to/helper.cpp` + +Example: Build the application [sd_1.cpp](examples/modular/sd_1.cpp) with [spring_dashpot.py](examples/modular/spring_dashpot.py) +Note: In this example we assume waLBerla has been already cloned next to the P4IRS directory. + +``` +cmake -DINPUT_SCRIPT=../examples/modular/spring_dashpot.py -DBUILD_APP=ON -DUSER_SOURCE_FILES=../examples/modular/sd_1.cpp -DWALBERLA_DIR=../../walberla .. +``` +Now call `make` and an **executable** is built. + + +### 3. P4IRS as a library +--------------------- +In this mode, P4IRS is compiled as a library that can be integrated into other projects. + +1. Set `generate_whole_program=False` in the input script +2. Ensure both `BUILD_APP` and `GENERATE_WHOLE_PROGRAM` are `OFF` (they are OFF by default) + +Configure CMake and call `make` as usual, and a **static library** is built. You can then include P4IRS and its dependencies in your build system as follows: +```cmake +find_package(pairs REQUIRED HINTS "path/to/pairs/build" NO_DEFAULT_PATH) +target_include_directories(my_app PUBLIC ${PAIRS_INCLUDE_DIRS}) +target_link_directories(my_app PUBLIC ${PAIRS_LINK_DIRS}) +target_link_libraries(my_app PUBLIC ${PAIRS_LINK_LIBRARIES}) +``` + ## Citations TBD diff --git a/cmake/FindwaLBerla.cmake b/cmake/FindwaLBerla.cmake index 3eb44c5c971d010f86fef97182d3f499413b1cbf..8f87e88a03902f1c1af3900a3d4d38b921996682 100644 --- a/cmake/FindwaLBerla.cmake +++ b/cmake/FindwaLBerla.cmake @@ -1,5 +1,3 @@ -set( WALBERLA_DIR WALBERLA_DIR-NOTFOUND CACHE PATH "waLBerla path" ) - if ( WALBERLA_DIR ) # WALBERLA_DIR has to point to the waLBerla source directory # this command builds waLBerla (again) in the current build directory in the subfolder "walberla" (second argument) diff --git a/data/planes.input b/data/planes.input index 5de074bf8a14746a8f30ad3e3f612ff34a0c70c9..af3744e15f94a6ec863856ed5913c4fc18d344a3 100644 --- a/data/planes.input +++ b/data/planes.input @@ -1,2 +1,2 @@ -0,1,0.0,0.0,0.0,0.0,0.0,1.0,13 -0,1,0.8,0.015,0.2,0.0,0.0,-1.0,13 +0, 1, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 13 +0, 1, 0.8, 0.015, 0.2, 0.0, 0.0, -1.0, 13 diff --git a/data/sd_planes.input b/data/sd_planes.input index e47832e4ae0413a207b8a5e29446f78b5da2c2c7..42c7724c8b621dfca891624bc9c3d58aea6ca606 100644 --- a/data/sd_planes.input +++ b/data/sd_planes.input @@ -1,6 +1,6 @@ 0, 1, 0, 0, 0, 1, 0, 0, 13 0, 1, 0, 0, 0, 0, 1, 0, 13 0, 1, 0, 0, 0, 0, 0, 1, 13 -0, 1, 1, 1, 1, -1, 0, 0, 13 -0, 1, 1, 1, 1, 0, -1, 0, 13 -0, 1, 1, 1, 1, 0, 0, -1, 13 +0, 1, 10, 10, 10, -1, 0, 0, 13 +0, 1, 10, 10, 10, 0, -1, 0, 13 +0, 1, 10, 10, 10, 0, 0, -1, 13 diff --git a/examples/dem2.py b/examples/dem2.py deleted file mode 100644 index 7ebb47b1a335c4e0728a62500ecb0e2adcd81402..0000000000000000000000000000000000000000 --- a/examples/dem2.py +++ /dev/null @@ -1,111 +0,0 @@ -import pairs -import sys - - -def linear_spring_dashpot(i, j): - penetration_depth = squared_distance(i, j) - radius[i] - radius[j] - skip_when(penetration_depth >= 0.0) - - delta_ij = delta(i, j) - contact_normal = delta_ij / length(delta_ij) - k = radius[j] + 0.5 * penetration_depth - contact_point = position[j] + contact_normal * k - - rel_vel = -velocity_wf[i] - velocity_wf[j] - rel_vel_n = dot(rel_vel, contact_normal) * contact_normal - rel_vel_t = rel_vel - rel_vel_n - - fN = stiffness_norm[i, j] * (-penetration_depth) * contact_normal + damping_norm[i, j] * rel_vel_n; - - tan_spring_disp = tangential_spring_displacement[i, j] - impact_vel_magnitude = impact_velocity_magnitude[i, j] - impact_magnitude = select(impact_vel_magnitude > 0.0, impact_vel_magnitude, length(rel_vel)) - sticking = is_sticking[i, j] - - rotated_tan_disp = tan_spring_disp - contact_normal * (contact_normal * tan_spring_disp) - new_tan_spring_disp = dt * rel_vel_t + \ - select(squared_length(rotated_tan_disp) <= 0.0, - zero_vector(), - rotated_tan_disp * length(tan_spring_disp) / length(rotated_tan_disp)) - - fTLS = stiffness_tan[i, j] * new_tan_spring_disp + damping_tan[i, j] * rel_vel_t - fTLS_len = length(fTLS) - fTLS_inv_len = 1.0 / fTLS_len - t = select(fTLS_len > 0, fTLS / fTLS_inv_len, zero_vector()) - - f_friction_abs_static = friction_static[i, j] * length(fN) - f_friction_abs_dynamic = friction_dynamic[i, j] * length(fN) - tan_vel_threshold = 1e-8 - - cond1 = sticking == 1 and length(rel_vel_t) < tan_vel_threshold and fTLS_len < f_friction_abs_static - cond2 = sticking == 1 and fTLS_len < f_friction_abs_dynamic - f_friction_abs = select(cond1, f_friction_abs_static, f_friction_abs_dynamic) - n_sticking = select(cond1 or cond2 or fTLS_len < f_friction_abs_dynamic, 1, 0) - - if not cond1 and not cond2 and stiffness_tan[i, j] > 0.0: - tangential_spring_displacement[i, j] = \ - (f_friction_abs * t - damping_tan[i, j] * rel_vel_t) / stiffness_tan[i, j] - - else: - tangential_spring_displacement[i, j] = new_tan_spring_disp - - impact_velocity_magnitude[i, j] = impact_magnitude - is_sticking[i, j] = n_sticking - - fTabs = min(fTLS_len, f_friction_abs) - fT = fTabs * t - force[i] += fN + fT - - -def euler(i): - velocity[i] += dt * force[i] / mass[i] - position[i] += dt * velocity[i] - - -cmd = sys.argv[0] -target = sys.argv[1] if len(sys.argv[1]) > 1 else "none" -if target != 'cpu' and target != 'gpu': - print(f"Invalid target, use {cmd} <cpu/gpu>") - - -dt = 0.005 -cutoff_radius = 2.5 -skin = 0.3 -ntypes = 4 -stiffness_norm = 1.0 -stiffness_tan = 1.0 -damping_norm = 1.0 -damping_tan = 1.0 -friction_static = 1.0 -friction_dynamic = 1.0 - -psim = pairs.simulation("dem", debug=True) -psim.add_position('position') -psim.add_property('mass', pairs.double(), 1.0) -psim.add_property('velocity', pairs.vector()) -psim.add_property('velocity_wf', pairs.vector()) -psim.add_property('force', pairs.vector(), vol=True) -psim.add_property('radius', pairs.double(), 1.0) -psim.add_feature('type', ntypes) -psim.add_feature_property('type', 'stiffness_norm', pairs.double(), [stiffness_norm for i in range(ntypes * ntypes)]) -psim.add_feature_property('type', 'stiffness_tan', pairs.double(), [stiffness_tan for i in range(ntypes * ntypes)]) -psim.add_feature_property('type', 'damping_norm', pairs.double(), [damping_norm for i in range(ntypes * ntypes)]) -psim.add_feature_property('type', 'damping_tan', pairs.double(), [damping_tan for i in range(ntypes * ntypes)]) -psim.add_feature_property('type', 'friction_static', pairs.double(), [friction_static for i in range(ntypes * ntypes)]) -psim.add_feature_property('type', 'friction_dynamic', pairs.double(), [friction_dynamic for i in range(ntypes * ntypes)]) -psim.add_contact_property('is_sticking', pairs.int32(), False) -psim.add_contact_property('tangential_spring_displacement', pairs.vector(), [0.0, 0.0, 0.0]) -psim.add_contact_property('impact_velocity_magnitude', pairs.double(), 0.0) - -psim.read_particle_data("data/fluidized_bed.input", ['mass', 'position', 'velocity']) -psim.build_neighbor_lists(cutoff_radius + skin) -psim.vtk_output(f"output/test_{target}") -psim.compute(linear_spring_dashpot, cutoff_radius, symbols={'dt': dt}) -psim.compute(euler, symbols={'dt': dt}) - -if target == 'gpu': - psim.target(pairs.target_gpu()) -else: - psim.target(pairs.target_cpu()) - -psim.generate() diff --git a/examples/dem_sd.py b/examples/dem_sd.py deleted file mode 100644 index da81aad4002cfb84d78f0cabed2ce40ab524198f..0000000000000000000000000000000000000000 --- a/examples/dem_sd.py +++ /dev/null @@ -1,184 +0,0 @@ -import math -import pairs -import sys -import os - -def update_mass_and_inertia(i): - rotation_matrix[i] = diagonal_matrix(1.0) - rotation[i] = default_quaternion() - - if is_sphere(i): - inv_inertia[i] = inversed(diagonal_matrix(0.4 * mass[i] * radius[i] * radius[i])) - - else: - mass[i] = infinity - inv_inertia[i] = 0.0 - - -def linear_spring_dashpot(i, j): - delta_ij = -penetration_depth(i, j) - skip_when(delta_ij < 0.0) - - meff = 1.0 / ((1.0 / mass[i]) + (1.0 / mass[j])) - stiffness_norm = meff * (pi * pi + lnDryResCoeff * lnDryResCoeff) / \ - (collisionTime_SI * collisionTime_SI) - damping_norm = -2.0 * meff * lnDryResCoeff / collisionTime_SI - damping_tan = sqrt(kappa) * damping_norm - - velocity_wf_i = linear_velocity[i] + cross(angular_velocity[i], contact_point(i, j) - position[i]) - velocity_wf_j = linear_velocity[j] + cross(angular_velocity[j], contact_point(i, j) - position[j]) - - rel_vel = -(velocity_wf_i - velocity_wf_j) - rel_vel_n = dot(rel_vel, contact_normal(i, j)) - rel_vel_t = rel_vel - rel_vel_n * contact_normal(i, j) - fNabs = stiffness_norm * delta_ij + damping_norm * rel_vel_n - fN = fNabs * contact_normal(i, j) - - fTabs = min(damping_tan * length(rel_vel_t), friction_dynamic[i, j] * fNabs) # static or dynamic? - fT = fTabs * normalized(rel_vel_t) - - partial_force = fN + fT - - apply(force, partial_force) - apply(torque, cross(contact_point(i, j) - position, partial_force)) - - -def euler(i): - inv_mass = 1.0 / mass[i] - position[i] += 0.5 * inv_mass * force[i] * dt * dt + linear_velocity[i] * dt - linear_velocity[i] += inv_mass * force[i] * dt - wdot = rotation_matrix[i] * (inv_inertia[i] * torque[i]) * transposed(rotation_matrix[i]) - phi = angular_velocity[i] * dt + 0.5 * wdot * dt * dt - rotation[i] = quaternion(phi, length(phi)) * rotation[i] - rotation_matrix[i] = quaternion_to_rotation_matrix(rotation[i]) - angular_velocity[i] += wdot * dt - - -def gravity(i): - volume = (4.0 / 3.0) * pi * radius[i] * radius[i] * radius[i] - force[i][2] = force[i][2] - (densityParticle_SI - densityFluid_SI) * volume * gravity_SI - # if type[i]==0: - # printf("gravity", i, volume, force[i][2], volume/2, 2>1) - - -cmd = sys.argv[0] -target = sys.argv[1] if len(sys.argv[1]) > 1 else "none" -if target != 'cpu' and target != 'gpu': - print(f"Invalid target, use {cmd} <cpu/gpu>") - -# Config file parameters -domainSize_SI=[0.1, 0.1, 0.1] -diameter_SI = 0.009 -gravity_SI = 9.81 -densityFluid_SI = 1000 -densityParticle_SI = 2550 -generationSpacing_SI = 0.01 -initialVelocity_SI = 1 -dt_SI = 5e-5 -frictionCoefficient = 0.5 -restitutionCoefficient = 0.1 -collisionTime_SI = 5e-4 -poissonsRatio = 0.22 -timeSteps = 1 -visSpacing = 200 -denseBottomLayer = False -bottomLayerOffsetFactor = 1.0 -kappa = 2.0 * (1.0 - poissonsRatio) / (2.0 - poissonsRatio) # from Thornton et al - -minDiameter_SI = diameter_SI #* 0.9 -maxDiameter_SI = diameter_SI #* 1.1 -linkedCellWidth = 1.01 * maxDiameter_SI - -ntypes = 1 -lnDryResCoeff = math.log(restitutionCoefficient) -frictionStatic = 0.0 -frictionDynamic = frictionCoefficient - -file_name = os.path.basename(__file__) -file_name_without_extension = os.path.splitext(file_name)[0] - -psim = pairs.simulation( - file_name_without_extension, - [pairs.sphere(), pairs.halfspace()], - timesteps=timeSteps, - double_prec=True, - use_contact_history=False, - particle_capacity=1000000, - neighbor_capacity=20, - debug=True, generate_whole_program=False) - -if target == 'gpu': - psim.target(pairs.target_gpu()) -else: - psim.target(pairs.target_cpu()) - #psim.target(pairs.target_cpu(parallel=True)) - -psim.add_position('position') -psim.add_property('mass', pairs.real(), 1.0) -psim.add_property('linear_velocity', pairs.vector()) -psim.add_property('angular_velocity', pairs.vector()) -psim.add_property('force', pairs.vector(), volatile=True) -psim.add_property('torque', pairs.vector(), volatile=True) -psim.add_property('hydrodynamic_force', pairs.vector(), reduce=True) -psim.add_property('hydrodynamic_torque', pairs.vector(), reduce=True) -# psim.add_property('old_hydrodynamic_force', pairs.vector()) -# psim.add_property('old_hydrodynamic_torque', pairs.vector()) -psim.add_property('radius', pairs.real(), 1.0) -psim.add_property('normal', pairs.vector()) -psim.add_property('inv_inertia', pairs.matrix()) -psim.add_property('rotation_matrix', pairs.matrix()) -psim.add_property('rotation', pairs.quaternion()) -psim.add_feature('type', ntypes) -psim.add_feature_property('type', 'friction_static', pairs.real(), [frictionStatic for i in range(ntypes * ntypes)]) -psim.add_feature_property('type', 'friction_dynamic', pairs.real(), [frictionDynamic for i in range(ntypes * ntypes)]) - -# psim.set_domain([0.0, 0.0, 0.0, domainSize_SI[0], domainSize_SI[1], domainSize_SI[2]]) -psim.set_domain_partitioner(pairs.block_forest()) -# psim.set_domain_partitioner(pairs.regular_domain_partitioner()) -psim.pbc([False, False, False]) -psim.dem_sc_grid( - domainSize_SI[0], domainSize_SI[1], domainSize_SI[2], generationSpacing_SI, - diameter_SI, minDiameter_SI, maxDiameter_SI, initialVelocity_SI, densityParticle_SI, ntypes) - -#psim.read_particle_data( -# "data/spheres.input", -# "data/spheres_4x4x2.input", -# "data/spheres_6x6x2.input", -# "data/spheres_8x8x2.input", -# ['uid', 'type', 'mass', 'radius', 'position', 'linear_velocity', 'flags'], -# pairs.sphere()) - -#psim.read_particle_data( -# "data/spheres_bottom.input", -# ['type', 'mass', 'radius', 'position', 'linear_velocity', 'flags'], -# pairs.sphere()) - -# psim.read_particle_data( -# "data/planes.input", -# ['type', 'mass', 'position', 'normal', 'flags'], -# pairs.halfspace()) - -psim.setup(update_mass_and_inertia, {'densityParticle_SI': densityParticle_SI, - 'pi': math.pi, - 'infinity': math.inf }) - -#psim.compute_half() -psim.build_cell_lists(linkedCellWidth) -psim.vtk_output(f"output/dem_{target}", frequency=visSpacing) - -psim.compute(gravity, - symbols={'densityParticle_SI': densityParticle_SI, - 'densityFluid_SI': densityFluid_SI, - 'gravity_SI': gravity_SI, - 'pi': math.pi }) - -psim.compute(linear_spring_dashpot, - linkedCellWidth, - symbols={'pi': math.pi, - 'kappa': kappa, - 'lnDryResCoeff': lnDryResCoeff, - 'collisionTime_SI': collisionTime_SI}) - -psim.compute(euler, parameters={'dt' : pairs.real()}) -# psim.compute(euler, symbols={'dt' : dt_SI}) -psim.generate() diff --git a/examples/lift.py b/examples/lift.py deleted file mode 100644 index 2b48fdf02a0afdcc7075843cdab8a1852cee9e45..0000000000000000000000000000000000000000 --- a/examples/lift.py +++ /dev/null @@ -1,14 +0,0 @@ -from coupling.parse_cpp import parse_walberla_file -from coupling.parse_cpp import get_class_method, print_tree - -filename = "mesa_pd/kernel/SpringDashpot.hpp" -translation_unit = parse_walberla_file(filename) - -# subtree = get_subtree(tu.cursor, "walberla::mesa_pd::kernel") -# print_tree(subtree) - -kernel = get_class_method( - translation_unit.cursor, - "walberla::mesa_pd::kernel::SpringDashpot", - "operator()") -print_tree(kernel) diff --git a/examples/lj_embedded.py b/examples/lj_embedded.py deleted file mode 100644 index 160c6081bacc9e0a3cd6e8561ebff9ea2eb7bf15..0000000000000000000000000000000000000000 --- a/examples/lj_embedded.py +++ /dev/null @@ -1,32 +0,0 @@ -import pairs - - -dt = 0.005 -cutoff_radius = 2.5 -skin = 0.3 -sigma = 1.0 -epsilon = 1.0 -sigma6 = sigma ** 6 - -psim = pairs.simulation("lj") -mass = psim.add_real_property('mass', 1.0) -position = psim.add_vector_property('position') -velocity = psim.add_vector_property('velocity') -force = psim.add_vector_property('force', vol=True) -psim.from_file("data/minimd_setup_4x4x4.input", ['mass', 'position', 'velocity']) -psim.create_cell_lists(2.8, 2.8) -psim.create_neighbor_lists() -psim.periodic(2.8) -psim.vtk_output("output/test") - -for i, j, delta, rsq in psim.particle_pairs(cutoff_radius, position): - sr2 = 1.0 / rsq - sr6 = sr2 * sr2 * sr2 * sigma6 - f = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon - force[i].add(delta * f) - -for i in psim.particles(): - velocity[i].add(dt * force[i] / mass[i]) - position[i].add(dt * velocity[i]) - -psim.generate() diff --git a/examples/lj_onetype.py b/examples/lj_onetype.py deleted file mode 100644 index e703122d5469e96458dd831aedbfa4cf9c456090..0000000000000000000000000000000000000000 --- a/examples/lj_onetype.py +++ /dev/null @@ -1,45 +0,0 @@ -import pairs -import sys - - -def lj(i, j): - sr2 = 1.0 / rsq - sr6 = sr2 * sr2 * sr2 * sigma6 - force[i] += delta * 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon - - -def euler(i): - velocity[i] += dt * force[i] / mass[i] - position[i] += dt * velocity[i] - - -cmd = sys.argv[0] -target = sys.argv[1] if len(sys.argv[1]) > 1 else "none" -if target != 'cpu' and target != 'gpu': - print(f"Invalid target, use {cmd} <cpu/gpu>") - - -dt = 0.005 -cutoff_radius = 2.5 -skin = 0.3 -sigma = 1.0 -epsilon = 1.0 -sigma6 = sigma ** 6 - -psim = pairs.simulation("lj", debug=True) -psim.add_real_property('mass', 1.0) -psim.add_position('position') -psim.add_vector_property('velocity') -psim.add_vector_property('force', vol=True) -psim.from_file("data/minimd_setup_32x32x32.input", ['mass', 'position', 'velocity']) -psim.build_neighbor_lists(cutoff_radius + skin) -psim.vtk_output(f"output/test_{target}") -psim.compute(lj, cutoff_radius, {'sigma6': sigma6, 'epsilon': epsilon}) -psim.compute(euler, symbols={'dt': dt}) - -if target == 'gpu': - psim.target(pairs.target_gpu()) -else: - psim.target(pairs.target_cpu()) - -psim.generate() diff --git a/examples/main.cpp b/examples/main.cpp deleted file mode 100644 index 42a9575e000df84e33af7aa51adb683f3b94c5d6..0000000000000000000000000000000000000000 --- a/examples/main.cpp +++ /dev/null @@ -1,175 +0,0 @@ -#include <iostream> -//--- -#include "dem_sd.hpp" -#include <blockforest/BlockForest.h> -#include <blockforest/Initialization.h> - -int main(int argc, char **argv) { - double dt = 5e-5; - auto pairs_sim = std::make_shared<PairsSimulation>(); - auto pairs_acc = std::make_shared<PairsAccessor>(pairs_sim); - - // Create forest (make sure to use_domain(forest)) ---------------------------------------------- - // walberla::math::AABB domain(0, 0, 0, 0.1, 0.1, 0.1); - // std::shared_ptr<walberla::mpi::MPIManager> mpiManager = walberla::mpi::MPIManager::instance(); - // mpiManager->initializeMPI(&argc, &argv); - // mpiManager->useWorldComm(); - // auto procs = mpiManager->numProcesses(); - // auto block_config = walberla::Vector3<int>(2, 2, 2); - // auto ref_level = 0; - // std::shared_ptr<walberla::BlockForest> forest = walberla::blockforest::createBlockForest( - // domain, block_config, walberla::Vector3<bool>(false, false, false), procs, ref_level); - //----------------------------------------------------------------------------------------------- - - // initialize pairs data structures ---------------------------------------------- - pairs_sim->initialize(); - - // either create new domain or use an existing one ---------------------------------------- - pairs_sim->set_domain(argc, argv, 0, 0, 0, 0.1, 0.1, 0.1); - - // pairs_sim->use_domain(forest); - - // create planes and particles ------------------------------------------------------------ - pairs_sim->create_halfspace(0, 0, 0, 1, 0, 0, 0, 13); - pairs_sim->create_halfspace(0, 0, 0, 0, 1, 0, 0, 13); - pairs_sim->create_halfspace(0, 0, 0, 0, 0, 1, 0, 13); - pairs_sim->create_halfspace(0.1, 0.1, 0.1, -1, 0, 0, 0, 13); - pairs_sim->create_halfspace(0.1, 0.1, 0.1, 0, -1, 0, 0, 13); - pairs_sim->create_halfspace(0.1, 0.1, 0.1, 0, 0, -1, 0, 13); - - // pairs::id_t pUid = pairs_sim->create_sphere(0.0499, 0.0499, 0.07, 0.5, 0.5, 0 , 1000, 0.0045, 0, 0); - // pairs::id_t pUid2 = pairs_sim->create_sphere(0.0499, 0.0499, 0.0499, 0.5, 0.5, 0 , 1000, 0.0045, 0, 0); - - // Tracking a particle ------------------------------------------------------------------------ - // if (pUid != pairs_acc->getInvalidUid()){ - // std::cout<< "Particle " << pUid << " is created in rank " << rank << std::endl; - // } - - // walberla::mpi::allReduceInplace(pUid, walberla::mpi::SUM); - // walberla::mpi::allReduceInplace(pUid2, walberla::mpi::SUM); - // MPI_Allreduce(MPI_IN_PLACE, &pUid, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); - // MPI_Allreduce(MPI_IN_PLACE, &pUid2, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); - - - // if (pUid != pairs_acc->getInvalidUid()){ - // std::cout<< "Particle " << pUid << " will be tracked by rank " << rank << std::endl; - // } - - // auto pIsLocalInMyRank = [&](pairs::id_t uid){return pairs_acc->uidToIdx(uid) != pairs_acc->getInvalidIdx();}; - // auto pIsGhostInMyRank = [&](pairs::id_t uid){return pairs_acc->uidToIdxGhost(uid) != pairs_acc->getInvalidIdx();}; - - - // TODO: make sure linkedCellWidth is larger than max diameter in the system - // setup particles, setup functions, and the cell list stencil------------------------------- - pairs_sim->setup_sim(); - - // for(int i=0; i<pairs_acc->size(); ++i){ - // if(pairs_acc->getShape(i) == 0){ - // std::cout<< "rank: " << rank << " sphere " << pairs_acc->getUid(i) << " " << pairs_acc->getPosition(i) << std::endl; - // } - // else if(pairs_acc->getShape(i) == 1){ - // std::cout<< "rank: " << rank << " halfspace " << pairs_acc->getUid(i) << " " << pairs_acc->getPosition(i) << pairs_acc->getNormal(i) << std::endl; - // } - // } - - // if (pIsLocalInMyRank(pUid)){ - // int idx = pairs_acc->uidToIdx(pUid); - // pairs_acc->setPosition(idx, walberla::Vector3<double>(0.01, 0.01, 0.08)); - // pairs_acc->setLinearVelocity(idx, walberla::Vector3<double>(0.5, 0.5, 0.5)); - // } - - for (int t=0; t<5000; ++t){ - // if ((t%200==0) && (pIsLocalInMyRank(pUid))){ - // int idx = pairs_acc->uidToIdx(pUid); - // std::cout<< "Tracked particle is now in rank " << rank << " --- " << pairs_acc->getPosition(idx)<< std::endl; - // } - - pairs_sim->communicate(t); - - // if(pairs_sim->rank() == 0){ - // pairs_sim->pairs_runtime->copyPropertyToHost(0, ReadOnly, (((pairs_sim->pobj->nlocal + pairs_sim->pobj->nghost) * 1) * sizeof(unsigned long long int))); // uid - // for(int i=0; i<pairs_sim->pobj->nlocal; ++i){ - // std::cout << "rank ##0## local uid[" << i << "] = " << pairs_sim->pobj->uid[i] << std::endl; - // } - // } - // if(pairs_sim->rank() == 2){ - // pairs_sim->pairs_runtime->copyPropertyToHost(0, ReadOnly, (((pairs_sim->pobj->nlocal + pairs_sim->pobj->nghost) * 1) * sizeof(unsigned long long int))); // uid - // for(int i=0; i<pairs_sim->pobj->nlocal; ++i){ - // std::cout << "rank 2 local uid[" << i << "] = " << pairs_sim->pobj->uid[i] << std::endl; - // } - // for(int i=pairs_sim->pobj->nlocal; i<pairs_sim->pobj->nlocal + pairs_sim->pobj->nghost; ++i){ - // std::cout << "rank 2 ghost uid[" << i << "] = " << pairs_sim->pobj->uid[i] << std::endl; - // } - // } - - // std::cout << pairs_sim->rank() << " --------- nlocal = " << pairs_acc->nlocal() << " nghost = " << pairs_acc->nghost() << std::endl; - - // if (pairs_sim->rank() == 2) { - // for(int i=0; i<pairs_acc->nlocal(); ++i){ - // std::cout << pairs_sim->rank() << " Local " << i << " -- uid= " << pairs_acc->getUid(i) << std::endl; - // } - // for(int i=pairs_acc->nlocal(); i<pairs_acc->size(); ++i){ - // std::cout << pairs_sim->rank() << " Ghost -- " << i << " -- uid= " << pairs_acc->getUid(i) << std::endl; - // } - // } - - - - // if (pIsLocalInMyRank(pUid)){ - // int idx = pairs_acc->uidToIdx(pUid); - // pairs_acc->setHydrodynamicForce(idx, walberla::Vector3<double>(1, 1, 1)); - // pairs_acc->setHydrodynamicTorque(idx, walberla::Vector3<double>(2, 2, 2)); - // } - // if (pIsLocalInMyRank(pUid2)){ - // int idx = pairs_acc->uidToIdx(pUid2); - // pairs_acc->setHydrodynamicForce(idx, walberla::Vector3<double>(10, 10, 10)); - // pairs_acc->setHydrodynamicTorque(idx, walberla::Vector3<double>(20, 20, 20)); - // } - - pairs_sim->update_cells(); - pairs_sim->gravity(); - pairs_sim->linear_spring_dashpot(); - pairs_sim->euler(dt); - - // std::cout << "reset_volatiles" << std::endl; - pairs_sim->reset_volatiles(); - - pairs_sim->vtk_write("output/dem_sd_local", 0, pairs_acc->nlocal(), t, 200); - pairs_sim->vtk_write("output/dem_sd_ghost", pairs_acc->nlocal(), pairs_acc->size(), t, 200); - - // if (pIsGhostInMyRank(pUid)){ - // std::cout << pairs_sim->rank() << " - ghost 1: " << pUid << std::endl; - // int idx = pairs_acc->uidToIdxGhost(pUid); - // pairs_acc->setHydrodynamicForce(idx, walberla::Vector3<double>(1, 1, 1)); - // pairs_acc->setHydrodynamicTorque(idx, walberla::Vector3<double>(3, 3, 3)); - // } - // if (pIsGhostInMyRank(pUid2)){ - // std::cout << pairs_sim->rank() << " - ghost 2: " << pUid2 << std::endl; - // int idx = pairs_acc->uidToIdxGhost(pUid2); - // pairs_acc->setHydrodynamicForce(idx, walberla::Vector3<double>(2, 2, 2)); - // pairs_acc->setHydrodynamicTorque(idx, walberla::Vector3<double>(4, 4, 4)); - // } - - // std::cout << "reverse comm and reduce" << std::endl; - // pairs_sim->reverse_comm(); - - // if (pIsLocalInMyRank(pUid)){ - // int idx = pairs_acc->uidToIdx(pUid); - // auto forceSum = pairs_acc->getHydrodynamicForce(idx); - // auto torqueSum = pairs_acc->getHydrodynamicTorque(idx); - // std::cout << pUid << " @@@@ reduced force = " << forceSum << std::endl; - // std::cout << pUid << " @@@@ reduced torque = " << torqueSum << std::endl; - // } - // if (pIsLocalInMyRank(pUid2)){ - // int idx = pairs_acc->uidToIdx(pUid2); - // auto forceSum = pairs_acc->getHydrodynamicForce(idx); - // auto torqueSum = pairs_acc->getHydrodynamicTorque(idx); - // std::cout << pUid2 << " @@@@ reduced force = " << forceSum << std::endl; - // std::cout << pUid2 << " @@@@ reduced torque = " << torqueSum << std::endl; - // } - } - - pairs_sim->end(); - - return 0; -} diff --git a/examples/main.cu b/examples/main.cu deleted file mode 100644 index 94224e0c635a0d218c9c35d3c9479b3166d3bb61..0000000000000000000000000000000000000000 --- a/examples/main.cu +++ /dev/null @@ -1,143 +0,0 @@ -#include <iostream> -#include <cuda_runtime.h> - -void checkCudaError(cudaError_t err, const char* func) { - if (err != cudaSuccess) { - fprintf(stderr, "CUDA error in %s: %s\n", func, cudaGetErrorString(err)); - exit(err); - } -} - -template< typename Type > -class PairsVector3 { -public: - PairsVector3() = default; - - // If the constructor is called from device, v_ is automatically allocated on - // device because it's a static array embeded in the object itself - __host__ __device__ PairsVector3( Type x, Type y, Type z ) { - v_[0] = x; - v_[1] = y; - v_[2] = z; - } - - __host__ __device__ Type& operator[]( int index ) { - return v_[index]; - } - __host__ __device__ const Type& operator[] ( int index ) const { - return v_[index]; - } - -private: - Type v_[3] = {Type(), Type(), Type()}; -}; - - -struct PairsObjects { - double *position_h; - double *position_d; -}; - - -class PairsAccessor{ - private: - PairsObjects *pobj_h; - PairsObjects *pobj_d; - - public: - // PairsAccessor is only constructable from host, but its getters and setters can be called from both host and device - __host__ PairsAccessor(PairsObjects *pobj_): pobj_h(pobj_) { - - // NOTE: Here we copy pobj_h to device, but we will use pobj_d to ONLY work with device pointers - // So for example, we only try to access pobj_d->position_d, NOT pobj_d->position_h - // NOTE: What's copied to device here is the PairsObject, which is a bunch of pointers (including valid device pointers like position_d) - // TODO (maybe): Split PairsObjects into two structs, one holding host pointers, the other holding device pointers. - cudaMalloc(&pobj_d, sizeof(PairsObjects)); - cudaMemcpy(pobj_d, pobj_h, sizeof(PairsObjects), cudaMemcpyHostToDevice); - } - - __host__ __device__ int getTest(){ - #ifdef __CUDA_ARCH__ - return 12; - #else - return 34; - #endif - } - - // If this function is called from device, it returns a PairsVector3 that's constructed on device - // If this function is called from host, it returns a PairsVector3 that's constructed on host - __host__ __device__ PairsVector3<double> getPosition(const size_t i) const { - #ifdef __CUDA_ARCH__ - // Assume postion_d points to uptodate data (we can't do copyPropertyToDevice from __device__ ) - return PairsVector3<double>(pobj_d->position_d[i*3 + 0], pobj_d->position_d[i*3 + 1], pobj_d->position_d[i*3 + 2]); - #else - // Here we can do copyPropertyToHost (ReadOnly) if needed, to make sure position_h has uptodate data - return PairsVector3<double>(pobj_h->position_h[i*3 + 0], pobj_h->position_h[i*3 + 1], pobj_h->position_h[i*3 + 2]); - #endif - - } - - __host__ __device__ void setPosition(const size_t i, PairsVector3<double> const &vec) { - #ifdef __CUDA_ARCH__ - // Assume vec is on device - pobj_d->position_d[i*3 + 0] = vec[0]; - pobj_d->position_d[i*3 + 1] = vec[1]; - pobj_d->position_d[i*3 + 2] = vec[2]; - // Assume we don't need position_h data back on host (we can't do copyPropertyToHost from __device__) - #else - // Assume vec is on host - pobj_h->position_h[i*3 + 0] = vec[0]; - pobj_h->position_h[i*3 + 1] = vec[1]; - pobj_h->position_h[i*3 + 2] = vec[2]; - // Here we can do copyPropertyToDevice (WriteOnly) if needed (just so host and device data match) - #endif - - } - -}; - - -__global__ void mykernel(PairsAccessor ac){ - printf("getTest from device = %d\n", ac.getTest()); - - PairsVector3<double> pos(7,8,9); - ac.setPosition(0, pos); - printf("getPosition(0) from device = (%f, %f, %f) \n", ac.getPosition(0)[0], ac.getPosition(0)[1], ac.getPosition(0)[2]); -} - - -int main(int argc, char **argv) { - - PairsObjects *pobj = new PairsObjects; - - // User doesn't bother with the stuff below, they are done when PairsSimulation is initialized - //---------------------------------------------------------------------------------------------------- - int numParticles = 1; - int numElements = numParticles * 3; - pobj->position_h = new double[numElements]; - cudaMalloc(&pobj->position_d, numElements * sizeof(double)); - cudaMemcpy(pobj->position_d, pobj->position_h, numElements * sizeof(double), cudaMemcpyHostToDevice); - //---------------------------------------------------------------------------------------------------- - - PairsAccessor ac(pobj); - printf("getTest from host = %d\n", ac.getTest()); - - PairsVector3<double> pos(1.2, 3.4, 5.6); - ac.setPosition(0, pos); - printf("getPosition(0) from host = (%f, %f, %f) \n", ac.getPosition(0)[0], ac.getPosition(0)[1], ac.getPosition(0)[2]); - - - mykernel<<<1,1>>>(ac); - checkCudaError(cudaDeviceSynchronize(), "mykernel"); - - // In mykernel, we modify the position of the particle ON DEVICE - // TODO: To reflect this modification on the host, we need a 'sync' function along with getters and setters (eg: 'syncPosition') - // to make sure both host and device data are uptodate and in sync with eachother. - // But unlike getters and setters, sync functions are only callable form host - // The 'sync' function copies the property to device if its host flag is set, or copies property to host if its device flag is set - // If setter is called from host, set host flag and unset device flag for that property - // If setter is called from device, set device flag and unset host flag for that property - -} - - diff --git a/examples/modular/force_reduction.cpp b/examples/modular/force_reduction.cpp index e5d2394d108a3068ec86ec8b8026d3ae95c8daca..e884b3d0acab91189ffd4465364d9e24319e8992 100644 --- a/examples/modular/force_reduction.cpp +++ b/examples/modular/force_reduction.cpp @@ -9,10 +9,11 @@ int main(int argc, char **argv) { auto ac = std::make_shared<PairsAccessor>(pairs_sim.get()); // Set domain - pairs_sim->set_domain(argc, argv, 0, 0, 0, 0.1, 0.1, 0.1); + auto pairs_runtime = pairs_sim->getPairsRuntime(); + pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, 0.1, 0.1, 0.1); // Create bodies - pairs::id_t pUid = pairs_sim->create_sphere(0.0499, 0.0499, 0.07, 0.5, 0.5, 0 , 1000, 0.0045, 0, 0); + pairs::id_t pUid = pairs::create_sphere(pairs_runtime, 0.0499, 0.0499, 0.07, 0.5, 0.5, 0 , 1000, 0.0045, 0, 0); // setup_sim after creating all bodies pairs_sim->setup_sim(); @@ -93,8 +94,7 @@ int main(int argc, char **argv) { pairs_sim->update_cells(t); pairs_sim->gravity(); pairs_sim->spring_dashpot(); - pairs_sim->euler(5e-5); - pairs_sim->reset_volatiles(); + pairs_sim->euler(5e-5); //------------------------------------------------------------------------------------------- std::cout << "---- reverse_comm and reduce ----" << std::endl; diff --git a/examples/modular/sd_1.cpp b/examples/modular/sd_1.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e56cfc5943c7626d97d6b872524655b9dd0dca0b --- /dev/null +++ b/examples/modular/sd_1.cpp @@ -0,0 +1,47 @@ +#include <iostream> +#include <memory> + +#include "spring_dashpot.hpp" + +int main(int argc, char **argv) { + + auto pairs_sim = std::make_shared<PairsSimulation>(); + pairs_sim->initialize(); + + auto pairs_runtime = pairs_sim->getPairsRuntime(); + + pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, 1, 1, 1); + + pairs::create_halfspace(pairs_runtime, 0,0,0, 1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 0, 1, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, -1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, 0, -1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, 0, 0, -1, 0, 13); + pairs::create_sphere(pairs_runtime, 0.6, 0.6, 0.7, -2, -2, 0, 1000, 0.05, 0, 0); + pairs::create_sphere(pairs_runtime, 0.4, 0.4, 0.68, 2, 2, 0, 1000, 0.05, 0, 0); + + pairs_sim->setup_sim(0.1, 0.1, 0.1, 0.1); + pairs_sim->update_mass_and_inertia(); + + int num_timesteps = 2000; + int vtk_freq = 20; + double dt = 1e-3; + + for (int t=0; t<num_timesteps; ++t){ + if ((t%500==0) && pairs_sim->rank()==0) std::cout << "Timestep: " << t << std::endl; + + pairs_sim->communicate(t); + + pairs_sim->update_cells(t); + + pairs_sim->gravity(); + pairs_sim->spring_dashpot(); + pairs_sim->euler(dt); + + pairs::vtk_write_data(pairs_runtime, "output/sd_1_local", 0, pairs_sim->nlocal(), t, vtk_freq); + pairs::vtk_write_data(pairs_runtime, "output/sd_1_ghost", pairs_sim->nlocal(), pairs_sim->size(), t, vtk_freq); + } + + pairs_sim->end(); +} diff --git a/examples/modular/sd_1_CPU_GPU.cpp b/examples/modular/sd_1_CPU_GPU.cpp deleted file mode 100644 index 898d3e9826493af37b4c29da9a382322e219173e..0000000000000000000000000000000000000000 --- a/examples/modular/sd_1_CPU_GPU.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include <iostream> -#include <memory> - -#include "spring_dashpot.hpp" - -int main(int argc, char **argv) { - - auto pairs_sim = std::make_shared<PairsSimulation>(); - pairs_sim->initialize(); - - pairs_sim->set_domain(argc, argv, 0, 0, 0, 1, 1, 1); - - pairs_sim->create_halfspace(0,0,0, 1, 0, 0, 0, 13); - pairs_sim->create_halfspace(0,0,0, 0, 1, 0, 0, 13); - pairs_sim->create_halfspace(0,0,0, 0, 0, 1, 0, 13); - pairs_sim->create_halfspace(1,1,1, -1, 0, 0, 0, 13); - pairs_sim->create_halfspace(1,1,1, 0, -1, 0, 0, 13); - pairs_sim->create_halfspace(1,1,1, 0, 0, -1, 0, 13); - pairs_sim->create_sphere(0.6, 0.6, 0.7, -2, -2, 0, 1000, 0.05, 0, 0); - pairs_sim->create_sphere(0.4, 0.4, 0.68, 2, 2, 0, 1000, 0.05, 0, 0); - - pairs_sim->setup_sim(0.1, 0.1, 0.1, 0.1); - pairs_sim->update_mass_and_inertia(); - - int num_timesteps = 2000; - int vtk_freq = 20; - double dt = 1e-3; - - for (int t=0; t<num_timesteps; ++t){ - if ((t%500==0) && pairs_sim->rank()==0) std::cout << "Timestep: " << t << std::endl; - - pairs_sim->communicate(t); - - pairs_sim->update_cells(t); - - pairs_sim->gravity(); - pairs_sim->spring_dashpot(); - pairs_sim->euler(dt); - - pairs_sim->reset_volatiles(); - - pairs_sim->vtk_write("output/dem_sd_local", 0, pairs_sim->nlocal(), t, vtk_freq); - pairs_sim->vtk_write("output/dem_sd_ghost", pairs_sim->nlocal(), pairs_sim->size(), t, vtk_freq); - } - - pairs_sim->end(); -} diff --git a/examples/modular/sd_2_CPU_GPU.cpp b/examples/modular/sd_2.cpp similarity index 65% rename from examples/modular/sd_2_CPU_GPU.cpp rename to examples/modular/sd_2.cpp index 73b86385cccd5063fd068b8f2df1cbb2c69bd04e..4bdb4c85a37efef0391ccecc9d2a1e1df6e3313d 100644 --- a/examples/modular/sd_2_CPU_GPU.cpp +++ b/examples/modular/sd_2.cpp @@ -30,16 +30,17 @@ int main(int argc, char **argv) { // Pass forest to P4IRS // ------------------------------------------------------------------------------- - pairs_sim->use_domain(forest); - - pairs_sim->create_halfspace(0,0,0, 1, 0, 0, 0, 13); - pairs_sim->create_halfspace(0,0,0, 0, 1, 0, 0, 13); - pairs_sim->create_halfspace(0,0,0, 0, 0, 1, 0, 13); - pairs_sim->create_halfspace(1,1,1, -1, 0, 0, 0, 13); - pairs_sim->create_halfspace(1,1,1, 0, -1, 0, 0, 13); - pairs_sim->create_halfspace(1,1,1, 0, 0, -1, 0, 13); - pairs_sim->create_sphere(0.6, 0.6, 0.7, -2, -2, 0, 1000, 0.05, 0, 0); - pairs_sim->create_sphere(0.4, 0.4, 0.68, 2, 2, 0, 1000, 0.05, 0, 0); + auto pairs_runtime = pairs_sim->getPairsRuntime(); + pairs_runtime->useDomain(forest); + + pairs::create_halfspace(pairs_runtime, 0,0,0, 1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 0, 1, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, -1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, 0, -1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, 0, 0, -1, 0, 13); + pairs::create_sphere(pairs_runtime, 0.6, 0.6, 0.7, -2, -2, 0, 1000, 0.05, 0, 0); + pairs::create_sphere(pairs_runtime, 0.4, 0.4, 0.68, 2, 2, 0, 1000, 0.05, 0, 0); pairs_sim->setup_sim(0.1, 0.1, 0.1, 0.1); pairs_sim->update_mass_and_inertia(); @@ -59,10 +60,8 @@ int main(int argc, char **argv) { pairs_sim->spring_dashpot(); pairs_sim->euler(dt); - pairs_sim->reset_volatiles(); - - pairs_sim->vtk_write("output/dem_sd_local", 0, pairs_sim->nlocal(), t, vtk_freq); - pairs_sim->vtk_write("output/dem_sd_ghost", pairs_sim->nlocal(), pairs_sim->size(), t, vtk_freq); + pairs::vtk_write_data(pairs_runtime, "output/sd_2_local", 0, pairs_sim->nlocal(), t, vtk_freq); + pairs::vtk_write_data(pairs_runtime, "output/sd_2_ghost", pairs_sim->nlocal(), pairs_sim->size(), t, vtk_freq); } pairs_sim->end(); diff --git a/examples/modular/sd_3_CPU.cpp b/examples/modular/sd_3_CPU.cpp index 7dc10829d64ea658b76ffc5e95f251a8d43550c5..a8e4a93bc5e816eb8eee900b7ef1d6769e7ec206 100644 --- a/examples/modular/sd_3_CPU.cpp +++ b/examples/modular/sd_3_CPU.cpp @@ -14,18 +14,19 @@ int main(int argc, char **argv) { pairs_sim->initialize(); auto ac = std::make_shared<PairsAccessor>(pairs_sim.get()); + + auto pairs_runtime = pairs_sim->getPairsRuntime(); + pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, 1, 1, 1); - pairs_sim->set_domain(argc, argv, 0, 0, 0, 1, 1, 1); + pairs::create_halfspace(pairs_runtime, 0,0,0, 1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 0, 1, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, -1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, 0, -1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, 0, 0, -1, 0, 13); - pairs_sim->create_halfspace(0,0,0, 1, 0, 0, 0, 13); - pairs_sim->create_halfspace(0,0,0, 0, 1, 0, 0, 13); - pairs_sim->create_halfspace(0,0,0, 0, 0, 1, 0, 13); - pairs_sim->create_halfspace(1,1,1, -1, 0, 0, 0, 13); - pairs_sim->create_halfspace(1,1,1, 0, -1, 0, 0, 13); - pairs_sim->create_halfspace(1,1,1, 0, 0, -1, 0, 13); - - pairs::id_t pUid = pairs_sim->create_sphere(0.6, 0.6, 0.7, 0, 0, 0, 1000, 0.05, 0, 0); - pairs_sim->create_sphere(0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 0, 0); + pairs::id_t pUid = pairs::create_sphere(pairs_runtime ,0.6, 0.6, 0.7, 0, 0, 0, 1000, 0.05, 0, 0); + pairs::create_sphere(pairs_runtime, 0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 0, 0); MPI_Allreduce(MPI_IN_PLACE, &pUid, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); @@ -81,14 +82,13 @@ int main(int argc, char **argv) { // Euler //------------------------------------------------------------------------------------------- pairs_sim->euler(dt); - pairs_sim->reset_volatiles(); // Communicate //------------------------------------------------------------------------------------------- pairs_sim->communicate(t); - pairs_sim->vtk_write("output/dem_sd_local", 0, ac->nlocal(), t, vtk_freq); - pairs_sim->vtk_write("output/dem_sd_ghost", ac->nlocal(), ac->size(), t, vtk_freq); + pairs::vtk_write_data(pairs_runtime, "output/sd_3_CPU_local", 0, ac->nlocal(), t, vtk_freq); + pairs::vtk_write_data(pairs_runtime, "output/sd_3_CPU_ghost", ac->nlocal(), ac->size(), t, vtk_freq); } pairs_sim->end(); diff --git a/examples/modular/sd_3_GPU.cu b/examples/modular/sd_3_GPU.cu index 4abdeedcdc844a6f94975bca0aedf1586df6c5e8..b44af846643ed9cf0a730b07c5f60543560e29b8 100644 --- a/examples/modular/sd_3_GPU.cu +++ b/examples/modular/sd_3_GPU.cu @@ -46,17 +46,18 @@ int main(int argc, char **argv) { // Create PairsAccessor after PairsSimulation is initialized auto ac = std::make_shared<PairsAccessor>(pairs_sim.get()); - pairs_sim->set_domain(argc, argv, 0, 0, 0, 1, 1, 1); + auto pairs_runtime = pairs_sim->getPairsRuntime(); + pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, 1, 1, 1); - pairs_sim->create_halfspace(0,0,0, 1, 0, 0, 0, 13); - pairs_sim->create_halfspace(0,0,0, 0, 1, 0, 0, 13); - pairs_sim->create_halfspace(0,0,0, 0, 0, 1, 0, 13); - pairs_sim->create_halfspace(1,1,1, -1, 0, 0, 0, 13); - pairs_sim->create_halfspace(1,1,1, 0, -1, 0, 0, 13); - pairs_sim->create_halfspace(1,1,1, 0, 0, -1, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 0, 1, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, -1, 0, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, 0, -1, 0, 0, 13); + pairs::create_halfspace(pairs_runtime, 1,1,1, 0, 0, -1, 0, 13); - pairs::id_t pUid = pairs_sim->create_sphere(0.6, 0.6, 0.7, 0, 0, 0, 1000, 0.05, 1, 0); - pairs_sim->create_sphere(0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 1, 0); + pairs::id_t pUid = pairs::create_sphere(pairs_runtime, 0.6, 0.6, 0.7, 0, 0, 0, 1000, 0.05, 1, 0); + pairs::create_sphere(pairs_runtime, 0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 1, 0); set_feature_properties(ac); @@ -74,7 +75,6 @@ int main(int argc, char **argv) { int num_timesteps = 2000; int vtk_freq = 20; double dt = 1e-3; - pairs_sim->vtk_write_subdom("output/subdom", 0, 1); for (int t=0; t<num_timesteps; ++t){ // Up-to-date uids might be on host or device. So sync uid in Host before accessing them from host @@ -137,7 +137,6 @@ int main(int argc, char **argv) { // Euler //------------------------------------------------------------------------------------------- pairs_sim->euler(dt); - pairs_sim->reset_volatiles(); // Communicate //------------------------------------------------------------------------------------------- @@ -145,8 +144,8 @@ int main(int argc, char **argv) { // PairsAccessor requires an update when particles are communicated ac->update(); - pairs_sim->vtk_write("output/dem_sd_local", 0, ac->nlocal(), t, vtk_freq); - pairs_sim->vtk_write("output/dem_sd_ghost", ac->nlocal(), ac->size(), t, vtk_freq); + pairs::vtk_write_data(pairs_runtime, "output/dem_sd_local", 0, ac->nlocal(), t, vtk_freq); + pairs::vtk_write_data(pairs_runtime, "output/dem_sd_ghost", ac->nlocal(), ac->size(), t, vtk_freq); } pairs_sim->end(); diff --git a/examples/modular/sd_4.cpp b/examples/modular/sd_4.cpp index be064d969e400b61940d3980786d73ed21909a39..80ec40313e13692d1b41fcee01f6b5ce7d4ef91b 100644 --- a/examples/modular/sd_4.cpp +++ b/examples/modular/sd_4.cpp @@ -40,7 +40,7 @@ int main(int argc, char **argv) { auto pairs_runtime = pairs_sim->getPairsRuntime(); pairs_runtime->initDomain(&argc, &argv, - 0, 20, 0, 20, 0, 20, // Domain bounds + 0, 0, 0, 20, 20, 20, // Domain bounds false, false, false, // PBCs --------------> TODO: runtime pbc true // Enable dynamic load balancing (does initial refinement on a <1,1,1> blockforest) ); diff --git a/examples/modular/sd_sample_1_CPU_GPU.cpp b/examples/modular/sd_sample_1_CPU_GPU.cpp deleted file mode 100644 index 960ffbaf7dec899ea3c054b0ea8d3191569a440e..0000000000000000000000000000000000000000 --- a/examples/modular/sd_sample_1_CPU_GPU.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include <iostream> -#include <memory> - -#include "spring_dashpot.hpp" - -int main(int argc, char **argv) { - - auto pairs_sim = std::make_shared<PairsSimulation>(); - pairs_sim->initialize(); - - pairs_sim->set_domain(argc, argv, 0, 0, 0, 1, 1, 1); - - pairs_sim->create_halfspace(0,0,0, 1, 0, 0, 0, 13); - pairs_sim->create_halfspace(0,0,0, 0, 1, 0, 0, 13); - pairs_sim->create_halfspace(0,0,0, 0, 0, 1, 0, 13); - pairs_sim->create_halfspace(1,1,1, -1, 0, 0, 0, 13); - pairs_sim->create_halfspace(1,1,1, 0, -1, 0, 0, 13); - pairs_sim->create_halfspace(1,1,1, 0, 0, -1, 0, 13); - pairs_sim->create_sphere(0.6, 0.6, 0.7, -2, -2, 0, 1000, 0.05, 0, 0); - pairs_sim->create_sphere(0.4, 0.4, 0.68, 2, 2, 0, 1000, 0.05, 0, 0); - - pairs_sim->setup_sim(); - pairs_sim->update_mass_and_inertia(); - - int num_timesteps = 2000; - int vtk_freq = 20; - double dt = 1e-3; - - for (int t=0; t<num_timesteps; ++t){ - if ((t%500==0) && pairs_sim->rank()==0) std::cout << "Timestep: " << t << std::endl; - - pairs_sim->communicate(t); - - pairs_sim->update_cells(t); - - pairs_sim->gravity(); - pairs_sim->spring_dashpot(); - pairs_sim->euler(dt); - - pairs_sim->reset_volatiles(); - - pairs_sim->vtk_write("output/dem_sd_local", 0, pairs_sim->nlocal(), t, vtk_freq); - pairs_sim->vtk_write("output/dem_sd_ghost", pairs_sim->nlocal(), pairs_sim->size(), t, vtk_freq); - } - - pairs_sim->end(); -} diff --git a/examples/modular/sd_sample_2_CPU_GPU.cpp b/examples/modular/sd_sample_2_CPU_GPU.cpp deleted file mode 100644 index e18ce39c3d01a5f7156d727d26a17f9080e7ca22..0000000000000000000000000000000000000000 --- a/examples/modular/sd_sample_2_CPU_GPU.cpp +++ /dev/null @@ -1,69 +0,0 @@ -#include <iostream> -#include <memory> - -#include <blockforest/BlockForest.h> -#include <blockforest/Initialization.h> - -#include "spring_dashpot.hpp" - -int main(int argc, char **argv) { - - auto pairs_sim = std::make_shared<PairsSimulation>(); - pairs_sim->initialize(); - - // Create forest - // ------------------------------------------------------------------------------- - walberla::math::AABB domain(0, 0, 0, 1, 1, 1); - std::shared_ptr<walberla::mpi::MPIManager> mpiManager = walberla::mpi::MPIManager::instance(); - mpiManager->initializeMPI(&argc, &argv); - mpiManager->useWorldComm(); - auto procs = mpiManager->numProcesses(); - - walberla::Vector3<int> block_config; - if (procs==1) block_config = walberla::Vector3<int>(1, 1, 1); - else if (procs==4) block_config = walberla::Vector3<int>(2, 2, 1); - else { std::cout << "Error: Check block_config" << std::endl; exit(-1);} - - auto ref_level = 0; - std::shared_ptr<walberla::BlockForest> forest = walberla::blockforest::createBlockForest( - domain, block_config, walberla::Vector3<bool>(false, false, false), procs, ref_level); - - // Pass forest to P4IRS - // ------------------------------------------------------------------------------- - pairs_sim->use_domain(forest); - - pairs_sim->create_halfspace(0,0,0, 1, 0, 0, 0, 13); - pairs_sim->create_halfspace(0,0,0, 0, 1, 0, 0, 13); - pairs_sim->create_halfspace(0,0,0, 0, 0, 1, 0, 13); - pairs_sim->create_halfspace(1,1,1, -1, 0, 0, 0, 13); - pairs_sim->create_halfspace(1,1,1, 0, -1, 0, 0, 13); - pairs_sim->create_halfspace(1,1,1, 0, 0, -1, 0, 13); - pairs_sim->create_sphere(0.6, 0.6, 0.7, -2, -2, 0, 1000, 0.05, 0, 0); - pairs_sim->create_sphere(0.4, 0.4, 0.68, 2, 2, 0, 1000, 0.05, 0, 0); - - pairs_sim->setup_sim(); - pairs_sim->update_mass_and_inertia(); - - int num_timesteps = 2000; - int vtk_freq = 20; - double dt = 1e-3; - - for (int t=0; t<num_timesteps; ++t){ - if ((t%500==0) && pairs_sim->rank()==0) std::cout << "Timestep: " << t << std::endl; - - pairs_sim->communicate(t); - - pairs_sim->update_cells(t); - - pairs_sim->gravity(); - pairs_sim->spring_dashpot(); - pairs_sim->euler(dt); - - pairs_sim->reset_volatiles(); - - pairs_sim->vtk_write("output/dem_sd_local", 0, pairs_sim->nlocal(), t, vtk_freq); - pairs_sim->vtk_write("output/dem_sd_ghost", pairs_sim->nlocal(), pairs_sim->size(), t, vtk_freq); - } - - pairs_sim->end(); -} diff --git a/examples/modular/sd_sample_3_CPU.cpp b/examples/modular/sd_sample_3_CPU.cpp deleted file mode 100644 index 002e4f907b4c77b403dc0b640dcfde93511514bf..0000000000000000000000000000000000000000 --- a/examples/modular/sd_sample_3_CPU.cpp +++ /dev/null @@ -1,95 +0,0 @@ -#include <iostream> -#include <memory> - -#include "spring_dashpot.hpp" - -void change_gravitational_force(std::shared_ptr<PairsAccessor> &ac, int idx){ - pairs::Vector3<double> upward_gravity(0.0, 0.0, 2 * ac->getMass(idx) * 9.81); - ac->setForce(idx, ac->getForce(idx) + upward_gravity); -} - -int main(int argc, char **argv) { - - auto pairs_sim = std::make_shared<PairsSimulation>(); - pairs_sim->initialize(); - - auto ac = std::make_shared<PairsAccessor>(pairs_sim.get()); - - pairs_sim->set_domain(argc, argv, 0, 0, 0, 1, 1, 1); - - pairs_sim->create_halfspace(0,0,0, 1, 0, 0, 0, 13); - pairs_sim->create_halfspace(0,0,0, 0, 1, 0, 0, 13); - pairs_sim->create_halfspace(0,0,0, 0, 0, 1, 0, 13); - pairs_sim->create_halfspace(1,1,1, -1, 0, 0, 0, 13); - pairs_sim->create_halfspace(1,1,1, 0, -1, 0, 0, 13); - pairs_sim->create_halfspace(1,1,1, 0, 0, -1, 0, 13); - - pairs::id_t pUid = pairs_sim->create_sphere(0.6, 0.6, 0.7, 0, 0, 0, 1000, 0.05, 0, 0); - pairs_sim->create_sphere(0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 0, 0); - - MPI_Allreduce(MPI_IN_PLACE, &pUid, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); - - auto pIsLocalInMyRank = [&](pairs::id_t uid){return ac->uidToIdxLocal(uid) != ac->getInvalidIdx();}; - - pairs_sim->setup_sim(); - pairs_sim->update_mass_and_inertia(); - - pairs_sim->communicate(0); - - int num_timesteps = 2000; - int vtk_freq = 20; - double dt = 1e-3; - - for (int t=0; t<num_timesteps; ++t){ - - // Print position of particle pUid - //------------------------------------------------------------------------------------------- - if(pIsLocalInMyRank(pUid)){ - std::cout << "Timestep (" << t << "): Particle " << pUid << " is in rank " << pairs_sim->rank() << std::endl; - int idx = ac->uidToIdxLocal(pUid); - std::cout << "Position = (" - << ac->getPosition(idx)[0] << ", " - << ac->getPosition(idx)[1] << ", " - << ac->getPosition(idx)[2] << ")" << std::endl; - - } - - // Calculate forces - //------------------------------------------------------------------------------------------- - pairs_sim->update_cells(t); - pairs_sim->gravity(); - pairs_sim->spring_dashpot(); - - // Change gravitational force on particle pUid - //------------------------------------------------------------------------------------------- - if(pIsLocalInMyRank(pUid)){ - int idx = ac->uidToIdxLocal(pUid); - - std::cout << "Force before changing = (" - << ac->getForce(idx)[0] << ", " - << ac->getForce(idx)[1] << ", " - << ac->getForce(idx)[2] << ")" << std::endl; - - change_gravitational_force(ac, idx); - - std::cout << "Force after changing = (" - << ac->getForce(idx)[0] << ", " - << ac->getForce(idx)[1] << ", " - << ac->getForce(idx)[2] << ")" << std::endl; - } - - // Euler - //------------------------------------------------------------------------------------------- - pairs_sim->euler(dt); - pairs_sim->reset_volatiles(); - - // Communicate - //------------------------------------------------------------------------------------------- - pairs_sim->communicate(t); - - pairs_sim->vtk_write("output/dem_sd_local", 0, ac->nlocal(), t, vtk_freq); - pairs_sim->vtk_write("output/dem_sd_ghost", ac->nlocal(), ac->size(), t, vtk_freq); - } - - pairs_sim->end(); -} \ No newline at end of file diff --git a/examples/modular/sd_sample_3_GPU.cu b/examples/modular/sd_sample_3_GPU.cu deleted file mode 100644 index e18bbf0705542f05bac1ab28be06e16fa2527f5b..0000000000000000000000000000000000000000 --- a/examples/modular/sd_sample_3_GPU.cu +++ /dev/null @@ -1,152 +0,0 @@ -#include <iostream> -#include <memory> -#include <cuda_runtime.h> - -#include "spring_dashpot.hpp" - -void checkCudaError(cudaError_t err, const char* func) { - if (err != cudaSuccess) { - fprintf(stderr, "CUDA error in %s: %s\n", func, cudaGetErrorString(err)); - exit(err); - } -} - -__global__ void print_position(PairsAccessor ac, int idx){ - printf("Position [from device] = (%f, %f, %f) \n", ac.getPosition(idx)[0], ac.getPosition(idx)[1], ac.getPosition(idx)[2]); -} - -__global__ void change_gravitational_force(PairsAccessor ac, int idx){ - printf("Force [from device] before setting = (%f, %f, %f) \n", ac.getForce(idx)[0], ac.getForce(idx)[1], ac.getForce(idx)[2]); - - pairs::Vector3<double> upward_gravity(0.0, 0.0, 2 * ac.getMass(idx) * 9.81); - ac.setForce(idx, ac.getForce(idx) + upward_gravity); - - printf("Force [from device] after setting = (%f, %f, %f) \n", ac.getForce(idx)[0], ac.getForce(idx)[1], ac.getForce(idx)[2]); -} - -void set_feature_properties(std::shared_ptr<PairsAccessor> &ac){ - ac->setTypeStiffness(0,0, 0); - ac->setTypeStiffness(0,1, 1000); - ac->setTypeStiffness(1,0, 1000); - ac->setTypeStiffness(1,1, 3000); - ac->syncTypeStiffness(); - - ac->setTypeDampingNorm(0,0, 0); - ac->setTypeDampingNorm(0,1, 20); - ac->setTypeDampingNorm(1,0, 20); - ac->setTypeDampingNorm(1,1, 10); - ac->syncTypeDampingNorm(); -} - -int main(int argc, char **argv) { - - auto pairs_sim = std::make_shared<PairsSimulation>(); - pairs_sim->initialize(); - - // Create PairsAccessor after PairsSimulation is initialized - auto ac = std::make_shared<PairsAccessor>(pairs_sim.get()); - - pairs_sim->set_domain(argc, argv, 0, 0, 0, 1, 1, 1); - - pairs_sim->create_halfspace(0,0,0, 1, 0, 0, 0, 13); - pairs_sim->create_halfspace(0,0,0, 0, 1, 0, 0, 13); - pairs_sim->create_halfspace(0,0,0, 0, 0, 1, 0, 13); - pairs_sim->create_halfspace(1,1,1, -1, 0, 0, 0, 13); - pairs_sim->create_halfspace(1,1,1, 0, -1, 0, 0, 13); - pairs_sim->create_halfspace(1,1,1, 0, 0, -1, 0, 13); - - pairs::id_t pUid = pairs_sim->create_sphere(0.6, 0.6, 0.7, 0, 0, 0, 1000, 0.05, 1, 0); - pairs_sim->create_sphere(0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 1, 0); - - set_feature_properties(ac); - - MPI_Allreduce(MPI_IN_PLACE, &pUid, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); - - auto pIsLocalInMyRank = [&](pairs::id_t uid){return ac->uidToIdxLocal(uid) != ac->getInvalidIdx();}; - - pairs_sim->setup_sim(); - pairs_sim->update_mass_and_inertia(); - - pairs_sim->communicate(0); - // PairsAccessor requires an update when particles are communicated - ac->update(); - - int num_timesteps = 2000; - int vtk_freq = 20; - double dt = 1e-3; - - for (int t=0; t<num_timesteps; ++t){ - // Up-to-date uids might be on host or device. So sync uid in Host before accessing them from host - ac->syncUid(PairsAccessor::Host); - - // Print position of particle pUid - //------------------------------------------------------------------------------------------- - if(pIsLocalInMyRank(pUid)){ - std::cout << "Timestep (" << t << "): Particle " << pUid << " is in rank " << pairs_sim->rank() << std::endl; - int idx = ac->uidToIdxLocal(pUid); - - // Up-to-date position might be on host or device. - // Sync position on Host before reading it from host: - ac->syncPosition(PairsAccessor::Host); - std::cout << "Position [from host] = (" - << ac->getPosition(idx)[0] << ", " - << ac->getPosition(idx)[1] << ", " - << ac->getPosition(idx)[2] << ")" << std::endl; - - // Sync position on Device before reading it from device: - ac->syncPosition(PairsAccessor::Device); - print_position<<<1,1>>>(*ac, idx); - checkCudaError(cudaDeviceSynchronize(), "print_position"); - - // There's no need to sync position here to continue the simulation, since position wasn't modified. - } - - // Calculate forces - //------------------------------------------------------------------------------------------- - pairs_sim->update_cells(t); - pairs_sim->gravity(); - pairs_sim->spring_dashpot(); - - // Change gravitational force on particle pUid - //------------------------------------------------------------------------------------------- - ac->syncUid(PairsAccessor::Host); - - if(pIsLocalInMyRank(pUid)){ - std::cout << "Force Timestep (" << t << "): Particle " << pUid << " is in rank " << pairs_sim->rank() << std::endl; - int idx = ac->uidToIdxLocal(pUid); - - // Up-to-date force and mass might be on host or device. - // So sync them in Device before accessing them on device. (No data will be transfered if they are already on device) - ac->syncForce(PairsAccessor::Device); - ac->syncMass(PairsAccessor::Device); - - // Modify force from device: - change_gravitational_force<<<1,1>>>(*ac, idx); - checkCudaError(cudaDeviceSynchronize(), "change_gravitational_force"); - - // Force on device was modified. - // So sync force before continuing the simulation. - ac->syncForce(PairsAccessor::Host); - std::cout << "Force [from host] after changing = (" - << ac->getForce(idx)[0] << ", " - << ac->getForce(idx)[1] << ", " - << ac->getForce(idx)[2] << ")" << std::endl; - } - - // Euler - //------------------------------------------------------------------------------------------- - pairs_sim->euler(dt); - pairs_sim->reset_volatiles(); - - // Communicate - //------------------------------------------------------------------------------------------- - pairs_sim->communicate(t); - // PairsAccessor requires an update when particles are communicated - ac->update(); - - pairs_sim->vtk_write("output/dem_sd_local", 0, ac->nlocal(), t, vtk_freq); - pairs_sim->vtk_write("output/dem_sd_ghost", ac->nlocal(), ac->size(), t, vtk_freq); - } - - pairs_sim->end(); -} \ No newline at end of file diff --git a/examples/dem.py b/examples/whole-program-generation/linear_spring_dashpot.py similarity index 91% rename from examples/dem.py rename to examples/whole-program-generation/linear_spring_dashpot.py index 7f3d4d508dad66b372e4000b93a7476116697bae..9a13b2bd363d29face05936ade47bcf47f0f2735 100644 --- a/examples/dem.py +++ b/examples/whole-program-generation/linear_spring_dashpot.py @@ -97,9 +97,6 @@ if target != 'cpu' and target != 'gpu': # Config file parameters domainSize_SI = [0.8, 0.015, 0.2] -#domainSize_SI = [0.4, 0.4, 0.2] # node base -#domainSize_SI = [0.6, 0.6, 0.2] # node base -#domainSize_SI = [0.8, 0.8, 0.2] # node base diameter_SI = 0.0029 gravity_SI = 9.81 densityFluid_SI = 1000 @@ -112,7 +109,6 @@ restitutionCoefficient = 0.1 collisionTime_SI = 5e-4 poissonsRatio = 0.22 timeSteps = 10000 -#timeSteps = 1000 visSpacing = 100 denseBottomLayer = False bottomLayerOffsetFactor = 1.0 @@ -128,13 +124,14 @@ frictionStatic = 0.0 frictionDynamic = frictionCoefficient psim = pairs.simulation( - "dem", + "linear_spring_dashpot", [pairs.sphere(), pairs.halfspace()], timesteps=timeSteps, double_prec=True, use_contact_history=True, particle_capacity=1000000, - neighbor_capacity=20) + neighbor_capacity=20, + generate_whole_program=True) if target == 'gpu': psim.target(pairs.target_gpu()) @@ -167,29 +164,16 @@ psim.dem_sc_grid( domainSize_SI[0], domainSize_SI[1], domainSize_SI[2], generationSpacing_SI, diameter_SI, minDiameter_SI, maxDiameter_SI, initialVelocity_SI, densityParticle_SI, ntypes) -#psim.read_particle_data( -# "data/spheres.input", -# "data/spheres_4x4x2.input", -# "data/spheres_6x6x2.input", -# "data/spheres_8x8x2.input", -# ['uid', 'type', 'mass', 'radius', 'position', 'linear_velocity', 'flags'], -# pairs.sphere()) - -#psim.read_particle_data( -# "data/spheres_bottom.input", -# ['type', 'mass', 'radius', 'position', 'linear_velocity', 'flags'], -# pairs.sphere()) psim.read_particle_data( "data/planes.input", - ['uid', 'type', 'mass', 'position', 'normal', 'flags'], + ['type', 'mass', 'position', 'normal', 'flags'], pairs.halfspace()) psim.setup(update_mass_and_inertia, {'densityParticle_SI': densityParticle_SI, 'pi': math.pi, 'infinity': math.inf }) -#psim.compute_half() psim.build_cell_lists(linkedCellWidth) psim.vtk_output(f"output/dem_{target}", frequency=visSpacing) diff --git a/examples/md.py b/examples/whole-program-generation/md.py similarity index 83% rename from examples/md.py rename to examples/whole-program-generation/md.py index 32bc1da44d44e6f1e3027979090ce511aa052f02..22eb3c1f58d0fb7d8892232b04a9aa8b146cccf4 100644 --- a/examples/md.py +++ b/examples/whole-program-generation/md.py @@ -35,8 +35,12 @@ nz = 32 rho = 0.8442 temp = 1.44 -psim = pairs.simulation("md", [pairs.point_mass()], timesteps=200, double_prec=True, debug=True) -#psim = pairs.simulation("md", [pairs.point_mass()], timesteps=200, double_prec=True, debug=True, generate_whole_program=True) +psim = pairs.simulation("md", + [pairs.point_mass()], + timesteps=200, + double_prec=True, + debug=True, + generate_whole_program=True) if target == 'gpu': psim.target(pairs.target_gpu()) @@ -52,14 +56,12 @@ psim.add_feature_property('type', 'epsilon', pairs.real(), [epsilon for i in ran psim.add_feature_property('type', 'sigma6', pairs.real(), [sigma6 for i in range(ntypes * ntypes)]) psim.copper_fcc_lattice(nx, ny, nz, rho, temp, ntypes) -#psim.set_domain_partitioner(pairs.block_forest()) psim.set_domain_partitioner(pairs.regular_domain_partitioner()) psim.compute_thermo(100) psim.reneighbor_every(20) -#psim.compute_half() psim.build_neighbor_lists(cutoff_radius + skin) -psim.vtk_output(f"output/md_{target}") +# psim.vtk_output(f"output/md_{target}") psim.compute(initial_integrate, symbols={'dt': dt}, pre_step=True, skip_first=True) psim.compute(lennard_jones, cutoff_radius) diff --git a/examples/whole-program-generation/spring_dashpot.py b/examples/whole-program-generation/spring_dashpot.py index c12ec34af1934466d5c05f5d26e90561bd995f36..6212b0aa7fd41d3d78ad4655f3d9de8f8afa1750 100644 --- a/examples/whole-program-generation/spring_dashpot.py +++ b/examples/whole-program-generation/spring_dashpot.py @@ -49,13 +49,6 @@ def gravity(i): force[i][2] -= mass[i] * gravity_SI -# Number of 'type' features and their pair-wise properties -ntypes = 2 -stiffness_SI = [100000 for i in range(ntypes * ntypes)] -dampingNorm_SI = [300 for i in range(ntypes * ntypes)] -dampingTan_SI = [0.0 for i in range(ntypes * ntypes)] -friction_SI = [0.0 for i in range(ntypes * ntypes)] - # Domain size domainSize_SI=[10, 10, 10] @@ -81,6 +74,8 @@ visSpacing = 20 timeSteps = 2000 +# file_name_without_extension is the simulation identifer (in this case "spring_dashpot") +# TODO: Integration with cmake file_name = os.path.basename(__file__) file_name_without_extension = os.path.splitext(file_name)[0] @@ -103,6 +98,7 @@ else: print(f"Invalid target, use {sys.argv[0]} <cpu/gpu>") +# Register properties psim.add_position('position') psim.add_property('mass', pairs.real()) psim.add_property('linear_velocity', pairs.vector()) @@ -115,19 +111,28 @@ psim.add_property('inv_inertia', pairs.matrix()) psim.add_property('rotation_matrix', pairs.matrix()) psim.add_property('rotation', pairs.quaternion()) +# Define the number of 'type' features and their pair-wise properties +ntypes = 2 +stiffness_SI = [100000 for i in range(ntypes * ntypes)] +dampingNorm_SI = [300 for i in range(ntypes * ntypes)] +dampingTan_SI = [0.5 for i in range(ntypes * ntypes)] +friction_SI = [20.0 for i in range(ntypes * ntypes)] + +# Register 'type' as a feature psim.add_feature('type', ntypes) + +# Register properties for the 'type' feature psim.add_feature_property('type', 'stiffness', pairs.real(), stiffness_SI) psim.add_feature_property('type', 'damping_norm', pairs.real(), dampingNorm_SI) psim.add_feature_property('type', 'damping_tan', pairs.real(), dampingTan_SI) psim.add_feature_property('type', 'friction', pairs.real(), friction_SI) -psim.set_domain_partitioner(pairs.block_forest()) -psim.pbc([False, False, False]) -psim.build_cell_lists(linkedCellWidth) - +# Define the domain and optimization strategies psim.set_domain([0.0, 0.0, 0.0, domainSize_SI[0], domainSize_SI[1], domainSize_SI[2]]) - +psim.pbc([False, False, False]) +psim.set_domain_partitioner(pairs.block_forest()) psim.set_workload_balancer(pairs.morton(), regrid_min=100, regrid_max=1000, rebalance_frequency=200) +psim.build_cell_lists(linkedCellWidth) # Generate particles psim.dem_sc_grid(domainSize_SI[0], domainSize_SI[1], domainSize_SI[2], @@ -144,13 +149,14 @@ psim.read_particle_data( "data/sd_planes.input", ['type', 'mass', 'position', 'n psim.vtk_output(f"output/dem_{target}", frequency=visSpacing) -# The user-defined setup functions are executed only once before the timestep loop +# The user-defined 'setup' functions are executed only once before the timestep loop psim.setup(update_mass_and_inertia, symbols={'infinity': math.inf }) -# The user-defined compute functions are added to the timestep loop in the order they are given to 'compute' +# The user-defined 'compute' functions are added to the timestep loop in the order they are given to 'compute' psim.compute(spring_dashpot, linkedCellWidth) psim.compute(gravity, symbols={'gravity_SI': gravity_SI }) psim.compute(euler, symbols={'dt': dt_SI}) +# Triger code generation psim.generate() diff --git a/runtime/domain/ParticleDataHandling.hpp b/runtime/domain/ParticleDataHandling.hpp index cf467d99548884612fd1e4181664e79ebde34a49..c54bae6404434af71f43c3d6a98d32d0e0537574 100644 --- a/runtime/domain/ParticleDataHandling.hpp +++ b/runtime/domain/ParticleDataHandling.hpp @@ -127,7 +127,7 @@ public: } void serialize(IBlock *const block, const BlockDataID& id, mpi::SendBuffer& buffer) override { - serializeImpl(static_cast<Block *const>(block), id, buffer, 0, false); + serializeImpl(static_cast<Block*>(block), id, buffer, 0, false); } internal::ParticleDeleter* deserialize(IBlock *const block) override { diff --git a/runtime/interfaces/dem.hpp b/runtime/interfaces/dem.hpp deleted file mode 100644 index b593c2707d053e0c6ac547b8ec23783f5d15e5f6..0000000000000000000000000000000000000000 --- a/runtime/interfaces/dem.hpp +++ /dev/null @@ -1,41 +0,0 @@ -#include "../pairs.hpp" - -namespace pairs_host_interface { - -int get_uid(int *uid, int i) { return uid[i]; } -int get_shape(int *shape, int i) { return shape[i]; } -int get_flags(int *flags, int i) { return flags[i]; } -double get_position(double *position, int i, int j, int capacity) { return position[i * 3 + j]; } -double get_mass(double *mass, int i) { return mass[i]; } -double get_linear_velocity(double *linear_velocity, int i, int j, int capacity) { return linear_velocity[i * 3 + j]; } -double get_angular_velocity(double *angular_velocity, int i, int j, int capacity) { return angular_velocity[i * 3 + j]; } -double get_force(double *force, int i, int j, int capacity) { return force[i * 3 + j]; } -double get_torque(double *torque, int i, int j, int capacity) { return torque[i * 3 + j]; } -double get_radius(double *radius, int i) { return radius[i]; } -double get_normal(double *normal, int i, int j, int capacity) { return normal[i * 3 + j]; } -double get_inv_inertia(double *inv_inertia, int i, int j, int capacity) { return inv_inertia[i * 9 + j]; } -double get_rotation_matrix(double *rotation_matrix, int i, int j, int capacity) { return rotation_matrix[i * 9 + j]; } -double get_rotation_quat(double *rotation_quat, int i, int j, int capacity) { return rotation_quat[i * 4 + j]; } -int get_type(int *type, int i) { return type[i]; } - -} - -namespace pairs_cuda_interface { - -__inline__ __device__ int get_uid(int *uid, int i) { return uid[i]; } -__inline__ __device__ int get_shape(int *shape, int i) { return shape[i]; } -__inline__ __device__ int get_flags(int *flags, int i) { return flags[i]; } -__inline__ __device__ double get_position(double *position, int i, int j, int capacity) { return position[i * 3 + j]; } -__inline__ __device__ double get_mass(double *mass, int i) { return mass[i]; } -__inline__ __device__ double get_linear_velocity(double *linear_velocity, int i, int j, int capacity) { return linear_velocity[i * 3 + j]; } -__inline__ __device__ double get_angular_velocity(double *angular_velocity, int i, int j, int capacity) { return angular_velocity[i * 3 + j]; } -__inline__ __device__ double get_force(double *force, int i, int j, int capacity) { return force[i * 3 + j]; } -__inline__ __device__ double get_torque(double *torque, int i, int j, int capacity) { return torque[i * 3 + j]; } -__inline__ __device__ double get_radius(double *radius, int i) { return radius[i]; } -__inline__ __device__ double get_normal(double *normal, int i, int j, int capacity) { return normal[i * 3 + j]; } -__inline__ __device__ double get_inv_inertia(double *inv_inertia, int i, int j, int capacity) { return inv_inertia[i * 9 + j]; } -__inline__ __device__ double get_rotation_matrix(double *rotation_matrix, int i, int j, int capacity) { return rotation_matrix[i * 9 + j]; } -__inline__ __device__ double get_rotation_quat(double *rotation_quat, int i, int j, int capacity) { return rotation_quat[i * 4 + j]; } -__inline__ __device__ int get_type(int *type, int i) { return type[i]; } - -} diff --git a/runtime/interfaces/md.hpp b/runtime/interfaces/md.hpp deleted file mode 100644 index c8bb6b29da34073cb58e82a8b54dc5d0f832f3ce..0000000000000000000000000000000000000000 --- a/runtime/interfaces/md.hpp +++ /dev/null @@ -1,27 +0,0 @@ -#include "../pairs.hpp" - -namespace pairs_host_interface { - -int get_uid(int *uid, int i) { return uid[i]; } -int get_shape(int *shape, int i) { return shape[i]; } -int get_flags(int *flags, int i) { return flags[i]; } -double get_position(double *position, int i, int j, int capacity) { return position[i * 3 + j]; } -double get_mass(double *mass, int i) { return mass[i]; } -double get_linear_velocity(double *linear_velocity, int i, int j, int capacity) { return linear_velocity[i * 3 + j]; } -double get_force(double *force, int i, int j, int capacity) { return force[i * 3 + j]; } -int get_type(int *type, int i) { return type[i]; } - -} - -namespace pairs_cuda_interface { - -__inline__ __device__ int get_uid(int *uid, int i) { return uid[i]; } -__inline__ __device__ int get_shape(int *shape, int i) { return shape[i]; } -__inline__ __device__ int get_flags(int *flags, int i) { return flags[i]; } -__inline__ __device__ double get_position(double *position, int i, int j, int capacity) { return position[i * 3 + j]; } -__inline__ __device__ double get_mass(double *mass, int i) { return mass[i]; } -__inline__ __device__ double get_linear_velocity(double *linear_velocity, int i, int j, int capacity) { return linear_velocity[i * 3 + j]; } -__inline__ __device__ double get_force(double *force, int i, int j, int capacity) { return force[i * 3 + j]; } -__inline__ __device__ int get_type(int *type, int i) { return type[i]; } - -} diff --git a/runtime/pairs.cpp b/runtime/pairs.cpp index 4897a35ebf1d8614b2b81b72075faaed6491e683..6efead8d8cb598c1fcd481beb35c12464c235830 100644 --- a/runtime/pairs.cpp +++ b/runtime/pairs.cpp @@ -14,7 +14,7 @@ namespace pairs { void PairsRuntime::initDomain( int *argc, char ***argv, - real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax, + real_t xmin, real_t ymin, real_t zmin, real_t xmax, real_t ymax, real_t zmax, bool pbcx, bool pbcy, bool pbcz, bool balance_workload) { diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp index 54abd6d4546453714638800844c6afc73d24db1f..e87dec06224d830f8f15fdc2e593278c00d100e6 100644 --- a/runtime/pairs.hpp +++ b/runtime/pairs.hpp @@ -320,7 +320,7 @@ public: // Communication void initDomain( int *argc, char ***argv, - real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax, + real_t xmin, real_t ymin, real_t zmin, real_t xmax, real_t ymax, real_t zmax, bool pbcx = 0, bool pbcy = 0, bool pbcz = 0, bool balance_workload = 0); template<typename Domain_T> diff --git a/runtime/vtk.cpp b/runtime/vtk.cpp index db805a3b63a3f6c0ed52638edb69c45e1a2cd308..b6235725c9d7d10cb38e4033a1e456b6b50ab32a 100644 --- a/runtime/vtk.cpp +++ b/runtime/vtk.cpp @@ -23,12 +23,6 @@ void vtk_write_aabb(PairsRuntime *ps, const char *filename, int num, filename_oss <<".vtk"; std::ofstream out_file(filename_oss.str()); - double aabb[3][3]; - for (int d=0; d<3; ++d){ - aabb[d][0] = ps->getDomainPartitioner()->getSubdomMin(d); - aabb[d][1] = ps->getDomainPartitioner()->getSubdomMax(d); - } - out_file << std::fixed << std::setprecision(prec); if(out_file.is_open()) { out_file << "# vtk DataFile Version 2.0\n"; diff --git a/src/pairs/code_gen/accessor.py b/src/pairs/code_gen/accessor.py index eb2a73dde25ec1e47b2a2ded20b3068de98ade09..34421cb64cde53f7a75a9079b39ee90c25b7b235 100644 --- a/src/pairs/code_gen/accessor.py +++ b/src/pairs/code_gen/accessor.py @@ -26,9 +26,9 @@ class PairsAcessor: if self.target.is_gpu(): self.host_device_attr = "__host__ __device__ " self.host_attr = "__host__ " - self.print("#include \"runtime/math/Vector3.hpp\"") - # self.print("#include \"runtime/math/Quaternion.hpp\"") - # self.print("#include \"runtime/math/Matrix3.hpp\"") + self.print("#include \"math/Vector3.hpp\"") + # self.print("#include \"math/Quaternion.hpp\"") + # self.print("#include \"math/Matrix3.hpp\"") self.print("") self.print("class PairsAccessor {") diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py index d4e09851ca747ec81bc0817da7580ea8f081bd21..76e22283330f531eed2f449a30eee798bf62c00e 100644 --- a/src/pairs/code_gen/cgen.py +++ b/src/pairs/code_gen/cgen.py @@ -57,6 +57,15 @@ class CGen: def real_type(self): return Types.c_keyword(self.sim, Types.Real) + + # def generate_cmake_config_file(self): + # self.print = Printer("pairs_cmake_params.txt") + # self.print.start() + # self.print(f"PAIRS_TARGET={self.ref}") + # self.print(f"GENERATE_WHOLE_PROGRAM={'ON' if self.sim._generate_whole_program else 'OFF'}") + # self.print(f"USE_WALBERLA={'ON' if self.sim._partitioner == DomainPartitioners.BlockForest else 'OFF'}") + # # self.print(f"COMPILE_CUDA={'ON' if self.target.is_gpu() else 'OFF'}") + # self.print.end() def generate_object_reference(self, obj, device=False, index=None): if device and (not self.target.is_gpu() or not obj.device_flag): @@ -99,8 +108,8 @@ class CGen: return f"&({ref})" def generate_interfaces(self): - #self.print = Printer(f"runtime/interfaces/{self.ref}.hpp") - self.print = Printer("runtime/interfaces/last_generated.hpp") + #self.print = Printer(f"interfaces/{self.ref}.hpp") + self.print = Printer("internal_interfaces/last_generated.hpp") self.print.start() self.print("#pragma once") self.generate_interface_namespace('pairs_host_interface') @@ -141,14 +150,9 @@ class CGen: self.print("}") def generate_preamble(self): - self.print(f"#define APPLICATION_REFERENCE \"{self.ref}\"") - - # TODO: Either do this, or add USE_WALBERLA to you compile definitions - if self.sim.partitioner()==DomainPartitioners.BlockForest: - self.print("#define USE_WALBERLA") + # self.print(f"#define APPLICATION_REFERENCE \"{self.ref}\"") if self.target.is_gpu(): - self.print("#define PAIRS_TARGET_CUDA") self.print("#include <math_constants.h>") if self.target.is_openmp(): @@ -161,16 +165,16 @@ class CGen: self.print("#include <stdio.h>") self.print("#include <stdlib.h>") self.print("//---") - self.print("#include \"runtime/likwid-marker.h\"") - self.print("#include \"runtime/copper_fcc_lattice.hpp\"") - self.print("#include \"runtime/create_body.hpp\"") - self.print("#include \"runtime/dem_sc_grid.hpp\"") - self.print("#include \"runtime/pairs.hpp\"") - self.print("#include \"runtime/read_from_file.hpp\"") - self.print("#include \"runtime/stats.hpp\"") - self.print("#include \"runtime/timing.hpp\"") - self.print("#include \"runtime/thermo.hpp\"") - self.print("#include \"runtime/vtk.hpp\"") + self.print("#include \"likwid-marker.h\"") + self.print("#include \"copper_fcc_lattice.hpp\"") + self.print("#include \"create_body.hpp\"") + self.print("#include \"dem_sc_grid.hpp\"") + self.print("#include \"pairs.hpp\"") + self.print("#include \"read_from_file.hpp\"") + self.print("#include \"stats.hpp\"") + self.print("#include \"timing.hpp\"") + self.print("#include \"thermo.hpp\"") + self.print("#include \"vtk.hpp\"") self.print("") self.print("using namespace pairs;") self.print("") diff --git a/src/pairs/sim/domain_partitioning.py b/src/pairs/sim/domain_partitioning.py index ef0502dd2c0ccd7e6588e89f041a42e89040d3d2..485e1a6cbb8c8fe54dabdadd7efd0e7368c3d23b 100644 --- a/src/pairs/sim/domain_partitioning.py +++ b/src/pairs/sim/domain_partitioning.py @@ -46,8 +46,8 @@ class DimensionRanges: return sum([array[i] for i in self.step_indexes(step)]) def initialize(self): - grid_array = [(self.sim.grid.min(d), self.sim.grid.max(d)) for d in range(self.sim.ndims())] - Call_Void(self.sim, "pairs_runtime->initDomain", [param for delim in grid_array for param in delim]) + grid_array = [self.sim.grid.min(d) for d in range(self.sim.ndims())] + [self.sim.grid.max(d) for d in range(self.sim.ndims())] + Call_Void(self.sim, "pairs_runtime->initDomain", grid_array) def update(self): Call_Void(self.sim, "pairs_runtime->updateDomain", []) @@ -141,11 +141,10 @@ class BlockForest: return self.reduce_step def initialize(self): - grid_array = [(self.sim.grid.min(d), self.sim.grid.max(d)) for d in range(self.sim.ndims())] + grid_array = [self.sim.grid.min(d) for d in range(self.sim.ndims())] + [self.sim.grid.max(d) for d in range(self.sim.ndims())] Call_Void(self.sim, "pairs_runtime->initDomain", - [param for delim in grid_array for param in delim] + - self.sim._pbc + ([True] if self.load_balancer is not None else [])) + grid_array + self.sim._pbc + ([True] if self.load_balancer is not None else [])) if self.load_balancer is not None: PrintCode(self.sim, "pairs_runtime->getDomainPartitioner()->initWorkloadBalancer"