Skip to content
Snippets Groups Projects
Commit 0a0a2dd6 authored by Behzad Safaei's avatar Behzad Safaei
Browse files

Improve CMake, update README and examples, remove outdated files

parent 842850ea
No related branches found
No related tags found
No related merge requests found
Showing
with 199 additions and 916 deletions
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
project(pairs CXX)
# Set default build type if none is specified
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build (Debug, Release, etc.)" FORCE)
......@@ -9,30 +8,27 @@ endif()
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -DDEBUG")
option(USE_WALBERLA "Enable waLBerla support for using BlockForest partitioning" ON)
option(USE_MPI "USE_MPI" ON)
option(COMPILE_CUDA "COMPILE_CUDA" ON)
option(ENABLE_GPU_DIRECT "ENABLE_GPU_DIRECT" ON)
option(GENERATE_WHOLE_PROGRAM "Generate the whole program (i.e. including the 'main' function). No additional source files are needed." OFF)
option(BUILD_APP "Build a stand-alone app which uses the P4IRS modular interface. Provide your source files with -DUSER_SOURCE_FILES" OFF)
option(USE_MPI "USE_MPI" ON)
option(COMPILE_CUDA "COMPILE_CUDA" OFF)
option(GENERATE_WHOLE_PROGRAM "Generate the whole program (i.e. including the 'main' function). No additional source files are needed." OFF)
option(BUILD_APP "Build a stand-alone app which uses the P4IRS modular interface. Provide your source files with -DUSER_SOURCE_FILES" OFF)
if(GENERATE_WHOLE_PROGRAM AND BUILD_APP)
message(FATAL_ERROR "You must choose either GENERATE_WHOLE_PROGRAM or BUILD_APP or neither.\n
Choose neither if you only want to use the generated libraries in your project (in a seperate build system).")
Choose neither if you only want to use the P4IRS library in your project (in a seperate build system).")
endif()
set(PYTHON_SCRIPT ${PYTHON_SCRIPT} CACHE PATH "The python script triggering code generation")
if(NOT EXISTS ${PYTHON_SCRIPT})
message(FATAL_ERROR "PYTHON_SCRIPT doesn't exist! Specify it with -DPYTHON_SCRIPT=/path/to/script.py")
set(INPUT_SCRIPT ${INPUT_SCRIPT} CACHE PATH "The input python script triggering code generation")
if(NOT EXISTS ${INPUT_SCRIPT})
message(FATAL_ERROR "INPUT_SCRIPT doesn't exist! Specify it with -DINPUT_SCRIPT=/path/to/script.py")
endif()
get_filename_component(PYTHON_SCRIPT_NAME ${PYTHON_SCRIPT} NAME_WE)
get_filename_component(INPUT_SCRIPT_NAME ${INPUT_SCRIPT} NAME_WE)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
#================================================================================
# Setup directories =============================================================
#================================================================================
file(COPY runtime DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
file(COPY data DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
set(OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/output)
......@@ -44,8 +40,10 @@ file(MAKE_DIRECTORY ${OUTPUT_DIR})
#================================================================================
# Generated header (internally used by runtime files) ===========================
#================================================================================
set(GEN_INTERFACE_HEADER ${CMAKE_CURRENT_BINARY_DIR}/runtime/interfaces/last_generated.hpp)
set(GEN_INTERFACE_DIR ${CMAKE_CURRENT_BINARY_DIR}/runtime/interfaces)
# TODO: Unify all interfaces
set(GEN_INTERFACE_DIR ${CMAKE_CURRENT_BINARY_DIR}/internal_interfaces)
set(GEN_INTERFACE_HEADER ${CMAKE_CURRENT_BINARY_DIR}/last_generated.hpp)
file(MAKE_DIRECTORY ${GEN_INTERFACE_DIR})
#================================================================================
# RUNTIME_COMMON_FILES ==========================================================
......@@ -80,6 +78,10 @@ else()
list(APPEND PAIRS_LINK_LIBRARIES ${PAIRS_TARGET})
endif()
# Include P4IRS 'runtime' dir
target_include_directories(${PAIRS_TARGET} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/runtime)
list(APPEND PAIRS_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/runtime)
set_target_properties(${PAIRS_TARGET} PROPERTIES
CXX_STANDARD_REQUIRED ON
CXX_STANDARD 17
......@@ -106,7 +108,15 @@ endif()
#================================================================================
# waLBerla ======================================================================
#================================================================================
if(USE_WALBERLA)
set(WALBERLA_DIR ${WALBERLA_DIR} CACHE PATH "Path to waLBerla source directory (required only when using BlockForest partitioning).")
if(WALBERLA_DIR)
if(EXISTS "${WALBERLA_DIR}")
target_compile_definitions(${PAIRS_TARGET} PUBLIC USE_WALBERLA)
else()
message(FATAL_ERROR "Invalid WALBERLA_DIR: '${WALBERLA_DIR}' does not exist.")
endif()
set(RUNTIME_WALBERLA_FILES
runtime/domain/block_forest.cpp
)
......@@ -119,7 +129,6 @@ if(USE_WALBERLA)
endif()
target_sources(${PAIRS_TARGET} PRIVATE ${RUNTIME_WALBERLA_FILES})
target_compile_definitions(${PAIRS_TARGET} PUBLIC USE_WALBERLA)
## Linking walberla modules
set(PAIRS_WALBERLA_DEPENDENCIES blockforest core pe)
......@@ -157,7 +166,7 @@ execute_process(
if(COMPILE_CUDA)
find_package(CUDA REQUIRED)
enable_language(CUDA)
set(GEN_SOURCES "${CMAKE_CURRENT_BINARY_DIR}/${PYTHON_SCRIPT_NAME}.cu")
set(GEN_SOURCES "${CMAKE_CURRENT_BINARY_DIR}/${INPUT_SCRIPT_NAME}.cu")
set(CUDA_ARCH ${CUDA_ARCH} CACHE STRING "CUDA_ARCH environment variable must be set.")
set(TARGET_ARG "gpu")
......@@ -174,6 +183,13 @@ if(COMPILE_CUDA)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3")
endif()
if(NOT DEFINED ENABLE_GPU_DIRECT)
set(ENABLE_GPU_DIRECT ON "Enable GPU Direct (default: ON when COMPILE_CUDA is ON)" FORCE)
else()
# User choice is respected here if they opt to COMPILE_CUDA without GPU Direct
set(ENABLE_GPU_DIRECT ${ENABLE_GPU_DIRECT} "Enable GPU Direct (user-defined)" FORCE)
endif()
if(ENABLE_GPU_DIRECT)
target_compile_definitions(${PAIRS_TARGET} PRIVATE ENABLE_CUDA_AWARE_MPI)
endif()
......@@ -192,7 +208,11 @@ if(COMPILE_CUDA)
target_link_libraries(${PAIRS_TARGET} PUBLIC ${CUDA_LIBRARIES})
list(APPEND PAIRS_LINK_LIBRARIES ${CUDA_LIBRARIES})
else()
set(GEN_SOURCES "${CMAKE_CURRENT_BINARY_DIR}/${PYTHON_SCRIPT_NAME}.cpp")
if(ENABLE_GPU_DIRECT)
message(FATAL_ERROR "Invalid combination: ENABLE_GPU_DIRECT requires COMPILE_CUDA to be ON")
endif()
set(GEN_SOURCES "${CMAKE_CURRENT_BINARY_DIR}/${INPUT_SCRIPT_NAME}.cpp")
set(TARGET_ARG "cpu")
target_sources(${PAIRS_TARGET} PRIVATE runtime/devices/dummy.cpp)
endif()
......@@ -202,9 +222,9 @@ endif()
#================================================================================
add_custom_command(
OUTPUT ${GEN_SOURCES} ${GEN_INTERFACE_HEADER}
COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_SCRIPT} ${TARGET_ARG}
COMMAND ${PYTHON_EXECUTABLE} ${INPUT_SCRIPT} ${TARGET_ARG}
COMMENT "Generating code with P4IRS"
DEPENDS ${PYTHON_SCRIPT})
DEPENDS ${INPUT_SCRIPT})
add_custom_target(generated_code DEPENDS ${GEN_SOURCES} ${GEN_INTERFACE_HEADER})
add_dependencies(${PAIRS_TARGET} generated_code)
......@@ -230,7 +250,11 @@ endif()
# LIKWID ========================================================================
#================================================================================
if(LIKWID_DIR)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLIKWID_PERFMON -pthread -L${LIKWID_DIR}/lib -llikwid ")
target_compile_options(${PAIRS_TARGET} PRIVATE -DLIKWID_PERFMON -pthread)
target_link_libraries(${PAIRS_TARGET} PRIVATE ${LIKWID_DIR}/lib/liblikwid.a)
list(APPEND PAIRS_LINK_LIBRARIES ${LIKWID_DIR}/lib/liblikwid.a)
include_directories(${LIKWID_DIR}/include)
list(APPEND PAIRS_INCLUDE_DIRS "${LIKWID_DIR}/include")
endif()
......
.PHONY: all build clean
# General settings
TESTCASE=md
PYCMD=python3
# C/C++ compiler settings
CC=mpic++
CFLAGS=-O3 -mavx2 -mfma -fopenmp ${MPI_FLAGS} ${LIKWID_FLAGS}
#CFLAGS=-Ofast -xHost -qopt-zmm-usage=high ${MPI_FLAGS} ${LIKWID_FLAGS}
#CFLAGS=-Ofast -xCORE-AVX512 -qopt-zmm-usage=high ${MPI_FLAGS} ${LIKWID_FLAGS}
DEBUG_FLAGS=
#DEBUG_FLAGS=-DDEBUG
# CUDA settings
NVCC=nvcc
#NVCC_FLAGS=-O3 -mavx2 -mfma
NVCC_FLAGS=-O3 -arch=sm_80 -mavx2 -mfma -ffast-math -funroll-loops --forward-unknown-to-host-compiler
#NVCC_FLAGS=-O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler
NVCC_PATH:="$(shell which ${NVCC})"
CUDA_FLAGS=-DENABLE_CUDA_AWARE_MPI
CUDART_FLAGS=-lcudart -L /apps/SPACK/0.19.1/opt/linux-almalinux8-zen/gcc-8.5.0/nvhpc-23.7-bzxcokzjvx4stynglo4u2ffpljajzlam/Linux_x86_64/23.7/cuda/12.2/targets/x86_64-linux/lib
# MPI settings
MPI_PATH=/apps/SPACK/0.19.1/opt/linux-almalinux8-zen/intel-2021.10.0/openmpi-4.1.6-ijsnjhq77rjc256wlrp52m37rsq6miff
MPI_FLAGS=-I${MPI_PATH}/include
# Likwid settings
LIKWID_INC ?= -I/usr/local/include
LIKWID_DEFINES ?= -DLIKWID_PERFMON
LIKWID_LIB ?= -L/usr/local/lib
LIKWID_FLAGS = -llikwid ${LIKWID_INC} ${LIKWID_DEFINES} ${LIKWID_LIB}
# Other
CPU_OBJ_PATH=obj_cpu
CPU_SRC="$(TESTCASE).cpp"
CPU_BIN="$(TESTCASE)_cpu"
GPU_OBJ_PATH=obj_gpu
GPU_SRC="$(TESTCASE).cu"
GPU_BIN="$(TESTCASE)_gpu"
all: clean build $(CPU_BIN) $(GPU_BIN)
@echo "Everything was done!"
build:
@echo "Building pairs package..."
$(PYCMD) setup.py build && $(PYCMD) setup.py install --user
$(CPU_SRC):
@echo "Generating and compiling $(TESTCASE) example for CPU..."
@mkdir -p $(CPU_OBJ_PATH)
$(PYCMD) examples/$(TESTCASE).py cpu
$(GPU_SRC):
@echo "Generating and compiling $(TESTCASE) example for GPU..."
@mkdir -p $(GPU_OBJ_PATH)
$(PYCMD) examples/$(TESTCASE).py gpu
$(CPU_OBJ_PATH)/pairs.o: runtime/pairs.cpp
$(CC) -c -o $@ $< $(DEBUG_FLAGS) $(CFLAGS)
$(CPU_OBJ_PATH)/regular_6d_stencil.o: runtime/domain/regular_6d_stencil.cpp
$(CC) -c -o $@ $< $(DEBUG_FLAGS) $(CFLAGS)
$(CPU_OBJ_PATH)/dummy.o: runtime/devices/dummy.cpp
$(CC) -c -o $@ $< $(DEBUG_FLAGS) $(CFLAGS)
$(GPU_OBJ_PATH)/pairs.o: runtime/pairs.cpp
$(CC) -c -o $@ $< $(DEBUG_FLAGS) $(MPI_FLAGS) $(CFLAGS) $(CUDA_FLAGS)
$(GPU_OBJ_PATH)/regular_6d_stencil.o: runtime/domain/regular_6d_stencil.cpp
$(CC) -c -o $@ $< $(DEBUG_FLAGS) $(MPI_FLAGS) $(CFLAGS) $(CUDA_FLAGS)
$(GPU_OBJ_PATH)/cuda_runtime.o: runtime/devices/cuda.cu
$(NVCC) $(NVCC_FLAGS) -c -o $@ $< $(DEBUG_FLAGS) $(MPI_FLAGS) $(CUDA_FLAGS)
# Targets
$(CPU_BIN): $(CPU_SRC) $(CPU_OBJ_PATH)/pairs.o $(CPU_OBJ_PATH)/regular_6d_stencil.o $(CPU_OBJ_PATH)/dummy.o
$(CC) $(CFLAGS) -o $(CPU_BIN) $(CPU_SRC) $(CPU_OBJ_PATH)/pairs.o $(CPU_OBJ_PATH)/regular_6d_stencil.o $(CPU_OBJ_PATH)/dummy.o $(DEBUG_FLAGS)
$(GPU_BIN): $(GPU_SRC) $(GPU_OBJ_PATH)/pairs.o $(GPU_OBJ_PATH)/regular_6d_stencil.o $(GPU_OBJ_PATH)/cuda_runtime.o
$(NVCC) $(NVCC_FLAGS) -c -o $(GPU_OBJ_PATH)/$(GPU_BIN).o $(GPU_SRC) $(DEBUG_FLAGS) $(MPI_FLAGS) $(CUDA_FLAGS)
$(CC) -o $(GPU_BIN) $(GPU_OBJ_PATH)/$(GPU_BIN).o $(GPU_OBJ_PATH)/cuda_runtime.o $(GPU_OBJ_PATH)/pairs.o $(GPU_OBJ_PATH)/regular_6d_stencil.o $(CUDART_FLAGS) $(CUDA_FLAGS) $(CFLAGS)
clean:
@echo "Cleaning..."
rm -rf build $(CPU_BIN) $(GPU_BIN) $(CPU_SRC) $(GPU_SRC) dist pairs.egg-info functions functions.pdf $(CPU_OBJ_PATH) $(GPU_OBJ_PATH)
......@@ -3,10 +3,6 @@
P4IRS is an open-source, stand-alone compiler and domain-specific language for particle simulations which aims at generating optimized code for different target hardwares.
It is released as a Python package and allows users to define kernels, integrators and other particle routines in a high-level and straightforward fashion without the need to implement any backend code.
## Build instructions
There is a Makefile which contains configurable environment variables such as `TESTCASE` compiler parameters evaluate P4IRS performance on different scenarios.
`TESTCASE` refers to any of the files within the `examples` directory, such as `md` and `dem`.
## Usage
......@@ -104,6 +100,64 @@ else:
psim.generate()
```
## Build instructions
P4IRS can be built in 3 different modes using the CMake build system. Before we demostrate each mode, ensure you have CMake, MPI and CUDA (if targeting GPU execution) available in your environment.
In the following, we assume we have created and navigated to a build directory: `mkdir build; cd build`
**General CMake flags (applicable to all 3 modes):**
* Pass your input script to CMake using `-DINPUT_SCRIPT=path/to/script.py`
* Enable CUDA with `-DCOMPILE_CUDA=ON`
* Enable support for BlockForest domain partitioning and dynamic load balancing by providing the path to waLBerla source directory `-DWALBERLA_DIR=path/to/walberla` (TODO: waLBerla as a submodule)
### 1. Whole-program generation:
---------------------
In this mode, everything including the `main` function is generated by P4IRS.
1. Set `generate_whole_program=True` in the input script
2. Set the CMake flag `-DGENERATE_WHOLE_PROGRAM=ON`
Example: Build [md.py](examples/whole-program-generation/md.py)
```
cmake -DINPUT_SCRIPT=../examples/whole-program-generation/md.py -DGENERATE_WHOLE_PROGRAM=ON ..
```
Now call `make` and an **executable** is built.
### 2. Modular stand-alone app
---------------------
You can build a stand-alone C++ app which uses the P4IRS modular interface.
1. Set `generate_whole_program=False` in the input script
2. Set the CMake flag `-DBUILD_APP=ON`
3. Provide the list of your source files to CMake (semicolon-seperated):`-DUSER_SOURCE_FILES=path/to/main.cpp;path/to/helper.cpp`
Example: Build the application [sd_1.cpp](examples/modular/sd_1.cpp) with [spring_dashpot.py](examples/modular/spring_dashpot.py)
Note: In this example we assume waLBerla has been already cloned next to the P4IRS directory.
```
cmake -DINPUT_SCRIPT=../examples/modular/spring_dashpot.py -DBUILD_APP=ON -DUSER_SOURCE_FILES=../examples/modular/sd_1.cpp -DWALBERLA_DIR=../../walberla ..
```
Now call `make` and an **executable** is built.
### 3. P4IRS as a library
---------------------
In this mode, P4IRS is compiled as a library that can be integrated into other projects.
1. Set `generate_whole_program=False` in the input script
2. Ensure both `BUILD_APP` and `GENERATE_WHOLE_PROGRAM` are `OFF` (they are OFF by default)
Configure CMake and call `make` as usual, and a **static library** is built. You can then include P4IRS and its dependencies in your build system as follows:
```cmake
find_package(pairs REQUIRED HINTS "path/to/pairs/build" NO_DEFAULT_PATH)
target_include_directories(my_app PUBLIC ${PAIRS_INCLUDE_DIRS})
target_link_directories(my_app PUBLIC ${PAIRS_LINK_DIRS})
target_link_libraries(my_app PUBLIC ${PAIRS_LINK_LIBRARIES})
```
## Citations
TBD
......
set( WALBERLA_DIR WALBERLA_DIR-NOTFOUND CACHE PATH "waLBerla path" )
if ( WALBERLA_DIR )
# WALBERLA_DIR has to point to the waLBerla source directory
# this command builds waLBerla (again) in the current build directory in the subfolder "walberla" (second argument)
......
0,1,0.0,0.0,0.0,0.0,0.0,1.0,13
0,1,0.8,0.015,0.2,0.0,0.0,-1.0,13
0, 1, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 13
0, 1, 0.8, 0.015, 0.2, 0.0, 0.0, -1.0, 13
0, 1, 0, 0, 0, 1, 0, 0, 13
0, 1, 0, 0, 0, 0, 1, 0, 13
0, 1, 0, 0, 0, 0, 0, 1, 13
0, 1, 1, 1, 1, -1, 0, 0, 13
0, 1, 1, 1, 1, 0, -1, 0, 13
0, 1, 1, 1, 1, 0, 0, -1, 13
0, 1, 10, 10, 10, -1, 0, 0, 13
0, 1, 10, 10, 10, 0, -1, 0, 13
0, 1, 10, 10, 10, 0, 0, -1, 13
import pairs
import sys
def linear_spring_dashpot(i, j):
penetration_depth = squared_distance(i, j) - radius[i] - radius[j]
skip_when(penetration_depth >= 0.0)
delta_ij = delta(i, j)
contact_normal = delta_ij / length(delta_ij)
k = radius[j] + 0.5 * penetration_depth
contact_point = position[j] + contact_normal * k
rel_vel = -velocity_wf[i] - velocity_wf[j]
rel_vel_n = dot(rel_vel, contact_normal) * contact_normal
rel_vel_t = rel_vel - rel_vel_n
fN = stiffness_norm[i, j] * (-penetration_depth) * contact_normal + damping_norm[i, j] * rel_vel_n;
tan_spring_disp = tangential_spring_displacement[i, j]
impact_vel_magnitude = impact_velocity_magnitude[i, j]
impact_magnitude = select(impact_vel_magnitude > 0.0, impact_vel_magnitude, length(rel_vel))
sticking = is_sticking[i, j]
rotated_tan_disp = tan_spring_disp - contact_normal * (contact_normal * tan_spring_disp)
new_tan_spring_disp = dt * rel_vel_t + \
select(squared_length(rotated_tan_disp) <= 0.0,
zero_vector(),
rotated_tan_disp * length(tan_spring_disp) / length(rotated_tan_disp))
fTLS = stiffness_tan[i, j] * new_tan_spring_disp + damping_tan[i, j] * rel_vel_t
fTLS_len = length(fTLS)
fTLS_inv_len = 1.0 / fTLS_len
t = select(fTLS_len > 0, fTLS / fTLS_inv_len, zero_vector())
f_friction_abs_static = friction_static[i, j] * length(fN)
f_friction_abs_dynamic = friction_dynamic[i, j] * length(fN)
tan_vel_threshold = 1e-8
cond1 = sticking == 1 and length(rel_vel_t) < tan_vel_threshold and fTLS_len < f_friction_abs_static
cond2 = sticking == 1 and fTLS_len < f_friction_abs_dynamic
f_friction_abs = select(cond1, f_friction_abs_static, f_friction_abs_dynamic)
n_sticking = select(cond1 or cond2 or fTLS_len < f_friction_abs_dynamic, 1, 0)
if not cond1 and not cond2 and stiffness_tan[i, j] > 0.0:
tangential_spring_displacement[i, j] = \
(f_friction_abs * t - damping_tan[i, j] * rel_vel_t) / stiffness_tan[i, j]
else:
tangential_spring_displacement[i, j] = new_tan_spring_disp
impact_velocity_magnitude[i, j] = impact_magnitude
is_sticking[i, j] = n_sticking
fTabs = min(fTLS_len, f_friction_abs)
fT = fTabs * t
force[i] += fN + fT
def euler(i):
velocity[i] += dt * force[i] / mass[i]
position[i] += dt * velocity[i]
cmd = sys.argv[0]
target = sys.argv[1] if len(sys.argv[1]) > 1 else "none"
if target != 'cpu' and target != 'gpu':
print(f"Invalid target, use {cmd} <cpu/gpu>")
dt = 0.005
cutoff_radius = 2.5
skin = 0.3
ntypes = 4
stiffness_norm = 1.0
stiffness_tan = 1.0
damping_norm = 1.0
damping_tan = 1.0
friction_static = 1.0
friction_dynamic = 1.0
psim = pairs.simulation("dem", debug=True)
psim.add_position('position')
psim.add_property('mass', pairs.double(), 1.0)
psim.add_property('velocity', pairs.vector())
psim.add_property('velocity_wf', pairs.vector())
psim.add_property('force', pairs.vector(), vol=True)
psim.add_property('radius', pairs.double(), 1.0)
psim.add_feature('type', ntypes)
psim.add_feature_property('type', 'stiffness_norm', pairs.double(), [stiffness_norm for i in range(ntypes * ntypes)])
psim.add_feature_property('type', 'stiffness_tan', pairs.double(), [stiffness_tan for i in range(ntypes * ntypes)])
psim.add_feature_property('type', 'damping_norm', pairs.double(), [damping_norm for i in range(ntypes * ntypes)])
psim.add_feature_property('type', 'damping_tan', pairs.double(), [damping_tan for i in range(ntypes * ntypes)])
psim.add_feature_property('type', 'friction_static', pairs.double(), [friction_static for i in range(ntypes * ntypes)])
psim.add_feature_property('type', 'friction_dynamic', pairs.double(), [friction_dynamic for i in range(ntypes * ntypes)])
psim.add_contact_property('is_sticking', pairs.int32(), False)
psim.add_contact_property('tangential_spring_displacement', pairs.vector(), [0.0, 0.0, 0.0])
psim.add_contact_property('impact_velocity_magnitude', pairs.double(), 0.0)
psim.read_particle_data("data/fluidized_bed.input", ['mass', 'position', 'velocity'])
psim.build_neighbor_lists(cutoff_radius + skin)
psim.vtk_output(f"output/test_{target}")
psim.compute(linear_spring_dashpot, cutoff_radius, symbols={'dt': dt})
psim.compute(euler, symbols={'dt': dt})
if target == 'gpu':
psim.target(pairs.target_gpu())
else:
psim.target(pairs.target_cpu())
psim.generate()
import math
import pairs
import sys
import os
def update_mass_and_inertia(i):
rotation_matrix[i] = diagonal_matrix(1.0)
rotation[i] = default_quaternion()
if is_sphere(i):
inv_inertia[i] = inversed(diagonal_matrix(0.4 * mass[i] * radius[i] * radius[i]))
else:
mass[i] = infinity
inv_inertia[i] = 0.0
def linear_spring_dashpot(i, j):
delta_ij = -penetration_depth(i, j)
skip_when(delta_ij < 0.0)
meff = 1.0 / ((1.0 / mass[i]) + (1.0 / mass[j]))
stiffness_norm = meff * (pi * pi + lnDryResCoeff * lnDryResCoeff) / \
(collisionTime_SI * collisionTime_SI)
damping_norm = -2.0 * meff * lnDryResCoeff / collisionTime_SI
damping_tan = sqrt(kappa) * damping_norm
velocity_wf_i = linear_velocity[i] + cross(angular_velocity[i], contact_point(i, j) - position[i])
velocity_wf_j = linear_velocity[j] + cross(angular_velocity[j], contact_point(i, j) - position[j])
rel_vel = -(velocity_wf_i - velocity_wf_j)
rel_vel_n = dot(rel_vel, contact_normal(i, j))
rel_vel_t = rel_vel - rel_vel_n * contact_normal(i, j)
fNabs = stiffness_norm * delta_ij + damping_norm * rel_vel_n
fN = fNabs * contact_normal(i, j)
fTabs = min(damping_tan * length(rel_vel_t), friction_dynamic[i, j] * fNabs) # static or dynamic?
fT = fTabs * normalized(rel_vel_t)
partial_force = fN + fT
apply(force, partial_force)
apply(torque, cross(contact_point(i, j) - position, partial_force))
def euler(i):
inv_mass = 1.0 / mass[i]
position[i] += 0.5 * inv_mass * force[i] * dt * dt + linear_velocity[i] * dt
linear_velocity[i] += inv_mass * force[i] * dt
wdot = rotation_matrix[i] * (inv_inertia[i] * torque[i]) * transposed(rotation_matrix[i])
phi = angular_velocity[i] * dt + 0.5 * wdot * dt * dt
rotation[i] = quaternion(phi, length(phi)) * rotation[i]
rotation_matrix[i] = quaternion_to_rotation_matrix(rotation[i])
angular_velocity[i] += wdot * dt
def gravity(i):
volume = (4.0 / 3.0) * pi * radius[i] * radius[i] * radius[i]
force[i][2] = force[i][2] - (densityParticle_SI - densityFluid_SI) * volume * gravity_SI
# if type[i]==0:
# printf("gravity", i, volume, force[i][2], volume/2, 2>1)
cmd = sys.argv[0]
target = sys.argv[1] if len(sys.argv[1]) > 1 else "none"
if target != 'cpu' and target != 'gpu':
print(f"Invalid target, use {cmd} <cpu/gpu>")
# Config file parameters
domainSize_SI=[0.1, 0.1, 0.1]
diameter_SI = 0.009
gravity_SI = 9.81
densityFluid_SI = 1000
densityParticle_SI = 2550
generationSpacing_SI = 0.01
initialVelocity_SI = 1
dt_SI = 5e-5
frictionCoefficient = 0.5
restitutionCoefficient = 0.1
collisionTime_SI = 5e-4
poissonsRatio = 0.22
timeSteps = 1
visSpacing = 200
denseBottomLayer = False
bottomLayerOffsetFactor = 1.0
kappa = 2.0 * (1.0 - poissonsRatio) / (2.0 - poissonsRatio) # from Thornton et al
minDiameter_SI = diameter_SI #* 0.9
maxDiameter_SI = diameter_SI #* 1.1
linkedCellWidth = 1.01 * maxDiameter_SI
ntypes = 1
lnDryResCoeff = math.log(restitutionCoefficient)
frictionStatic = 0.0
frictionDynamic = frictionCoefficient
file_name = os.path.basename(__file__)
file_name_without_extension = os.path.splitext(file_name)[0]
psim = pairs.simulation(
file_name_without_extension,
[pairs.sphere(), pairs.halfspace()],
timesteps=timeSteps,
double_prec=True,
use_contact_history=False,
particle_capacity=1000000,
neighbor_capacity=20,
debug=True, generate_whole_program=False)
if target == 'gpu':
psim.target(pairs.target_gpu())
else:
psim.target(pairs.target_cpu())
#psim.target(pairs.target_cpu(parallel=True))
psim.add_position('position')
psim.add_property('mass', pairs.real(), 1.0)
psim.add_property('linear_velocity', pairs.vector())
psim.add_property('angular_velocity', pairs.vector())
psim.add_property('force', pairs.vector(), volatile=True)
psim.add_property('torque', pairs.vector(), volatile=True)
psim.add_property('hydrodynamic_force', pairs.vector(), reduce=True)
psim.add_property('hydrodynamic_torque', pairs.vector(), reduce=True)
# psim.add_property('old_hydrodynamic_force', pairs.vector())
# psim.add_property('old_hydrodynamic_torque', pairs.vector())
psim.add_property('radius', pairs.real(), 1.0)
psim.add_property('normal', pairs.vector())
psim.add_property('inv_inertia', pairs.matrix())
psim.add_property('rotation_matrix', pairs.matrix())
psim.add_property('rotation', pairs.quaternion())
psim.add_feature('type', ntypes)
psim.add_feature_property('type', 'friction_static', pairs.real(), [frictionStatic for i in range(ntypes * ntypes)])
psim.add_feature_property('type', 'friction_dynamic', pairs.real(), [frictionDynamic for i in range(ntypes * ntypes)])
# psim.set_domain([0.0, 0.0, 0.0, domainSize_SI[0], domainSize_SI[1], domainSize_SI[2]])
psim.set_domain_partitioner(pairs.block_forest())
# psim.set_domain_partitioner(pairs.regular_domain_partitioner())
psim.pbc([False, False, False])
psim.dem_sc_grid(
domainSize_SI[0], domainSize_SI[1], domainSize_SI[2], generationSpacing_SI,
diameter_SI, minDiameter_SI, maxDiameter_SI, initialVelocity_SI, densityParticle_SI, ntypes)
#psim.read_particle_data(
# "data/spheres.input",
# "data/spheres_4x4x2.input",
# "data/spheres_6x6x2.input",
# "data/spheres_8x8x2.input",
# ['uid', 'type', 'mass', 'radius', 'position', 'linear_velocity', 'flags'],
# pairs.sphere())
#psim.read_particle_data(
# "data/spheres_bottom.input",
# ['type', 'mass', 'radius', 'position', 'linear_velocity', 'flags'],
# pairs.sphere())
# psim.read_particle_data(
# "data/planes.input",
# ['type', 'mass', 'position', 'normal', 'flags'],
# pairs.halfspace())
psim.setup(update_mass_and_inertia, {'densityParticle_SI': densityParticle_SI,
'pi': math.pi,
'infinity': math.inf })
#psim.compute_half()
psim.build_cell_lists(linkedCellWidth)
psim.vtk_output(f"output/dem_{target}", frequency=visSpacing)
psim.compute(gravity,
symbols={'densityParticle_SI': densityParticle_SI,
'densityFluid_SI': densityFluid_SI,
'gravity_SI': gravity_SI,
'pi': math.pi })
psim.compute(linear_spring_dashpot,
linkedCellWidth,
symbols={'pi': math.pi,
'kappa': kappa,
'lnDryResCoeff': lnDryResCoeff,
'collisionTime_SI': collisionTime_SI})
psim.compute(euler, parameters={'dt' : pairs.real()})
# psim.compute(euler, symbols={'dt' : dt_SI})
psim.generate()
from coupling.parse_cpp import parse_walberla_file
from coupling.parse_cpp import get_class_method, print_tree
filename = "mesa_pd/kernel/SpringDashpot.hpp"
translation_unit = parse_walberla_file(filename)
# subtree = get_subtree(tu.cursor, "walberla::mesa_pd::kernel")
# print_tree(subtree)
kernel = get_class_method(
translation_unit.cursor,
"walberla::mesa_pd::kernel::SpringDashpot",
"operator()")
print_tree(kernel)
import pairs
dt = 0.005
cutoff_radius = 2.5
skin = 0.3
sigma = 1.0
epsilon = 1.0
sigma6 = sigma ** 6
psim = pairs.simulation("lj")
mass = psim.add_real_property('mass', 1.0)
position = psim.add_vector_property('position')
velocity = psim.add_vector_property('velocity')
force = psim.add_vector_property('force', vol=True)
psim.from_file("data/minimd_setup_4x4x4.input", ['mass', 'position', 'velocity'])
psim.create_cell_lists(2.8, 2.8)
psim.create_neighbor_lists()
psim.periodic(2.8)
psim.vtk_output("output/test")
for i, j, delta, rsq in psim.particle_pairs(cutoff_radius, position):
sr2 = 1.0 / rsq
sr6 = sr2 * sr2 * sr2 * sigma6
f = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon
force[i].add(delta * f)
for i in psim.particles():
velocity[i].add(dt * force[i] / mass[i])
position[i].add(dt * velocity[i])
psim.generate()
import pairs
import sys
def lj(i, j):
sr2 = 1.0 / rsq
sr6 = sr2 * sr2 * sr2 * sigma6
force[i] += delta * 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon
def euler(i):
velocity[i] += dt * force[i] / mass[i]
position[i] += dt * velocity[i]
cmd = sys.argv[0]
target = sys.argv[1] if len(sys.argv[1]) > 1 else "none"
if target != 'cpu' and target != 'gpu':
print(f"Invalid target, use {cmd} <cpu/gpu>")
dt = 0.005
cutoff_radius = 2.5
skin = 0.3
sigma = 1.0
epsilon = 1.0
sigma6 = sigma ** 6
psim = pairs.simulation("lj", debug=True)
psim.add_real_property('mass', 1.0)
psim.add_position('position')
psim.add_vector_property('velocity')
psim.add_vector_property('force', vol=True)
psim.from_file("data/minimd_setup_32x32x32.input", ['mass', 'position', 'velocity'])
psim.build_neighbor_lists(cutoff_radius + skin)
psim.vtk_output(f"output/test_{target}")
psim.compute(lj, cutoff_radius, {'sigma6': sigma6, 'epsilon': epsilon})
psim.compute(euler, symbols={'dt': dt})
if target == 'gpu':
psim.target(pairs.target_gpu())
else:
psim.target(pairs.target_cpu())
psim.generate()
#include <iostream>
//---
#include "dem_sd.hpp"
#include <blockforest/BlockForest.h>
#include <blockforest/Initialization.h>
int main(int argc, char **argv) {
double dt = 5e-5;
auto pairs_sim = std::make_shared<PairsSimulation>();
auto pairs_acc = std::make_shared<PairsAccessor>(pairs_sim);
// Create forest (make sure to use_domain(forest)) ----------------------------------------------
// walberla::math::AABB domain(0, 0, 0, 0.1, 0.1, 0.1);
// std::shared_ptr<walberla::mpi::MPIManager> mpiManager = walberla::mpi::MPIManager::instance();
// mpiManager->initializeMPI(&argc, &argv);
// mpiManager->useWorldComm();
// auto procs = mpiManager->numProcesses();
// auto block_config = walberla::Vector3<int>(2, 2, 2);
// auto ref_level = 0;
// std::shared_ptr<walberla::BlockForest> forest = walberla::blockforest::createBlockForest(
// domain, block_config, walberla::Vector3<bool>(false, false, false), procs, ref_level);
//-----------------------------------------------------------------------------------------------
// initialize pairs data structures ----------------------------------------------
pairs_sim->initialize();
// either create new domain or use an existing one ----------------------------------------
pairs_sim->set_domain(argc, argv, 0, 0, 0, 0.1, 0.1, 0.1);
// pairs_sim->use_domain(forest);
// create planes and particles ------------------------------------------------------------
pairs_sim->create_halfspace(0, 0, 0, 1, 0, 0, 0, 13);
pairs_sim->create_halfspace(0, 0, 0, 0, 1, 0, 0, 13);
pairs_sim->create_halfspace(0, 0, 0, 0, 0, 1, 0, 13);
pairs_sim->create_halfspace(0.1, 0.1, 0.1, -1, 0, 0, 0, 13);
pairs_sim->create_halfspace(0.1, 0.1, 0.1, 0, -1, 0, 0, 13);
pairs_sim->create_halfspace(0.1, 0.1, 0.1, 0, 0, -1, 0, 13);
// pairs::id_t pUid = pairs_sim->create_sphere(0.0499, 0.0499, 0.07, 0.5, 0.5, 0 , 1000, 0.0045, 0, 0);
// pairs::id_t pUid2 = pairs_sim->create_sphere(0.0499, 0.0499, 0.0499, 0.5, 0.5, 0 , 1000, 0.0045, 0, 0);
// Tracking a particle ------------------------------------------------------------------------
// if (pUid != pairs_acc->getInvalidUid()){
// std::cout<< "Particle " << pUid << " is created in rank " << rank << std::endl;
// }
// walberla::mpi::allReduceInplace(pUid, walberla::mpi::SUM);
// walberla::mpi::allReduceInplace(pUid2, walberla::mpi::SUM);
// MPI_Allreduce(MPI_IN_PLACE, &pUid, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);
// MPI_Allreduce(MPI_IN_PLACE, &pUid2, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);
// if (pUid != pairs_acc->getInvalidUid()){
// std::cout<< "Particle " << pUid << " will be tracked by rank " << rank << std::endl;
// }
// auto pIsLocalInMyRank = [&](pairs::id_t uid){return pairs_acc->uidToIdx(uid) != pairs_acc->getInvalidIdx();};
// auto pIsGhostInMyRank = [&](pairs::id_t uid){return pairs_acc->uidToIdxGhost(uid) != pairs_acc->getInvalidIdx();};
// TODO: make sure linkedCellWidth is larger than max diameter in the system
// setup particles, setup functions, and the cell list stencil-------------------------------
pairs_sim->setup_sim();
// for(int i=0; i<pairs_acc->size(); ++i){
// if(pairs_acc->getShape(i) == 0){
// std::cout<< "rank: " << rank << " sphere " << pairs_acc->getUid(i) << " " << pairs_acc->getPosition(i) << std::endl;
// }
// else if(pairs_acc->getShape(i) == 1){
// std::cout<< "rank: " << rank << " halfspace " << pairs_acc->getUid(i) << " " << pairs_acc->getPosition(i) << pairs_acc->getNormal(i) << std::endl;
// }
// }
// if (pIsLocalInMyRank(pUid)){
// int idx = pairs_acc->uidToIdx(pUid);
// pairs_acc->setPosition(idx, walberla::Vector3<double>(0.01, 0.01, 0.08));
// pairs_acc->setLinearVelocity(idx, walberla::Vector3<double>(0.5, 0.5, 0.5));
// }
for (int t=0; t<5000; ++t){
// if ((t%200==0) && (pIsLocalInMyRank(pUid))){
// int idx = pairs_acc->uidToIdx(pUid);
// std::cout<< "Tracked particle is now in rank " << rank << " --- " << pairs_acc->getPosition(idx)<< std::endl;
// }
pairs_sim->communicate(t);
// if(pairs_sim->rank() == 0){
// pairs_sim->pairs_runtime->copyPropertyToHost(0, ReadOnly, (((pairs_sim->pobj->nlocal + pairs_sim->pobj->nghost) * 1) * sizeof(unsigned long long int))); // uid
// for(int i=0; i<pairs_sim->pobj->nlocal; ++i){
// std::cout << "rank ##0## local uid[" << i << "] = " << pairs_sim->pobj->uid[i] << std::endl;
// }
// }
// if(pairs_sim->rank() == 2){
// pairs_sim->pairs_runtime->copyPropertyToHost(0, ReadOnly, (((pairs_sim->pobj->nlocal + pairs_sim->pobj->nghost) * 1) * sizeof(unsigned long long int))); // uid
// for(int i=0; i<pairs_sim->pobj->nlocal; ++i){
// std::cout << "rank 2 local uid[" << i << "] = " << pairs_sim->pobj->uid[i] << std::endl;
// }
// for(int i=pairs_sim->pobj->nlocal; i<pairs_sim->pobj->nlocal + pairs_sim->pobj->nghost; ++i){
// std::cout << "rank 2 ghost uid[" << i << "] = " << pairs_sim->pobj->uid[i] << std::endl;
// }
// }
// std::cout << pairs_sim->rank() << " --------- nlocal = " << pairs_acc->nlocal() << " nghost = " << pairs_acc->nghost() << std::endl;
// if (pairs_sim->rank() == 2) {
// for(int i=0; i<pairs_acc->nlocal(); ++i){
// std::cout << pairs_sim->rank() << " Local " << i << " -- uid= " << pairs_acc->getUid(i) << std::endl;
// }
// for(int i=pairs_acc->nlocal(); i<pairs_acc->size(); ++i){
// std::cout << pairs_sim->rank() << " Ghost -- " << i << " -- uid= " << pairs_acc->getUid(i) << std::endl;
// }
// }
// if (pIsLocalInMyRank(pUid)){
// int idx = pairs_acc->uidToIdx(pUid);
// pairs_acc->setHydrodynamicForce(idx, walberla::Vector3<double>(1, 1, 1));
// pairs_acc->setHydrodynamicTorque(idx, walberla::Vector3<double>(2, 2, 2));
// }
// if (pIsLocalInMyRank(pUid2)){
// int idx = pairs_acc->uidToIdx(pUid2);
// pairs_acc->setHydrodynamicForce(idx, walberla::Vector3<double>(10, 10, 10));
// pairs_acc->setHydrodynamicTorque(idx, walberla::Vector3<double>(20, 20, 20));
// }
pairs_sim->update_cells();
pairs_sim->gravity();
pairs_sim->linear_spring_dashpot();
pairs_sim->euler(dt);
// std::cout << "reset_volatiles" << std::endl;
pairs_sim->reset_volatiles();
pairs_sim->vtk_write("output/dem_sd_local", 0, pairs_acc->nlocal(), t, 200);
pairs_sim->vtk_write("output/dem_sd_ghost", pairs_acc->nlocal(), pairs_acc->size(), t, 200);
// if (pIsGhostInMyRank(pUid)){
// std::cout << pairs_sim->rank() << " - ghost 1: " << pUid << std::endl;
// int idx = pairs_acc->uidToIdxGhost(pUid);
// pairs_acc->setHydrodynamicForce(idx, walberla::Vector3<double>(1, 1, 1));
// pairs_acc->setHydrodynamicTorque(idx, walberla::Vector3<double>(3, 3, 3));
// }
// if (pIsGhostInMyRank(pUid2)){
// std::cout << pairs_sim->rank() << " - ghost 2: " << pUid2 << std::endl;
// int idx = pairs_acc->uidToIdxGhost(pUid2);
// pairs_acc->setHydrodynamicForce(idx, walberla::Vector3<double>(2, 2, 2));
// pairs_acc->setHydrodynamicTorque(idx, walberla::Vector3<double>(4, 4, 4));
// }
// std::cout << "reverse comm and reduce" << std::endl;
// pairs_sim->reverse_comm();
// if (pIsLocalInMyRank(pUid)){
// int idx = pairs_acc->uidToIdx(pUid);
// auto forceSum = pairs_acc->getHydrodynamicForce(idx);
// auto torqueSum = pairs_acc->getHydrodynamicTorque(idx);
// std::cout << pUid << " @@@@ reduced force = " << forceSum << std::endl;
// std::cout << pUid << " @@@@ reduced torque = " << torqueSum << std::endl;
// }
// if (pIsLocalInMyRank(pUid2)){
// int idx = pairs_acc->uidToIdx(pUid2);
// auto forceSum = pairs_acc->getHydrodynamicForce(idx);
// auto torqueSum = pairs_acc->getHydrodynamicTorque(idx);
// std::cout << pUid2 << " @@@@ reduced force = " << forceSum << std::endl;
// std::cout << pUid2 << " @@@@ reduced torque = " << torqueSum << std::endl;
// }
}
pairs_sim->end();
return 0;
}
#include <iostream>
#include <cuda_runtime.h>
void checkCudaError(cudaError_t err, const char* func) {
if (err != cudaSuccess) {
fprintf(stderr, "CUDA error in %s: %s\n", func, cudaGetErrorString(err));
exit(err);
}
}
template< typename Type >
class PairsVector3 {
public:
PairsVector3() = default;
// If the constructor is called from device, v_ is automatically allocated on
// device because it's a static array embeded in the object itself
__host__ __device__ PairsVector3( Type x, Type y, Type z ) {
v_[0] = x;
v_[1] = y;
v_[2] = z;
}
__host__ __device__ Type& operator[]( int index ) {
return v_[index];
}
__host__ __device__ const Type& operator[] ( int index ) const {
return v_[index];
}
private:
Type v_[3] = {Type(), Type(), Type()};
};
struct PairsObjects {
double *position_h;
double *position_d;
};
class PairsAccessor{
private:
PairsObjects *pobj_h;
PairsObjects *pobj_d;
public:
// PairsAccessor is only constructable from host, but its getters and setters can be called from both host and device
__host__ PairsAccessor(PairsObjects *pobj_): pobj_h(pobj_) {
// NOTE: Here we copy pobj_h to device, but we will use pobj_d to ONLY work with device pointers
// So for example, we only try to access pobj_d->position_d, NOT pobj_d->position_h
// NOTE: What's copied to device here is the PairsObject, which is a bunch of pointers (including valid device pointers like position_d)
// TODO (maybe): Split PairsObjects into two structs, one holding host pointers, the other holding device pointers.
cudaMalloc(&pobj_d, sizeof(PairsObjects));
cudaMemcpy(pobj_d, pobj_h, sizeof(PairsObjects), cudaMemcpyHostToDevice);
}
__host__ __device__ int getTest(){
#ifdef __CUDA_ARCH__
return 12;
#else
return 34;
#endif
}
// If this function is called from device, it returns a PairsVector3 that's constructed on device
// If this function is called from host, it returns a PairsVector3 that's constructed on host
__host__ __device__ PairsVector3<double> getPosition(const size_t i) const {
#ifdef __CUDA_ARCH__
// Assume postion_d points to uptodate data (we can't do copyPropertyToDevice from __device__ )
return PairsVector3<double>(pobj_d->position_d[i*3 + 0], pobj_d->position_d[i*3 + 1], pobj_d->position_d[i*3 + 2]);
#else
// Here we can do copyPropertyToHost (ReadOnly) if needed, to make sure position_h has uptodate data
return PairsVector3<double>(pobj_h->position_h[i*3 + 0], pobj_h->position_h[i*3 + 1], pobj_h->position_h[i*3 + 2]);
#endif
}
__host__ __device__ void setPosition(const size_t i, PairsVector3<double> const &vec) {
#ifdef __CUDA_ARCH__
// Assume vec is on device
pobj_d->position_d[i*3 + 0] = vec[0];
pobj_d->position_d[i*3 + 1] = vec[1];
pobj_d->position_d[i*3 + 2] = vec[2];
// Assume we don't need position_h data back on host (we can't do copyPropertyToHost from __device__)
#else
// Assume vec is on host
pobj_h->position_h[i*3 + 0] = vec[0];
pobj_h->position_h[i*3 + 1] = vec[1];
pobj_h->position_h[i*3 + 2] = vec[2];
// Here we can do copyPropertyToDevice (WriteOnly) if needed (just so host and device data match)
#endif
}
};
__global__ void mykernel(PairsAccessor ac){
printf("getTest from device = %d\n", ac.getTest());
PairsVector3<double> pos(7,8,9);
ac.setPosition(0, pos);
printf("getPosition(0) from device = (%f, %f, %f) \n", ac.getPosition(0)[0], ac.getPosition(0)[1], ac.getPosition(0)[2]);
}
int main(int argc, char **argv) {
PairsObjects *pobj = new PairsObjects;
// User doesn't bother with the stuff below, they are done when PairsSimulation is initialized
//----------------------------------------------------------------------------------------------------
int numParticles = 1;
int numElements = numParticles * 3;
pobj->position_h = new double[numElements];
cudaMalloc(&pobj->position_d, numElements * sizeof(double));
cudaMemcpy(pobj->position_d, pobj->position_h, numElements * sizeof(double), cudaMemcpyHostToDevice);
//----------------------------------------------------------------------------------------------------
PairsAccessor ac(pobj);
printf("getTest from host = %d\n", ac.getTest());
PairsVector3<double> pos(1.2, 3.4, 5.6);
ac.setPosition(0, pos);
printf("getPosition(0) from host = (%f, %f, %f) \n", ac.getPosition(0)[0], ac.getPosition(0)[1], ac.getPosition(0)[2]);
mykernel<<<1,1>>>(ac);
checkCudaError(cudaDeviceSynchronize(), "mykernel");
// In mykernel, we modify the position of the particle ON DEVICE
// TODO: To reflect this modification on the host, we need a 'sync' function along with getters and setters (eg: 'syncPosition')
// to make sure both host and device data are uptodate and in sync with eachother.
// But unlike getters and setters, sync functions are only callable form host
// The 'sync' function copies the property to device if its host flag is set, or copies property to host if its device flag is set
// If setter is called from host, set host flag and unset device flag for that property
// If setter is called from device, set device flag and unset host flag for that property
}
......@@ -9,10 +9,11 @@ int main(int argc, char **argv) {
auto ac = std::make_shared<PairsAccessor>(pairs_sim.get());
// Set domain
pairs_sim->set_domain(argc, argv, 0, 0, 0, 0.1, 0.1, 0.1);
auto pairs_runtime = pairs_sim->getPairsRuntime();
pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, 0.1, 0.1, 0.1);
// Create bodies
pairs::id_t pUid = pairs_sim->create_sphere(0.0499, 0.0499, 0.07, 0.5, 0.5, 0 , 1000, 0.0045, 0, 0);
pairs::id_t pUid = pairs::create_sphere(pairs_runtime, 0.0499, 0.0499, 0.07, 0.5, 0.5, 0 , 1000, 0.0045, 0, 0);
// setup_sim after creating all bodies
pairs_sim->setup_sim();
......@@ -93,8 +94,7 @@ int main(int argc, char **argv) {
pairs_sim->update_cells(t);
pairs_sim->gravity();
pairs_sim->spring_dashpot();
pairs_sim->euler(5e-5);
pairs_sim->reset_volatiles();
pairs_sim->euler(5e-5);
//-------------------------------------------------------------------------------------------
std::cout << "---- reverse_comm and reduce ----" << std::endl;
......
......@@ -8,16 +8,18 @@ int main(int argc, char **argv) {
auto pairs_sim = std::make_shared<PairsSimulation>();
pairs_sim->initialize();
pairs_sim->set_domain(argc, argv, 0, 0, 0, 1, 1, 1);
auto pairs_runtime = pairs_sim->getPairsRuntime();
pairs_sim->create_halfspace(0,0,0, 1, 0, 0, 0, 13);
pairs_sim->create_halfspace(0,0,0, 0, 1, 0, 0, 13);
pairs_sim->create_halfspace(0,0,0, 0, 0, 1, 0, 13);
pairs_sim->create_halfspace(1,1,1, -1, 0, 0, 0, 13);
pairs_sim->create_halfspace(1,1,1, 0, -1, 0, 0, 13);
pairs_sim->create_halfspace(1,1,1, 0, 0, -1, 0, 13);
pairs_sim->create_sphere(0.6, 0.6, 0.7, -2, -2, 0, 1000, 0.05, 0, 0);
pairs_sim->create_sphere(0.4, 0.4, 0.68, 2, 2, 0, 1000, 0.05, 0, 0);
pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, 1, 1, 1);
pairs::create_halfspace(pairs_runtime, 0,0,0, 1, 0, 0, 0, 13);
pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 1, 0, 0, 13);
pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 0, 1, 0, 13);
pairs::create_halfspace(pairs_runtime, 1,1,1, -1, 0, 0, 0, 13);
pairs::create_halfspace(pairs_runtime, 1,1,1, 0, -1, 0, 0, 13);
pairs::create_halfspace(pairs_runtime, 1,1,1, 0, 0, -1, 0, 13);
pairs::create_sphere(pairs_runtime, 0.6, 0.6, 0.7, -2, -2, 0, 1000, 0.05, 0, 0);
pairs::create_sphere(pairs_runtime, 0.4, 0.4, 0.68, 2, 2, 0, 1000, 0.05, 0, 0);
pairs_sim->setup_sim(0.1, 0.1, 0.1, 0.1);
pairs_sim->update_mass_and_inertia();
......@@ -37,10 +39,8 @@ int main(int argc, char **argv) {
pairs_sim->spring_dashpot();
pairs_sim->euler(dt);
pairs_sim->reset_volatiles();
pairs_sim->vtk_write("output/dem_sd_local", 0, pairs_sim->nlocal(), t, vtk_freq);
pairs_sim->vtk_write("output/dem_sd_ghost", pairs_sim->nlocal(), pairs_sim->size(), t, vtk_freq);
pairs::vtk_write_data(pairs_runtime, "output/sd_1_local", 0, pairs_sim->nlocal(), t, vtk_freq);
pairs::vtk_write_data(pairs_runtime, "output/sd_1_ghost", pairs_sim->nlocal(), pairs_sim->size(), t, vtk_freq);
}
pairs_sim->end();
......
......@@ -30,16 +30,17 @@ int main(int argc, char **argv) {
// Pass forest to P4IRS
// -------------------------------------------------------------------------------
pairs_sim->use_domain(forest);
pairs_sim->create_halfspace(0,0,0, 1, 0, 0, 0, 13);
pairs_sim->create_halfspace(0,0,0, 0, 1, 0, 0, 13);
pairs_sim->create_halfspace(0,0,0, 0, 0, 1, 0, 13);
pairs_sim->create_halfspace(1,1,1, -1, 0, 0, 0, 13);
pairs_sim->create_halfspace(1,1,1, 0, -1, 0, 0, 13);
pairs_sim->create_halfspace(1,1,1, 0, 0, -1, 0, 13);
pairs_sim->create_sphere(0.6, 0.6, 0.7, -2, -2, 0, 1000, 0.05, 0, 0);
pairs_sim->create_sphere(0.4, 0.4, 0.68, 2, 2, 0, 1000, 0.05, 0, 0);
auto pairs_runtime = pairs_sim->getPairsRuntime();
pairs_runtime->useDomain(forest);
pairs::create_halfspace(pairs_runtime, 0,0,0, 1, 0, 0, 0, 13);
pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 1, 0, 0, 13);
pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 0, 1, 0, 13);
pairs::create_halfspace(pairs_runtime, 1,1,1, -1, 0, 0, 0, 13);
pairs::create_halfspace(pairs_runtime, 1,1,1, 0, -1, 0, 0, 13);
pairs::create_halfspace(pairs_runtime, 1,1,1, 0, 0, -1, 0, 13);
pairs::create_sphere(pairs_runtime, 0.6, 0.6, 0.7, -2, -2, 0, 1000, 0.05, 0, 0);
pairs::create_sphere(pairs_runtime, 0.4, 0.4, 0.68, 2, 2, 0, 1000, 0.05, 0, 0);
pairs_sim->setup_sim(0.1, 0.1, 0.1, 0.1);
pairs_sim->update_mass_and_inertia();
......@@ -59,10 +60,8 @@ int main(int argc, char **argv) {
pairs_sim->spring_dashpot();
pairs_sim->euler(dt);
pairs_sim->reset_volatiles();
pairs_sim->vtk_write("output/dem_sd_local", 0, pairs_sim->nlocal(), t, vtk_freq);
pairs_sim->vtk_write("output/dem_sd_ghost", pairs_sim->nlocal(), pairs_sim->size(), t, vtk_freq);
pairs::vtk_write_data(pairs_runtime, "output/sd_2_local", 0, pairs_sim->nlocal(), t, vtk_freq);
pairs::vtk_write_data(pairs_runtime, "output/sd_2_ghost", pairs_sim->nlocal(), pairs_sim->size(), t, vtk_freq);
}
pairs_sim->end();
......
......@@ -14,18 +14,19 @@ int main(int argc, char **argv) {
pairs_sim->initialize();
auto ac = std::make_shared<PairsAccessor>(pairs_sim.get());
auto pairs_runtime = pairs_sim->getPairsRuntime();
pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, 1, 1, 1);
pairs_sim->set_domain(argc, argv, 0, 0, 0, 1, 1, 1);
pairs::create_halfspace(pairs_runtime, 0,0,0, 1, 0, 0, 0, 13);
pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 1, 0, 0, 13);
pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 0, 1, 0, 13);
pairs::create_halfspace(pairs_runtime, 1,1,1, -1, 0, 0, 0, 13);
pairs::create_halfspace(pairs_runtime, 1,1,1, 0, -1, 0, 0, 13);
pairs::create_halfspace(pairs_runtime, 1,1,1, 0, 0, -1, 0, 13);
pairs_sim->create_halfspace(0,0,0, 1, 0, 0, 0, 13);
pairs_sim->create_halfspace(0,0,0, 0, 1, 0, 0, 13);
pairs_sim->create_halfspace(0,0,0, 0, 0, 1, 0, 13);
pairs_sim->create_halfspace(1,1,1, -1, 0, 0, 0, 13);
pairs_sim->create_halfspace(1,1,1, 0, -1, 0, 0, 13);
pairs_sim->create_halfspace(1,1,1, 0, 0, -1, 0, 13);
pairs::id_t pUid = pairs_sim->create_sphere(0.6, 0.6, 0.7, 0, 0, 0, 1000, 0.05, 0, 0);
pairs_sim->create_sphere(0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 0, 0);
pairs::id_t pUid = pairs::create_sphere(pairs_runtime ,0.6, 0.6, 0.7, 0, 0, 0, 1000, 0.05, 0, 0);
pairs::create_sphere(pairs_runtime, 0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 0, 0);
MPI_Allreduce(MPI_IN_PLACE, &pUid, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);
......@@ -81,14 +82,13 @@ int main(int argc, char **argv) {
// Euler
//-------------------------------------------------------------------------------------------
pairs_sim->euler(dt);
pairs_sim->reset_volatiles();
// Communicate
//-------------------------------------------------------------------------------------------
pairs_sim->communicate(t);
pairs_sim->vtk_write("output/dem_sd_local", 0, ac->nlocal(), t, vtk_freq);
pairs_sim->vtk_write("output/dem_sd_ghost", ac->nlocal(), ac->size(), t, vtk_freq);
pairs::vtk_write_data(pairs_runtime, "output/sd_3_CPU_local", 0, ac->nlocal(), t, vtk_freq);
pairs::vtk_write_data(pairs_runtime, "output/sd_3_CPU_ghost", ac->nlocal(), ac->size(), t, vtk_freq);
}
pairs_sim->end();
......
......@@ -46,17 +46,18 @@ int main(int argc, char **argv) {
// Create PairsAccessor after PairsSimulation is initialized
auto ac = std::make_shared<PairsAccessor>(pairs_sim.get());
pairs_sim->set_domain(argc, argv, 0, 0, 0, 1, 1, 1);
auto pairs_runtime = pairs_sim->getPairsRuntime();
pairs_runtime->initDomain(&argc, &argv, 0, 0, 0, 1, 1, 1);
pairs_sim->create_halfspace(0,0,0, 1, 0, 0, 0, 13);
pairs_sim->create_halfspace(0,0,0, 0, 1, 0, 0, 13);
pairs_sim->create_halfspace(0,0,0, 0, 0, 1, 0, 13);
pairs_sim->create_halfspace(1,1,1, -1, 0, 0, 0, 13);
pairs_sim->create_halfspace(1,1,1, 0, -1, 0, 0, 13);
pairs_sim->create_halfspace(1,1,1, 0, 0, -1, 0, 13);
pairs::create_halfspace(pairs_runtime, 0,0,0, 1, 0, 0, 0, 13);
pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 1, 0, 0, 13);
pairs::create_halfspace(pairs_runtime, 0,0,0, 0, 0, 1, 0, 13);
pairs::create_halfspace(pairs_runtime, 1,1,1, -1, 0, 0, 0, 13);
pairs::create_halfspace(pairs_runtime, 1,1,1, 0, -1, 0, 0, 13);
pairs::create_halfspace(pairs_runtime, 1,1,1, 0, 0, -1, 0, 13);
pairs::id_t pUid = pairs_sim->create_sphere(0.6, 0.6, 0.7, 0, 0, 0, 1000, 0.05, 1, 0);
pairs_sim->create_sphere(0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 1, 0);
pairs::id_t pUid = pairs::create_sphere(pairs_runtime, 0.6, 0.6, 0.7, 0, 0, 0, 1000, 0.05, 1, 0);
pairs::create_sphere(pairs_runtime, 0.4, 0.4, 0.76, 2, 2, 0, 1000, 0.05, 1, 0);
set_feature_properties(ac);
......@@ -74,7 +75,6 @@ int main(int argc, char **argv) {
int num_timesteps = 2000;
int vtk_freq = 20;
double dt = 1e-3;
pairs_sim->vtk_write_subdom("output/subdom", 0, 1);
for (int t=0; t<num_timesteps; ++t){
// Up-to-date uids might be on host or device. So sync uid in Host before accessing them from host
......@@ -137,7 +137,6 @@ int main(int argc, char **argv) {
// Euler
//-------------------------------------------------------------------------------------------
pairs_sim->euler(dt);
pairs_sim->reset_volatiles();
// Communicate
//-------------------------------------------------------------------------------------------
......@@ -145,8 +144,8 @@ int main(int argc, char **argv) {
// PairsAccessor requires an update when particles are communicated
ac->update();
pairs_sim->vtk_write("output/dem_sd_local", 0, ac->nlocal(), t, vtk_freq);
pairs_sim->vtk_write("output/dem_sd_ghost", ac->nlocal(), ac->size(), t, vtk_freq);
pairs::vtk_write_data(pairs_runtime, "output/dem_sd_local", 0, ac->nlocal(), t, vtk_freq);
pairs::vtk_write_data(pairs_runtime, "output/dem_sd_ghost", ac->nlocal(), ac->size(), t, vtk_freq);
}
pairs_sim->end();
......
......@@ -40,7 +40,7 @@ int main(int argc, char **argv) {
auto pairs_runtime = pairs_sim->getPairsRuntime();
pairs_runtime->initDomain(&argc, &argv,
0, 20, 0, 20, 0, 20, // Domain bounds
0, 0, 0, 20, 20, 20, // Domain bounds
false, false, false, // PBCs --------------> TODO: runtime pbc
true // Enable dynamic load balancing (does initial refinement on a <1,1,1> blockforest)
);
......
#include <iostream>
#include <memory>
#include "spring_dashpot.hpp"
int main(int argc, char **argv) {
auto pairs_sim = std::make_shared<PairsSimulation>();
pairs_sim->initialize();
pairs_sim->set_domain(argc, argv, 0, 0, 0, 1, 1, 1);
pairs_sim->create_halfspace(0,0,0, 1, 0, 0, 0, 13);
pairs_sim->create_halfspace(0,0,0, 0, 1, 0, 0, 13);
pairs_sim->create_halfspace(0,0,0, 0, 0, 1, 0, 13);
pairs_sim->create_halfspace(1,1,1, -1, 0, 0, 0, 13);
pairs_sim->create_halfspace(1,1,1, 0, -1, 0, 0, 13);
pairs_sim->create_halfspace(1,1,1, 0, 0, -1, 0, 13);
pairs_sim->create_sphere(0.6, 0.6, 0.7, -2, -2, 0, 1000, 0.05, 0, 0);
pairs_sim->create_sphere(0.4, 0.4, 0.68, 2, 2, 0, 1000, 0.05, 0, 0);
pairs_sim->setup_sim();
pairs_sim->update_mass_and_inertia();
int num_timesteps = 2000;
int vtk_freq = 20;
double dt = 1e-3;
for (int t=0; t<num_timesteps; ++t){
if ((t%500==0) && pairs_sim->rank()==0) std::cout << "Timestep: " << t << std::endl;
pairs_sim->communicate(t);
pairs_sim->update_cells(t);
pairs_sim->gravity();
pairs_sim->spring_dashpot();
pairs_sim->euler(dt);
pairs_sim->reset_volatiles();
pairs_sim->vtk_write("output/dem_sd_local", 0, pairs_sim->nlocal(), t, vtk_freq);
pairs_sim->vtk_write("output/dem_sd_ghost", pairs_sim->nlocal(), pairs_sim->size(), t, vtk_freq);
}
pairs_sim->end();
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment