diff --git a/CMakeLists.txt b/CMakeLists.txt index 67d70743c918a04b7b17af09d987599ab3d5c67e..ce1ea95414ae5469c6a5a92e1a4be74f4bd699d7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,9 +1,5 @@ cmake_minimum_required(VERSION 3.8.2 FATAL_ERROR) - project(pairs CXX) -#enable_testing() - -#find_package(Python COMPONENTS Interpreter Development) set(TESTCASE ${TESTCASE} CACHE STRING "Select the testcase from the following: md, dem") @@ -20,11 +16,23 @@ endif() string(TOLOWER "${TESTCASE}" TESTCASE) message(STATUS "Selected testcase: ${TESTCASE}") +option(DEBUG "DEBUG" ON) option(USE_WALBERLA "USE_WALBERLA" ON) option(USE_MPI "USE_MPI" ON) option(COMPILE_CUDA "COMPILE_CUDA" ON) option(ENABLE_GPU_DIRECT "ENABLE_GPU_DIRECT" ON) +if(DEBUG) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDEBUG ") +endif() + +if(USE_WALBERLA) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_WALBERLA_LOAD_BALANCING ") + find_package(waLBerla REQUIRED) + add_subdirectory(${walberla_SOURCE_DIR} ${walberla_BINARY_DIR} EXCLUDE_FROM_ALL) + waLBerla_import() +endif() + set(CPU_SRC "${TESTCASE}.cpp") set(GPU_SRC "${TESTCASE}.cu") set(CPU_BIN "${TESTCASE}_cpu") @@ -51,42 +59,38 @@ execute_process( COMMAND ${PYTHON_EXECUTABLE} setup.py install --user WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) -add_executable(${CPU_BIN} ${CPU_SRC} ${RUNTIME_COMMON_FILES} ${RUNTIME_CPU_FILES}) - if(USE_WALBERLA) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_WALBERLA_LOAD_BALANCING ") - find_package(waLBerla REQUIRED) - add_subdirectory(${walberla_SOURCE_DIR} ${walberla_BINARY_DIR} EXCLUDE_FROM_ALL) - waLBerla_import() - waLBerla_add_executable( NAME ${CPU_BIN} - FILES ${CPU_SRC} ${RUNTIME_COMMON_FILES} ${RUNTIME_CPU_FILES} - DEPENDS blockforest core pe ${CPU_SRC}) + FILES ${RUNTIME_COMMON_FILES} ${RUNTIME_CPU_FILES} ${CMAKE_BINARY_DIR}/${CPU_SRC} + DEPENDS blockforest core pe) +else() + add_executable(${CPU_BIN} ${CPU_SRC} ${RUNTIME_COMMON_FILES} ${RUNTIME_CPU_FILES}) endif() add_library(runtime_cpu STATIC runtime/devices/dummy.cpp) -#target_link_libraries(${CPU_BIN} runtime_cpu) +target_link_libraries(${CPU_BIN} runtime_cpu) add_custom_command( - OUTPUT ${CPU_SRC} + OUTPUT ${CMAKE_BINARY_DIR}/${CPU_SRC} COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/examples/${TESTCASE}.py cpu COMMENT "Generate CPU code" DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/examples/${TESTCASE}.py) -add_custom_target(gen_cpu DEPENDS ${CPU_SRC}) +add_custom_target(gen_cpu DEPENDS ${CMAKE_BINARY_DIR}/${CPU_SRC}) add_dependencies(${CPU_BIN} gen_cpu) if(COMPILE_CUDA) find_package(CUDA REQUIRED) enable_language(CUDA) - add_executable(${GPU_BIN} ${GPU_SRC} ${RUNTIME_COMMON_FILES} ${RUNTIME_GPU_FILES}) if(USE_WALBERLA) waLBerla_add_executable( NAME ${GPU_BIN} - FILES ${GPU_SRC} ${RUNTIME_COMMON_FILES} - DEPENDS blockforest core pe ${GPU_SRC}) + FILES ${RUNTIME_COMMON_FILES} + DEPENDS blockforest core pe) + else() + add_executable(${GPU_BIN} ${GPU_SRC} ${RUNTIME_COMMON_FILES} ${RUNTIME_GPU_FILES}) endif() if(ENABLE_GPU_DIRECT) @@ -97,6 +101,7 @@ if(COMPILE_CUDA) target_compile_features(runtime_gpu PUBLIC cxx_std_11) set_target_properties(runtime_gpu PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + target_link_libraries(${GPU_BIN} runtime_gpu) target_compile_options(${GPU_BIN} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-arch=${CUDA_ARCH}>) target_include_directories(${GPU_BIN} PRIVATE ${CUDA_INCLUDE_DIRS}) set_target_properties(${GPU_BIN} PROPERTIES CUDA_ARCHITECTURES ${CUDA_ARCH}) @@ -105,9 +110,6 @@ if(COMPILE_CUDA) target_include_directories(runtime_gpu PRIVATE ${CUDA_INCLUDE_DIRS}) set_target_properties(runtime_gpu PROPERTIES CUDA_ARCHITECTURES ${CUDA_ARCH}) - #set_property(TARGET ${GPU_BIN} PROPERTY CUDA_SEPARABLE_COMPILATION ON) - #target_link_libraries(${GPU_BIN} runtime_gpu) - add_custom_command( OUTPUT ${GPU_SRC} COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/examples/${TESTCASE}.py gpu @@ -122,15 +124,19 @@ target_link_libraries(${CPU_BIN} ${CMAKE_EXE_LINKER_FLAGS}) set_target_properties(${CPU_BIN} PROPERTIES CXX_STANDARD_REQUIRED ON) set_target_properties(${CPU_BIN} PROPERTIES CXX_STANDARD 17) -target_link_libraries(${GPU_BIN} ${CMAKE_EXE_LINKER_FLAGS}) -set_target_properties(${GPU_BIN} PROPERTIES CXX_STANDARD_REQUIRED ON) -set_target_properties(${GPU_BIN} PROPERTIES CXX_STANDARD 17) +if(COMPILE_CUDA) + target_link_libraries(${GPU_BIN} ${CMAKE_EXE_LINKER_FLAGS}) + set_target_properties(${GPU_BIN} PROPERTIES CXX_STANDARD_REQUIRED ON) + set_target_properties(${GPU_BIN} PROPERTIES CXX_STANDARD 17) +endif() if(USE_MPI) find_package(MPI REQUIRED) include_directories(SYSTEM ${MPI_INCLUDE_PATH}) target_link_libraries(${CPU_BIN} ${MPI_LIBRARIES}) - target_link_libraries(${GPU_BIN} ${MPI_LIBRARIES}) + if(COMPILE_CUDA) + target_link_libraries(${GPU_BIN} ${MPI_LIBRARIES}) + endif() endif() if(LIKWID_DIR) diff --git a/examples/md.py b/examples/md.py index 30a669a839e26869071d56b74c282f9124619667..828321c918a2f3119b28ae1a54f187fb5dba0ac0 100644 --- a/examples/md.py +++ b/examples/md.py @@ -35,7 +35,7 @@ nz = 32 rho = 0.8442 temp = 1.44 -psim = pairs.simulation("md", [pairs.point_mass()], timesteps=200, double_prec=True) +psim = pairs.simulation("md", [pairs.point_mass()], timesteps=200, double_prec=True, debug=True) if target == 'gpu': psim.target(pairs.target_gpu()) diff --git a/runtime/boundary_weights.hpp b/runtime/boundary_weights.hpp index 3678c899f46c1ae75820545016beb2e72ca611f6..7c7a7d86f8508b0ab10a2a66bd7929b03e1efb0a 100644 --- a/runtime/boundary_weights.hpp +++ b/runtime/boundary_weights.hpp @@ -66,6 +66,7 @@ void compute_boundary_weights( (*comm_weight)++; } } + std::cout << "comp_weight = " << (*comp_weight) << ", comm_weight = " << (*comm_weight) << std::endl; #else real_t *position_ptr = static_cast<real_t *>(position_prop.getDevicePointer()); diff --git a/runtime/domain/block_forest.cpp b/runtime/domain/block_forest.cpp index 8b8d5b5d5862d7fbe23db0a77a7d46edde1ce461..e2d73070808cf27bd1c17b52be91d8c4d5fc3b5a 100644 --- a/runtime/domain/block_forest.cpp +++ b/runtime/domain/block_forest.cpp @@ -79,14 +79,14 @@ void BlockForest::updateNeighborhood() { } void BlockForest::copyRuntimeArray(const std::string& name, void *dest, const int size) { - void *src = name.compare("ranks") ? static_cast<void *>(ranks.data()) : - name.compare("naabbs") ? static_cast<void *>(naabbs.data()) : - name.compare("aabb_offsets") ? static_cast<void *>(aabb_offsets.data()) : - name.compare("aabbs") ? static_cast<void *>(aabbs.data()) : - name.compare("subdom") ? static_cast<void *>(subdom) : nullptr; + void *src = name.compare("ranks") == 0 ? static_cast<void *>(ranks.data()) : + name.compare("naabbs") == 0 ? static_cast<void *>(naabbs.data()) : + name.compare("aabb_offsets") == 0 ? static_cast<void *>(aabb_offsets.data()) : + name.compare("aabbs") == 0 ? static_cast<void *>(aabbs.data()) : + name.compare("subdom") == 0 ? static_cast<void *>(subdom) : nullptr; PAIRS_ASSERT(src != nullptr); - bool is_real = name.compare("aabbs") || name.compare("subdom"); + bool is_real = (name.compare("aabbs") == 0) || (name.compare("subdom") == 0); int tsize = is_real ? sizeof(real_t) : sizeof(int); std::memcpy(dest, src, size * tsize); } @@ -247,6 +247,11 @@ void BlockForest::initialize(int *argc, char ***argv) { if(balance_workload) { this->initializeWorkloadBalancer(); + } +} + +void BlockForest::update() { + if(balance_workload) { this->updateWeights(); forest->refresh(); } diff --git a/runtime/domain/block_forest.hpp b/runtime/domain/block_forest.hpp index 80d63d59f40d9e75389c9d650fe152e19db9cb79..673369d6bb925317de43717c5bc73cd5e8c3fa9c 100644 --- a/runtime/domain/block_forest.hpp +++ b/runtime/domain/block_forest.hpp @@ -52,6 +52,7 @@ public: } void initialize(int *argc, char ***argv); + void update(); void finalize(); int getWorldSize() const { return world_size; } int getRank() const { return rank; } diff --git a/runtime/domain/domain_partitioning.hpp b/runtime/domain/domain_partitioning.hpp index 5806f3a00d281dc4dac283a9e84753de854ae684..77af9b8569e8dafbc0015974ab3fb6ec0383d635 100644 --- a/runtime/domain/domain_partitioning.hpp +++ b/runtime/domain/domain_partitioning.hpp @@ -38,6 +38,7 @@ public: } virtual void initialize(int *argc, char ***argv) = 0; + virtual void update() = 0; virtual int getWorldSize() const = 0; virtual int getRank() const = 0; virtual int getNumberOfNeighborAABBs() = 0; diff --git a/runtime/domain/regular_6d_stencil.cpp b/runtime/domain/regular_6d_stencil.cpp index aa276793f75ab163e69e1dae067fdbc7c7aab151..ae66aca0db373bb5814182ab5bbf7018713e2c30 100644 --- a/runtime/domain/regular_6d_stencil.cpp +++ b/runtime/domain/regular_6d_stencil.cpp @@ -89,6 +89,8 @@ void Regular6DStencil::initialize(int *argc, char ***argv) { this->setBoundingBox(); } +void Regular6DStencil::update() {} + void Regular6DStencil::finalize() { MPI_Finalize(); } @@ -101,19 +103,19 @@ int Regular6DStencil::isWithinSubdomain(real_t x, real_t y, real_t z) { void Regular6DStencil::copyRuntimeArray(const std::string& name, void *dest, const int size) { for(int d = 0; d < ndims; d++) { - if(name.compare("neighbor_ranks")) { + if(name.compare("neighbor_ranks") == 0) { int *neighbor_ranks = static_cast<int *>(dest); neighbor_ranks[d * 2 + 0] = prev[d]; neighbor_ranks[d * 2 + 1] = next[d]; } - if(name.compare("pbc")) { + if(name.compare("pbc") == 0) { int *pbc = static_cast<int *>(dest); pbc[d * 2 + 0] = pbc_prev[d]; pbc[d * 2 + 1] = pbc_next[d]; } - if(name.compare("subdom")) { + if(name.compare("subdom") == 0) { real_t *subdom = static_cast<real_t *>(dest); subdom[d * 2 + 0] = subdom_min[d]; subdom[d * 2 + 1] = subdom_max[d]; diff --git a/runtime/domain/regular_6d_stencil.hpp b/runtime/domain/regular_6d_stencil.hpp index b507cd0a8550c53eeb1a2f833672532bbd94df8b..5b91d7a53c73f75edb4671deb9dba9069b68a9c6 100644 --- a/runtime/domain/regular_6d_stencil.hpp +++ b/runtime/domain/regular_6d_stencil.hpp @@ -51,6 +51,7 @@ public: void setConfig(); void setBoundingBox(); void initialize(int *argc, char ***argv); + void update(); void finalize(); int getWorldSize() const { return world_size; } diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp index 9af0f9737707b239e11f21363ac656511fab2c2f..04f8f69ccd390499c64ebbd8c6efe1bfc1b57190 100644 --- a/runtime/pairs.hpp +++ b/runtime/pairs.hpp @@ -303,6 +303,8 @@ public: int *argc, char ***argv, real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax); + void updateDomain() { dom_part->update(); } + DomainPartitioner *getDomainPartitioner() { return dom_part; } void communicateSizes(int dim, const int *send_sizes, int *recv_sizes); diff --git a/src/pairs/analysis/__init__.py b/src/pairs/analysis/__init__.py index 846843c3c72ee73a6359a74f397195eed453f8f2..99e7a80d768f74fa24f007358ad2976e9a35c006 100644 --- a/src/pairs/analysis/__init__.py +++ b/src/pairs/analysis/__init__.py @@ -6,6 +6,8 @@ from pairs.analysis.modules import FetchModulesReferences class Analysis: + """Compiler analysis performed on P4IRS""" + def __init__(self, ast): self._ast = ast diff --git a/src/pairs/sim/domain.py b/src/pairs/sim/domain.py index 8126b58fea395785fcf5e486ca9573afa0d8049d..2e0d9877d28ca5f2dfd763ef21b20307ab5d752e 100644 --- a/src/pairs/sim/domain.py +++ b/src/pairs/sim/domain.py @@ -9,3 +9,12 @@ class InitializeDomain(Lowerable): @pairs_inline def lower(self): self.sim.domain_partitioning().initialize() + + +class UpdateDomain(Lowerable): + def __init__(self, sim): + super().__init__(sim) + + @pairs_inline + def lower(self): + self.sim.domain_partitioning().update() diff --git a/src/pairs/sim/domain_partitioning.py b/src/pairs/sim/domain_partitioning.py index 93822feacca8a32fb2e4a0a093ce2cda2cb87bde..83875af3847a2a61d787f910a270435835f6051a 100644 --- a/src/pairs/sim/domain_partitioning.py +++ b/src/pairs/sim/domain_partitioning.py @@ -36,6 +36,9 @@ class DimensionRanges: Call_Void(self.sim, "pairs->copyRuntimeArray", ['pbc', self.pbc, self.sim.ndims() * 2]) Call_Void(self.sim, "pairs->copyRuntimeArray", ['subdom', self.subdom, self.sim.ndims() * 2]) + def update(self): + Call_Void(self.sim, "pairs->updateDomain", []) + def ghost_particles(self, step, position, offset=0.0): # Particles with one of the following flags are ignored flags_to_exclude = (Flags.Infinite | Flags.Global) @@ -99,6 +102,8 @@ class BlockForest: grid_array = [(self.sim.grid.min(d), self.sim.grid.max(d)) for d in range(self.sim.ndims())] Call_Void(self.sim, "pairs->initDomain", [param for delim in grid_array for param in delim]) + def update(self): + Call_Void(self.sim, "pairs->updateDomain", []) Assign(self.sim, self.nranks, Call_Int(self.sim, "pairs->getNumberOfNeighborRanks", [])) Assign(self.sim, self.ntotal_aabbs, Call_Int(self.sim, "pairs->getNumberOfNeighborAABBs", [])) diff --git a/src/pairs/sim/simulation.py b/src/pairs/sim/simulation.py index d8d751818829e0aa343967218eeabd2df969c51f..faacef8eb79f2122f975db60a1b58378194578b5 100644 --- a/src/pairs/sim/simulation.py +++ b/src/pairs/sim/simulation.py @@ -17,7 +17,7 @@ from pairs.sim.comm import Comm from pairs.sim.contact_history import ContactHistory, BuildContactHistory, ClearUnusedContactHistory, ResetContactHistoryUsageStatus from pairs.sim.copper_fcc_lattice import CopperFCCLattice from pairs.sim.dem_sc_grid import DEMSCGrid -from pairs.sim.domain import InitializeDomain +from pairs.sim.domain import InitializeDomain, UpdateDomain from pairs.sim.domain_partitioners import DomainPartitioners from pairs.sim.domain_partitioning import BlockForest, DimensionRanges from pairs.sim.features import AllocateFeatureProperties @@ -243,7 +243,7 @@ class Simulation: def add_var(self, var_name, var_type, init_value=0, runtime=False): assert self.var(var_name) is None, f"Variable already defined: {var_name}" - return self.vars.add(var_name, var_type, init_value) + return self.vars.add(var_name, var_type, init_value, runtime) def add_temp_var(self, init_value): return self.vars.add_temp(init_value) @@ -436,6 +436,8 @@ class Simulation: comm = Comm(self, self._dom_part) # Params that determine when a method must be called only when reneighboring every_reneighbor_params = {'every': self.reneighbor_frequency} + # Update domain is added at last on setups because particles must be already present in the simulation + self.setups.add_statement(UpdateDomain(self)) # First steps executed during each time-step in the simulation timestep_procedures = self.pre_step_functions + [