diff --git a/Makefile b/Makefile
index ba82842577390602a55904c2cb8c71a8863909cc..ca5aa90506fea55ecd3a520d413260f3246f0d54 100644
--- a/Makefile
+++ b/Makefile
@@ -18,11 +18,13 @@ lj_ns.cu:
 # Targets
 lj_cpu: lj_ns.cpp
 # g++ -o lj_cpu lj_ns.cpp -DDEBUG
-	mpic++ -O3 -o lj_cpu lj_ns.cpp runtime/pairs.cpp runtime/devices/dummy.cpp -DDEBUG
+	mpic++ -O3 -o lj_cpu lj_ns.cpp runtime/pairs.cpp runtime/domain/regular_6d_stencil.cpp runtime/devices/dummy.cpp -DDEBUG
 
 lj_gpu: lj_ns.cu
-	nvcc -o lj_gpu lj_ns.cu
+	mpic++ -c -o pairs.o runtime/pairs.cpp -DDEBUG
+	mpic++ -c -o regular_6d_stencil.o runtime/domain/regular_6d_stencil.cpp -DDEBUG
+	nvcc -o lj_gpu runtime/devices/cuda.cu pairs.o regular_6d_stencil.o lj_ns.cu
 
 clean:
 	@echo "Cleaning..."
-	rm -rf build lj_cpu lj_gpu lj_ns.cpp lj_ns.cu dist pairs.egg-info functions functions.pdf
+	rm -rf build lj_cpu lj_gpu lj_ns.cpp lj_ns.cu dist pairs.egg-info functions functions.pdf pairs.o regular_6d_stencil.o
diff --git a/runtime/devices/cuda.cu b/runtime/devices/cuda.cu
index 50b35006f2bbf70be3879abd539eaa882e66f4c2..c6ecfb5f2d000a43f36e35b747411115faa573b8 100644
--- a/runtime/devices/cuda.cu
+++ b/runtime/devices/cuda.cu
@@ -1,8 +1,6 @@
 #include <cuda_runtime.h>
 #include <iostream>
 
-#pragma once
-
 #define CUDA_ASSERT(a) { pairs::cuda_assert((a), __FILE__, __LINE__); }
 
 namespace pairs {
diff --git a/runtime/domain/domain_partitioning.hpp b/runtime/domain/domain_partitioning.hpp
index c4a231b3e14c5ab6ea911f1b830a348399f55677..e08e5eebba5576d6094205a837d69c606e4f7662 100644
--- a/runtime/domain/domain_partitioning.hpp
+++ b/runtime/domain/domain_partitioning.hpp
@@ -1,5 +1,3 @@
-#include <mpi.h>
-//---
 #include "../pairs_common.hpp"
 
 #pragma once
diff --git a/runtime/domain/regular_6d_stencil.cpp b/runtime/domain/regular_6d_stencil.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..659c3d397372f49d2cad4e5479ba0a3d7429473f
--- /dev/null
+++ b/runtime/domain/regular_6d_stencil.cpp
@@ -0,0 +1,171 @@
+#include <mpi.h>
+#include <iostream>
+//---
+#include "../pairs_common.hpp"
+#include "regular_6d_stencil.hpp"
+
+namespace pairs {
+
+void Regular6DStencil::setConfig() {
+    PAIRS_ASSERT(ndims == 3);
+    real_t area[3];
+    real_t best_surf = 0.0;
+    int d = 0;
+
+    for(int d1 = 0; d1 < ndims; d1++) {
+        nranks[d1] = 1;
+        for(int d2 = d1 + 1; d2 < ndims; d2++) {
+            area[d] = (this->grid_max[d1] - this->grid_min[d1]) * (this->grid_max[d2] - this->grid_min[d2]);
+            best_surf += 2.0 * area[d];
+            d++;
+        }
+    }
+
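+    // Among all factorizations world_size = i * j * k, choose the rank grid
+    // with the smallest communication surface per subdomain.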
+    for(int i = 1; i < world_size; i++) {
+        if(world_size % i == 0) {
+            const int rem_yz = world_size / i;
+            for(int j = 1; j < rem_yz; j++) {
+                if(rem_yz % j == 0) {
+                    const int k = rem_yz / j;
+                    const real_t surf = (area[0] / i / j) + (area[1] / i / k) + (area[2] / j / k);
+                    if(surf < best_surf) {
+                        nranks[0] = i;
+                        nranks[1] = j;
+                        nranks[2] = k;
+                        best_surf = surf;
+                    }
+                }
+            }
+        }
+    }
+}
+
+void Regular6DStencil::setBoundingBox() {
+    MPI_Comm cartesian;
+    int *myloc = new int[ndims];
+    int *periods = new int[ndims];
+    real_t *rank_length = new real_t[ndims];
+    int reorder = 0;
+
+    for(int d = 0; d < ndims; d++) {
+        periods[d] = 1;
+        rank_length[d] = (this->grid_max[d] - this->grid_min[d]) / (real_t) nranks[d];
+    }
+
+    MPI_Cart_create(MPI_COMM_WORLD, ndims, nranks, periods, reorder, &cartesian);
+    MPI_Cart_get(cartesian, ndims, nranks, periods, myloc);
+    for(int d = 0; d < ndims; d++) {
+        MPI_Cart_shift(cartesian, d, 1, &(prev[d]), &(next[d]));
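+        // Flag ranks on the domain boundary, where the stencil wraps around
+        // periodically: +1 across the lower face, -1 across the upper face.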
+        pbc_prev[d] = (myloc[d] == 0) ? 1 : 0;
+        pbc_next[d] = (myloc[d] == nranks[d] - 1) ? -1 : 0;
+        subdom_min[d] = this->grid_min[d] + rank_length[d] * (real_t)myloc[d];
+        subdom_max[d] = subdom_min[d] + rank_length[d];
+    }
+
+    delete[] myloc;
+    delete[] periods;
+    delete[] rank_length;
+    MPI_Comm_free(&cartesian);
+}
+
+void Regular6DStencil::initialize(int *argc, char ***argv) {
+    MPI_Init(argc, argv);
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    this->setConfig();
+    this->setBoundingBox();
+}
+
+void Regular6DStencil::finalize() {
+    MPI_Finalize();
+}
+
+int Regular6DStencil::isWithinSubdomain(real_t x, real_t y, real_t z) {
+    return x >= subdom_min[0] && x < subdom_max[0] - SMALL &&
+           y >= subdom_min[1] && y < subdom_max[1] - SMALL &&
+           z >= subdom_min[2] && z < subdom_max[2] - SMALL;
+}
+
+void Regular6DStencil::fillArrays(int *neighbor_ranks, int *pbc, real_t *subdom) {
+    for(int d = 0; d < ndims; d++) {
+        neighbor_ranks[d * 2 + 0] = prev[d];
+        neighbor_ranks[d * 2 + 1] = next[d];
+        pbc[d * 2 + 0] = pbc_prev[d];
+        pbc[d * 2 + 1] = pbc_next[d];
+        subdom[d * 2 + 0] = subdom_min[d];
+        subdom[d * 2 + 1] = subdom_max[d];
+    }
+}
+
+void Regular6DStencil::communicateSizes(int dim, const int *send_sizes, int *recv_sizes) {
+    std::cout << "communicateSizes" << std::endl;
+
+    if(prev[dim] != rank) {
+        MPI_Send(&send_sizes[dim * 2 + 0], 1, MPI_INT, prev[dim], 0, MPI_COMM_WORLD);
+        MPI_Recv(&recv_sizes[dim * 2 + 0], 1, MPI_INT, next[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    } else {
+        recv_sizes[dim * 2 + 0] = send_sizes[dim * 2 + 0];
+    }
+
+    if(next[dim] != rank) {
+        MPI_Send(&send_sizes[dim * 2 + 1], 1, MPI_INT, next[dim], 0, MPI_COMM_WORLD);
+        MPI_Recv(&recv_sizes[dim * 2 + 1], 1, MPI_INT, prev[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+    } else {
+        recv_sizes[dim * 2 + 1] = send_sizes[dim * 2 + 1];
+    }
+}
+
+void Regular6DStencil::communicateData(
+    int dim, int elem_size,
+    const real_t *send_buf, const int *send_offsets, const int *nsend,
+    real_t *recv_buf, const int *recv_offsets, const int *nrecv) {
+    std::cout << "communicateData" << std::endl;
+
+    const real_t *send_prev = &send_buf[send_offsets[dim * 2 + 0] * elem_size];
+    const real_t *send_next = &send_buf[send_offsets[dim * 2 + 1] * elem_size];
+    real_t *recv_prev = &recv_buf[recv_offsets[dim * 2 + 0] * elem_size];
+    real_t *recv_next = &recv_buf[recv_offsets[dim * 2 + 1] * elem_size];
+
+    if(prev[dim] != rank) {
+        std::cout << rank << ": send " << nsend[dim * 2 + 0] << " elems to " << prev[dim] << ", recv " << nrecv[dim * 2 + 0] << " elems from " << next[dim] << std::endl;
+        //MPI_Send(send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, MPI_COMM_WORLD);
+        //MPI_Recv(recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
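+        // A combined MPI_Sendrecv avoids the deadlock that the paired blocking
+        // MPI_Send/MPI_Recv calls above (commented out) can produce once
+        // messages exceed the eager-protocol threshold.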
+        MPI_Sendrecv(
+            send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0,
+            recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0,
+            MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+    } else {
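+        // Only one rank along this dimension: prev and next both refer to
+        // this rank, so the halo data is copied locally instead of sent.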
+        for(int i = 0; i < nsend[dim * 2 + 0] * elem_size; i++) {
+            recv_prev[i] = send_prev[i];
+        }
+    }
+
+    if(next[dim] != rank) {
+        std::cout << rank << ": send " << nsend[dim * 2 + 1] << " elems to " << next[dim] << ", recv " << nrecv[dim * 2 + 1] << " elems from " << prev[dim] << std::endl;
+        //MPI_Send(send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, MPI_COMM_WORLD);
+        //MPI_Recv(recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+        MPI_Sendrecv(
+            send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0,
+            recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0,
+            MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+    } else {
+        for(int i = 0; i < nsend[dim * 2 + 1] * elem_size; i++) {
+            recv_next[i] = send_next[i];
+        }
+    }
+}
+
+}
diff --git a/runtime/domain/regular_6d_stencil.hpp b/runtime/domain/regular_6d_stencil.hpp
index 631ded149ca38869db7f9c797dbd61cc6aa80075..1c12ff45de9a2a1a69b1067cdc42d4c2e995c338 100644
--- a/runtime/domain/regular_6d_stencil.hpp
+++ b/runtime/domain/regular_6d_stencil.hpp
@@ -3,7 +3,7 @@
 
 #pragma once
 
-#define SMALL 0.00001
+#define SMALL 0.00001
 
 namespace pairs {
 
@@ -41,160 +41,19 @@ public:
         delete[] subdom_max;
     }
 
-    void setConfig() {
-        PAIRS_ASSERT(ndims == 3);
-        real_t area[3];
-        real_t best_surf = 0.0;
-        int d = 0;
-
-        for(int d1 = 0; d1 < ndims; d1++) {
-            nranks[d1] = 1;
-            for(int d2 = d1 + 1; d2 < ndims; d2++) {
-                area[d] = (this->grid_max[d1] - this->grid_min[d1]) * (this->grid_max[d2] - this->grid_min[d2]);
-                best_surf += 2.0 * area[d];
-                d++;
-            }
-        }
-
-        for(int i = 1; i < world_size; i++) {
-            if(world_size % i == 0) {
-                const int rem_yz = world_size / i;
-                for(int j = 1; j < rem_yz; j++) {
-                    if(rem_yz % j == 0) {
-                        const int k = rem_yz / j;
-                        const real_t surf = (area[0] / i / j) + (area[1] / i / k) + (area[2] / j / k);
-                        if(surf < best_surf) {
-                            nranks[0] = i;
-                            nranks[1] = j;
-                            nranks[2] = k;
-                            best_surf = surf;
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    void setBoundingBox() {
-        MPI_Comm cartesian;
-        int *myloc = new int[ndims];
-        int *periods = new int[ndims];
-        real_t *rank_length = new real_t[ndims];
-        int reorder = 0;
-
-        for(int d = 0; d < ndims; d++) {
-            periods[d] = 1;
-            rank_length[d] = (this->grid_max[d] - this->grid_min[d]) / (real_t) nranks[d];
-        }
-
-        MPI_Cart_create(MPI_COMM_WORLD, ndims, nranks, periods, reorder, &cartesian);
-        MPI_Cart_get(cartesian, ndims, nranks, periods, myloc);
-        for(int d = 0; d < ndims; d++) {
-            MPI_Cart_shift(cartesian, d, 1, &(prev[d]), &(next[d]));
-            pbc_prev[d] = (myloc[d] == 0) ? 1 : 0;
-            pbc_next[d] = (myloc[d] == nranks[d] - 1) ? -1 : 0;
-            subdom_min[d] = this->grid_min[d] + rank_length[d] * (real_t)myloc[d];
-            subdom_max[d] = subdom_min[d] + rank_length[d];
-        }
-
-        delete[] myloc;
-        delete[] periods;
-        delete[] rank_length;
-        MPI_Comm_free(&cartesian);
-    }
-
-    void initialize(int *argc, char ***argv) {
-        MPI_Init(argc, argv);
-        MPI_Comm_size(MPI_COMM_WORLD, &world_size);
-        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-        this->setConfig();
-        this->setBoundingBox();
-    }
-
-    void finalize() {
-        MPI_Finalize();
-    }
-
+    void setConfig();
+    void setBoundingBox();
+    void initialize(int *argc, char ***argv);
+    void finalize();
     int getWorldSize() const { return world_size; }
     int getRank() const { return rank; }
-    int isWithinSubdomain(real_t x, real_t y, real_t z) {
-        return x >= subdom_min[0] && x < subdom_max[0] - SMALL &&
-               y >= subdom_min[1] && y < subdom_max[1] - SMALL &&
-               z >= subdom_min[2] && z < subdom_max[2] - SMALL;
-    }
-
-    void fillArrays(int *neighbor_ranks, int *pbc, real_t *subdom) {
-        for(int d = 0; d < ndims; d++) {
-            neighbor_ranks[d * 2 + 0] = prev[d];
-            neighbor_ranks[d * 2 + 1] = next[d];
-            pbc[d * 2 + 0] = pbc_prev[d];
-            pbc[d * 2 + 1] = pbc_next[d];
-            subdom[d * 2 + 0] = subdom_min[d];
-            subdom[d * 2 + 1] = subdom_max[d];
-        }
-    }
-
-    void communicateSizes(int dim, const int *send_sizes, int *recv_sizes) {
-        std::cout << "communicateSizes" << std::endl;
-
-        if(prev[dim] != rank) {
-            MPI_Send(&send_sizes[dim * 2 + 0], 1, MPI_INT, prev[dim], 0, MPI_COMM_WORLD);
-            MPI_Recv(&recv_sizes[dim * 2 + 0], 1, MPI_INT, next[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-        } else {
-            recv_sizes[dim * 2 + 0] = send_sizes[dim * 2 + 0];
-        }
-
-        if(next[dim] != rank) {
-            MPI_Send(&send_sizes[dim * 2 + 1], 1, MPI_INT, next[dim], 0, MPI_COMM_WORLD);
-            MPI_Recv(&recv_sizes[dim * 2 + 1], 1, MPI_INT, prev[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-        } else {
-            recv_sizes[dim * 2 + 1] = send_sizes[dim * 2 + 1];
-        }
-    }
-
+    int isWithinSubdomain(real_t x, real_t y, real_t z);
+    void fillArrays(int *neighbor_ranks, int *pbc, real_t *subdom);
+    void communicateSizes(int dim, const int *send_sizes, int *recv_sizes);
     void communicateData(
         int dim, int elem_size,
         const real_t *send_buf, const int *send_offsets, const int *nsend,
-        real_t *recv_buf, const int *recv_offsets, const int *nrecv) {
-        std::cout << "communicateData" << std::endl;
-
-        const real_t *send_prev = &send_buf[send_offsets[dim * 2 + 0] * elem_size];
-        const real_t *send_next = &send_buf[send_offsets[dim * 2 + 1] * elem_size];
-        real_t *recv_prev = &recv_buf[recv_offsets[dim * 2 + 0] * elem_size];
-        real_t *recv_next = &recv_buf[recv_offsets[dim * 2 + 1] * elem_size];
-
-        if(prev[dim] != rank) {
-            std::cout << rank << ": send " << nsend[dim * 2 + 0] << " elems to " << prev[dim] << ", recv " << nrecv[dim * 2 + 0] << " elems from " << next[dim] << std::endl;
-            //MPI_Send(send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, MPI_COMM_WORLD);
-            //MPI_Recv(recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-
-            MPI_Sendrecv(
-                send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0,
-                recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0,
-                MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-
-        } else {
-            for(int i = 0; i < nsend[dim * 2 + 0] * elem_size; i++) {
-                recv_prev[i] = send_prev[i];
-            }
-        }
-
-        if(next[dim] != rank) {
-            std::cout << rank << ": send " << nsend[dim * 2 + 1] << " elems to " << next[dim] << ", recv " << nrecv[dim * 2 + 1] << " elems from " << prev[dim] << std::endl;
-            //MPI_Send(send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, MPI_COMM_WORLD);
-            //MPI_Recv(recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-
-            MPI_Sendrecv(
-                send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0,
-                recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0,
-                MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-
-        } else {
-            for(int i = 0; i < nsend[dim * 2 + 1] * elem_size; i++) {
-                recv_next[i] = send_next[i];
-            }
-        }
-    }
+        real_t *recv_buf, const int *recv_offsets, const int *nrecv);
 };
 
 }