diff --git a/runtime/domain/regular_6d_stencil.cpp b/runtime/domain/regular_6d_stencil.cpp index 0c6f100fd8afd93f473acab26504c236ec81f8f3..80d617a881a43ac089e4643825a89669f7468745 100644 --- a/runtime/domain/regular_6d_stencil.cpp +++ b/runtime/domain/regular_6d_stencil.cpp @@ -24,19 +24,27 @@ void Regular6DStencil::setConfig() { } } - const int imax = partition_flags[0] ? world_size : 1; - const int jmax = partition_flags[1] ? world_size : 1; - const int kmax = partition_flags[2] ? world_size : 1; - for(int i = 1; i <= imax; i++) { - for(int j = 1; j <= jmax; j++) { - for(int k = 1; k <= kmax; k++) { - if((i * j * k) == world_size) { - const real_t surf = (area[0] / i / j) + (area[1] / i / k) + (area[2] / j / k); - if(surf < best_surf) { - nranks[0] = i; - nranks[1] = j; - nranks[2] = k; - best_surf = surf; + for (int i = 1; i <= world_size; i++) { + if (world_size % i == 0) { + const int rem_yz = world_size / i; + + for (int j = 1; j <= rem_yz; j++) { + if (rem_yz % j == 0) { + const int k = rem_yz / j; + + // Check flags for each dimension + if((partition_flags[0] || i == 1) && + (partition_flags[1] || j == 1) && + (partition_flags[2] || k == 1)) { + + const real_t surf = (area[0] / i / j) + (area[1] / i / k) + (area[2] / j / k); + + if (surf < best_surf) { + nranks[0] = i; + nranks[1] = j; + nranks[2] = k; + best_surf = surf; + } } } } @@ -124,43 +132,57 @@ void Regular6DStencil::communicateData( const real_t *send_buf, const int *send_offsets, const int *nsend, real_t *recv_buf, const int *recv_offsets, const int *nrecv) { - std::vector<MPI_Request> send_requests(ndims * 2, MPI_REQUEST_NULL); - std::vector<MPI_Request> recv_requests(ndims * 2, MPI_REQUEST_NULL); + //MPI_Request recv_requests[2]; + //MPI_Request send_requests[2]; const real_t *send_prev = &send_buf[send_offsets[dim * 2 + 0] * elem_size]; const real_t *send_next = &send_buf[send_offsets[dim * 2 + 1] * elem_size]; real_t *recv_prev = &recv_buf[recv_offsets[dim * 2 + 0] * elem_size]; real_t *recv_next = &recv_buf[recv_offsets[dim * 2 + 1] * elem_size]; - if (prev[dim] != rank) { - MPI_Isend( + if(prev[dim] != rank) { + MPI_Sendrecv( send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, - MPI_COMM_WORLD, &send_requests[0]); + recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, next[dim], 0, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + /* MPI_Irecv( recv_prev, nrecv[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, MPI_COMM_WORLD, &recv_requests[0]); + + MPI_Isend( + send_prev, nsend[dim * 2 + 0] * elem_size, MPI_DOUBLE, prev[dim], 0, + MPI_COMM_WORLD, &send_requests[0]); + */ } else { - for (int i = 0; i < nsend[dim * 2 + 0] * elem_size; i++) { + for(int i = 0; i < nsend[dim * 2 + 0] * elem_size; i++) { recv_prev[i] = send_prev[i]; } } - if (next[dim] != rank) { - MPI_Isend( + if(next[dim] != rank) { + MPI_Sendrecv( send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, - MPI_COMM_WORLD, &send_requests[1]); + recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, prev[dim], 0, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + /* MPI_Irecv( recv_next, nrecv[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, MPI_COMM_WORLD, &recv_requests[1]); + + MPI_Isend( + send_next, nsend[dim * 2 + 1] * elem_size, MPI_DOUBLE, next[dim], 0, + MPI_COMM_WORLD, &send_requests[1]); + */ } else { - for (int i = 0; i < nsend[dim * 2 + 1] * elem_size; i++) { + for(int i = 0; i < nsend[dim * 2 + 1] * elem_size; i++) { recv_next[i] = send_next[i]; } } - MPI_Waitall(2, send_requests.data(), MPI_STATUSES_IGNORE); - MPI_Waitall(2, recv_requests.data(), MPI_STATUSES_IGNORE); + //MPI_Waitall(2, recv_requests, MPI_STATUSES_IGNORE); + //MPI_Waitall(2, send_requests, MPI_STATUSES_IGNORE); } void Regular6DStencil::communicateAllData( @@ -168,8 +190,8 @@ void Regular6DStencil::communicateAllData( const real_t *send_buf, const int *send_offsets, const int *nsend, real_t *recv_buf, const int *recv_offsets, const int *nrecv) { - std::vector<MPI_Request> send_requests(ndims * 2, MPI_REQUEST_NULL); - std::vector<MPI_Request> recv_requests(ndims * 2, MPI_REQUEST_NULL); + //std::vector<MPI_Request> send_requests(ndims * 2, MPI_REQUEST_NULL); + //std::vector<MPI_Request> recv_requests(ndims * 2, MPI_REQUEST_NULL); for (int d = 0; d < ndims; d++) { const real_t *send_prev = &send_buf[send_offsets[d * 2 + 0] * elem_size]; @@ -178,6 +200,12 @@ void Regular6DStencil::communicateAllData( real_t *recv_next = &recv_buf[recv_offsets[d * 2 + 1] * elem_size]; if (prev[d] != rank) { + MPI_Sendrecv( + send_prev, nsend[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0, + recv_prev, nrecv[d * 2 + 0] * elem_size, MPI_DOUBLE, next[d], 0, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + /* MPI_Isend( send_prev, nsend[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0, MPI_COMM_WORLD, &send_requests[d * 2 + 0]); @@ -185,6 +213,7 @@ void Regular6DStencil::communicateAllData( MPI_Irecv( recv_prev, nrecv[d * 2 + 0] * elem_size, MPI_DOUBLE, prev[d], 0, MPI_COMM_WORLD, &recv_requests[d * 2 + 0]); + */ } else { for (int i = 0; i < nsend[d * 2 + 0] * elem_size; i++) { recv_prev[i] = send_prev[i]; @@ -192,6 +221,12 @@ void Regular6DStencil::communicateAllData( } if (next[d] != rank) { + MPI_Sendrecv( + send_next, nsend[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0, + recv_next, nrecv[d * 2 + 1] * elem_size, MPI_DOUBLE, prev[d], 0, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + /* MPI_Isend( send_next, nsend[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0, MPI_COMM_WORLD, &send_requests[d * 2 + 1]); @@ -199,6 +234,7 @@ void Regular6DStencil::communicateAllData( MPI_Irecv( recv_next, nrecv[d * 2 + 1] * elem_size, MPI_DOUBLE, next[d], 0, MPI_COMM_WORLD, &recv_requests[d * 2 + 1]); + */ } else { for (int i = 0; i < nsend[d * 2 + 1] * elem_size; i++) { recv_next[i] = send_next[i]; @@ -206,8 +242,8 @@ void Regular6DStencil::communicateAllData( } } - MPI_Waitall(ndims * 2, send_requests.data(), MPI_STATUSES_IGNORE); - MPI_Waitall(ndims * 2, recv_requests.data(), MPI_STATUSES_IGNORE); + //MPI_Waitall(ndims * 2, send_requests.data(), MPI_STATUSES_IGNORE); + //MPI_Waitall(ndims * 2, recv_requests.data(), MPI_STATUSES_IGNORE); } } diff --git a/runtime/pairs_common.hpp b/runtime/pairs_common.hpp index 61925a1746f52eb82eb6bb30ab5a0ca89208bfc8..dfb6c2e2e5aec371258f9db803a169c6e528e636 100644 --- a/runtime/pairs_common.hpp +++ b/runtime/pairs_common.hpp @@ -1,4 +1,5 @@ #include <iostream> +#include <mpi.h> #pragma once @@ -52,7 +53,14 @@ enum DomainPartitioners { #ifdef DEBUG # include <assert.h> -# define PAIRS_DEBUG(...) fprintf(stderr, __VA_ARGS__) +# define PAIRS_DEBUG(...) { \ + int __rank; \ + MPI_Comm_rank(MPI_COMM_WORLD, &__rank); \ + if(__rank == 0) { \ + fprintf(stderr, __VA_ARGS__); \ + } \ + } + # define PAIRS_ASSERT(a) assert(a) # define PAIRS_EXCEPTION(a) #else