Skip to content
Snippets Groups Projects
Commit d637b5ba authored by Nils Kohl's avatar Nils Kohl :full_moon_with_face:
Browse files

Initial commit

parents
Branches master
No related tags found
No related merge requests found
cmake_minimum_required(VERSION 3.5)

# set the project name
project(mpi-sor-benchmark)

# MPI is mandatory: configuration stops with an error if it cannot be found.
find_package(MPI REQUIRED)

# add the executable
add_executable(mpi-sor-benchmark main.cpp dummy.cpp kernel.cpp)

# Attach the MPI headers and libraries to the target itself (instead of to the
# whole directory) and state the link visibility explicitly; nothing links
# against this executable, so PRIVATE is sufficient.
target_include_directories(mpi-sor-benchmark PRIVATE ${MPI_INCLUDE_PATH})
target_link_libraries(mpi-sor-benchmark PRIVATE ${MPI_LIBRARIES})
# link_files_to_builddir(<globExpression>)
#
# Makes every file of the current source directory that matches
# <globExpression> available under the same relative name in the current
# binary directory. On a Windows host the files are copied, on all other hosts
# a symbolic link pointing back to the source file is created.
# The work happens at configure time; nothing is done for in-source builds.
#
# Example: link_files_to_builddir(*.py)
function( link_files_to_builddir globExpression )
  # Source and build tree coincide: the files are already where they belong.
  if( "${CMAKE_CURRENT_BINARY_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}" )
    return()
  endif()

  # Collect the matches as paths relative to the source directory so that the
  # same relative path can be reused below the binary directory.
  file( GLOB matchedFiles RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" ${globExpression} )

  foreach( relPath IN LISTS matchedFiles )
    if( NOT CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows" )
      execute_process(
        COMMAND "${CMAKE_COMMAND}" -E create_symlink
                "${CMAKE_CURRENT_SOURCE_DIR}/${relPath}"
                "${CMAKE_CURRENT_BINARY_DIR}/${relPath}" )
    else()
      # Plain copy on Windows hosts instead of a symbolic link.
      configure_file(
        "${CMAKE_CURRENT_SOURCE_DIR}/${relPath}"
        "${CMAKE_CURRENT_BINARY_DIR}/${relPath}"
        COPYONLY )
    endif()
  endforeach()
endfunction()
# Make the Python and shell helper scripts reachable from the build directory.
link_files_to_builddir(*.py)
link_files_to_builddir(*.sh)

# Default to an optimized build, but do not override a build type the user
# selected explicitly (e.g. -DCMAKE_BUILD_TYPE=Debug).
if( NOT CMAKE_BUILD_TYPE )
  set( CMAKE_BUILD_TYPE "Release" )
endif()

# Kept in CMAKE_CXX_FLAGS on purpose: these flags are then also passed to the
# link step, exactly as before.
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -ffast-math" )
\ No newline at end of file
# Mini benchmark to trigger SuperMUC-NG performance fluctuations (ticket 130502)
## Build instructions
Out-of-source build with cmake:
$ git clone https://i10git.cs.fau.de/kohl/mpi-sor-benchmark.git
$ mkdir mpi-sor-benchmark-build
$ cd mpi-sor-benchmark-build
$ cmake ../mpi-sor-benchmark
$ make
Create job scripts with `python create_run_script.py`.
This creates a new directory with a bunch of job files that can be submitted directly, e.g.:
$ python create_run_script.py
$ # creates directory '2020-01-28_13-56-08'
$ cd 2020-01-28_13-56-08
$ sbatch snoop_n96_ppn48_level6_cellspp10_inner10_outer2_2020-01-28_13-56-08.job
$ # runs the benchmark on refinement level 6, 10 cells per process, 10 inner iterations,
$ # 2 outer iterations on 96 nodes with 48 ppn
To best reproduce the results, try both levels 6 and 7 as well as 10 or 100 inner iterations.
If the cells-per-process parameter is greater than 1, the kernel is simply executed on multiple
(different) arrays, and each execution is measured independently.
The outer iteration parameter sets the number of times the measurement is performed.
Results are written into `.csv` files. Use `plot_csv.py output.csv 0 1` to visualize the timings per rank
for cell 0 and outer iteration 1.
Example output:
![image](snoop_0_1.png)
\ No newline at end of file
import time
import datetime
import os
def supermuc_job_file_string(job_name="job", wall_clock_limit="1:00:00", prm_string="parameter_file.prm", num_nodes=1, ppn=48):
    """Return the text of a SLURM batch script that runs ./mpi-sor-benchmark.

    Parameters:
        job_name: value for ``#SBATCH -J``.
        wall_clock_limit: value for ``#SBATCH --time``.
        prm_string: command line arguments appended to the benchmark call.
        num_nodes: number of nodes to request; must be between 1 and 3072.
        ppn: MPI tasks per node (``#SBATCH --ntasks-per-node``).

    Returns:
        The complete job script as a string.

    Raises:
        ValueError: if ``num_nodes`` is smaller than 1 or larger than 3072,
            i.e. if no partition can be selected for it.
    """
    if num_nodes < 1:
        raise ValueError("num_nodes must be at least 1, got {}".format(num_nodes))

    def partition(num_nodes):
        # Partition is chosen purely from the requested node count.
        if num_nodes <= 16:
            return "micro"
        elif num_nodes <= 768:
            return "general"
        elif num_nodes <= 3072:
            return "large"
        # Previously this fell through and returned None, which ended up as
        # "--partition=None" in the generated job file.
        raise ValueError("no partition available for {} nodes (maximum is 3072)".format(num_nodes))

    # The island constraint is only emitted for jobs of up to 792 nodes;
    # larger jobs get an empty line in its place.
    constraint = ""
    if num_nodes <= 792:
        constraint = "#SBATCH --constraint=[i01|i02|i03|i04|i05|i06|i07|i08]"

    base_config = """#!/bin/bash
# Job Name and Files (also --job-name)
#SBATCH -J {job_name}
#Output and error (also --output, --error):
#SBATCH -o ./%x.%j.out
#SBATCH -e ./%x.%j.err
#Initial working directory (also --chdir):
#SBATCH -D ./
#Notification and type
#SBATCH --mail-type=END
#SBATCH --mail-user=nils.kohl@fau.de
# Wall clock limit:
#SBATCH --time={wall_clock_limit}
#SBATCH --no-requeue
#Setup of execution environment
#SBATCH --export=NONE
#SBATCH --get-user-env
#SBATCH --account=pr86ma
#SBATCH --ear=off
#SBATCH --partition={partition}
#Number of nodes and MPI tasks per node:
#SBATCH --nodes={num_nodes}
#SBATCH --ntasks-per-node={ppn}
{constraint}
module load slurm_setup
cd ..
pwd
ls -lha
source load_modules.sh
module list
#Run the program:
mpiexec -n $SLURM_NTASKS ./mpi-sor-benchmark {prm_string}
""".format(job_name=job_name, wall_clock_limit=wall_clock_limit, num_nodes=num_nodes, prm_string=prm_string, partition=partition(num_nodes),
           constraint=constraint, ppn=ppn)
    return base_config
def supermuc_scaling():
    """Write one job file per parameter combination into a new directory.

    The directory is named after the current local time
    (``YYYY-MM-DD_HH-MM-SS``) and is created in the current working
    directory. Each job file is called ``<job name>.job``; the benchmark it
    launches is told to write its results to ``<job name>.csv``.
    """
    # The time stamp names the output directory and is part of every job name.
    run_id = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H-%M-%S')
    os.mkdir(run_id)

    procs_per_node = 48
    n_outer = 2
    node_counts = [1, 2, 6, 12, 24, 48, 96, 192, 384, 768, 1536, 3072]
    inner_counts = [1, 10, 100]
    cells_counts = [2, 10]
    levels = [6, 7]

    # All combinations, level varying fastest and node count slowest.
    combinations = [(nodes, n_inner, n_cells, lvl)
                    for nodes in node_counts
                    for n_inner in inner_counts
                    for n_cells in cells_counts
                    for lvl in levels]

    for nodes, n_inner, n_cells, lvl in combinations:
        stem = "snoop_n{}_ppn{}_level{}_cellspp{}_inner{}_outer{}_{}".format(
            nodes, procs_per_node, lvl, n_cells, n_inner, n_outer, run_id)

        # Command line of the benchmark: level, cells per process,
        # outer iterations, inner iterations, output file.
        arguments = " ".join(str(value) for value in (lvl, n_cells, n_outer, n_inner, stem + ".csv"))

        script = supermuc_job_file_string(job_name=stem, wall_clock_limit="0:10:00",
                                          num_nodes=nodes, prm_string=arguments, ppn=procs_per_node)

        target = os.path.join(run_id, stem + ".job")
        with open(target, "w") as handle:
            handle.write(script)
# Script entry point: generate the job files only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    supermuc_scaling()
#include "dummy.h"
// Does nothing with its argument and returns immediately.
// NOTE(review): presumably exists as an opaque call in a separate translation
// unit so the compiler cannot optimize away work on the array - confirm
// against the call site.
void dummy(double *a) {
  (void)a; // the parameter is deliberately unused
}
\ No newline at end of file
// Declaration of dummy(); the definition in dummy.cpp has an empty body, so
// the pointed-to data is neither read nor modified.
void dummy(double *a);
\ No newline at end of file
#include "kernel.h"
// One in-place relaxation sweep over _data_vertexCellDst.
//
// For every visited point (ctr_1, ctr_2, ctr_3) the stored value is replaced by
//
//   relax * (1 / _data_xi[1]) * (rhs - sum_k _data_xi[k] * neighbour_k)
//     + (1 - relax) * old value
//
// where rhs is the entry of _data_vertexCellRhs at the same linear index and
// the neighbour values are read from the seven edge arrays and from
// _data_vertexCellDst itself. Since _data_vertexCellDst is read and written in
// the same loop nest, entries updated earlier in the sweep are picked up by
// later points; the traversal order is therefore part of the result.
//
// Parameters:
//   _data_edgeCellDst_{X,XY,XYZ,XZ,Y,YZ,Z}  read-only input arrays
//   _data_vertexCellDst                     array that is updated in place
//   _data_vertexCellRhs                     read-only right-hand side
//   _data_xi                                weights; entries 1..65 are read,
//                                           entry 0 is not accessed
//   level                                   loop extents derive from 1 << level
//   relax                                   relaxation factor
//
// NOTE(review): the index expressions in the loop body contain terms of the
// form w * (w + 1) * (w + 2) / 6, which looks like a linearized tetrahedral
// layout - confirm against the code generator. Required array lengths are not
// checked here; the caller must provide sufficiently large arrays.
void kernel(double const *RESTRICT const _data_edgeCellDst_X,
double const *RESTRICT const _data_edgeCellDst_XY,
double const *RESTRICT const _data_edgeCellDst_XYZ,
double const *RESTRICT const _data_edgeCellDst_XZ,
double const *RESTRICT const _data_edgeCellDst_Y,
double const *RESTRICT const _data_edgeCellDst_YZ,
double const *RESTRICT const _data_edgeCellDst_Z,
double *RESTRICT _data_vertexCellDst,
double const *RESTRICT const _data_vertexCellRhs,
double const *RESTRICT const _data_xi, unsigned level,
double relax) {
// xi_69 + xi_70 == 1 - relax: the weight of the old value in the update.
const double xi_69 = 1.0;
const double xi_70 = -relax;
// _data_xi[1] is only used through its reciprocal, which scales the
// accumulated sum (presumably the diagonal stencil weight - confirm).
const double xi_1 = _data_xi[1];
const double xi_67 = 1 / (xi_1);
// Weights applied to values of _data_edgeCellDst_XYZ (xi_2 .. xi_7).
const double xi_2 = _data_xi[2];
const double xi_3 = _data_xi[3];
const double xi_4 = _data_xi[4];
const double xi_5 = _data_xi[5];
const double xi_6 = _data_xi[6];
const double xi_7 = _data_xi[7];
// Weights applied to values of _data_edgeCellDst_XY (xi_8 .. xi_15).
const double xi_8 = _data_xi[8];
const double xi_9 = _data_xi[9];
const double xi_10 = _data_xi[10];
const double xi_11 = _data_xi[11];
const double xi_12 = _data_xi[12];
const double xi_13 = _data_xi[13];
const double xi_14 = _data_xi[14];
const double xi_15 = _data_xi[15];
// Weights applied to values of _data_edgeCellDst_XZ (xi_16 .. xi_21).
const double xi_16 = _data_xi[16];
const double xi_17 = _data_xi[17];
const double xi_18 = _data_xi[18];
const double xi_19 = _data_xi[19];
const double xi_20 = _data_xi[20];
const double xi_21 = _data_xi[21];
// Weights applied to values of _data_edgeCellDst_X (xi_22 .. xi_29).
const double xi_22 = _data_xi[22];
const double xi_23 = _data_xi[23];
const double xi_24 = _data_xi[24];
const double xi_25 = _data_xi[25];
const double xi_26 = _data_xi[26];
const double xi_27 = _data_xi[27];
const double xi_28 = _data_xi[28];
const double xi_29 = _data_xi[29];
// Weights applied to values of _data_edgeCellDst_YZ (xi_30 .. xi_37).
const double xi_30 = _data_xi[30];
const double xi_31 = _data_xi[31];
const double xi_32 = _data_xi[32];
const double xi_33 = _data_xi[33];
const double xi_34 = _data_xi[34];
const double xi_35 = _data_xi[35];
const double xi_36 = _data_xi[36];
const double xi_37 = _data_xi[37];
// Weights applied to values of _data_edgeCellDst_Y (xi_38 .. xi_43).
const double xi_38 = _data_xi[38];
const double xi_39 = _data_xi[39];
const double xi_40 = _data_xi[40];
const double xi_41 = _data_xi[41];
const double xi_42 = _data_xi[42];
const double xi_43 = _data_xi[43];
// Weights applied to values of _data_edgeCellDst_Z (xi_44 .. xi_51).
const double xi_44 = _data_xi[44];
const double xi_45 = _data_xi[45];
const double xi_46 = _data_xi[46];
const double xi_47 = _data_xi[47];
const double xi_48 = _data_xi[48];
const double xi_49 = _data_xi[49];
const double xi_50 = _data_xi[50];
const double xi_51 = _data_xi[51];
// Weights applied to neighbouring values of _data_vertexCellDst
// (xi_52 .. xi_65).
const double xi_52 = _data_xi[52];
const double xi_53 = _data_xi[53];
const double xi_54 = _data_xi[54];
const double xi_55 = _data_xi[55];
const double xi_56 = _data_xi[56];
const double xi_57 = _data_xi[57];
const double xi_58 = _data_xi[58];
const double xi_59 = _data_xi[59];
const double xi_60 = _data_xi[60];
const double xi_61 = _data_xi[61];
const double xi_62 = _data_xi[62];
const double xi_63 = _data_xi[63];
const double xi_64 = _data_xi[64];
const double xi_65 = _data_xi[65];
// Loop nest: ctr_3 is the outermost and ctr_1 the innermost counter. All
// counters start at 1 and the visited points satisfy
// ctr_1 + ctr_2 + ctr_3 < (1 << level).
// The bounds negate unsigned counters; this relies on unsigned wrap-around,
// so "-ctr_3 + (1 << level)" evaluates to (1 << level) - ctr_3.
for (unsigned ctr_3 = 1; ctr_3 < (1 << (level)); ctr_3 += 1) {
for (unsigned ctr_2 = 1; ctr_2 < -ctr_3 + (1 << (level)); ctr_2 += 1) {
// cell (inner)
for (unsigned ctr_1 = 1; ctr_1 < -ctr_2 - ctr_3 + (1 << (level));
ctr_1 += 1) {
const double xi_135 =
_data_vertexCellRhs[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 + 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
((1 << (level)) + 3)) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6))];
const double xi_81 =
-xi_2 *
_data_edgeCellDst_XYZ[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level))) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) - 1) *
(-ctr_3 + (1 << (level)) + 1)) /
(6)) +
((((1 << (level)) - 1) *
((1 << (level)) + 1) * (1 << (level))) /
(6)) -
1];
const double xi_92 =
-xi_3 *
_data_edgeCellDst_XYZ[ctr_1 +
ctr_2 * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) - 1) *
((1 << (level)) + 1) * (1 << (level))) /
(6)) -
1];
const double xi_103 =
-xi_4 *
_data_edgeCellDst_XYZ[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level))) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) - 1) *
(-ctr_3 + (1 << (level)) + 1)) /
(6)) +
((((1 << (level)) - 1) *
((1 << (level)) + 1) * (1 << (level))) /
(6)) -
1];
const double xi_114 =
-xi_5 *
_data_edgeCellDst_XYZ[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) - 1) *
((1 << (level)) + 1) * (1 << (level))) /
(6))];
const double xi_125 =
-xi_6 *
_data_edgeCellDst_XYZ[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level))) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) - 1) *
(-ctr_3 + (1 << (level)) + 1)) /
(6)) +
((((1 << (level)) - 1) *
((1 << (level)) + 1) * (1 << (level))) /
(6))];
const double xi_132 =
-xi_7 *
_data_edgeCellDst_XYZ[ctr_1 +
ctr_2 * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) - 1) *
((1 << (level)) + 1) * (1 << (level))) /
(6))];
const double xi_133 =
-xi_8 *
_data_edgeCellDst_XY[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
1];
const double xi_134 =
-xi_9 *
_data_edgeCellDst_XY[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level))) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) - 1) *
(-ctr_3 + (1 << (level)) + 1)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
1];
const double xi_71 =
-xi_10 *
_data_edgeCellDst_XY[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 + 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6)) -
1];
const double xi_72 =
-xi_11 *
_data_edgeCellDst_XY[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
1];
const double xi_73 =
-xi_12 *
_data_edgeCellDst_XY[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6))];
const double xi_74 =
-xi_13 *
_data_edgeCellDst_XY[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level))) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) - 1) *
(-ctr_3 + (1 << (level)) + 1)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6))];
const double xi_75 =
-xi_14 *
_data_edgeCellDst_XY[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 + 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6))];
const double xi_76 =
-xi_15 *
_data_edgeCellDst_XY[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6))];
const double xi_77 =
-xi_16 *
_data_edgeCellDst_XZ[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 + 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6)) -
1];
const double xi_78 =
-xi_17 *
_data_edgeCellDst_XZ[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
1];
const double xi_79 =
-xi_18 *
_data_edgeCellDst_XZ[ctr_1 +
(ctr_2 + 1) * (-ctr_3 + (1 << (level)) + 2) -
(((ctr_2 + 1) * (ctr_2 + 2)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6)) -
1];
const double xi_80 =
-xi_19 *
_data_edgeCellDst_XZ[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6))];
const double xi_82 =
-xi_20 *
_data_edgeCellDst_XZ[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 + 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6))];
const double xi_83 =
-xi_21 *
_data_edgeCellDst_XZ[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6))];
const double xi_84 =
-xi_22 *
_data_edgeCellDst_X[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
1];
const double xi_85 =
-xi_23 *
_data_edgeCellDst_X[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level))) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) - 1) *
(-ctr_3 + (1 << (level)) + 1)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
1];
const double xi_86 =
-xi_24 *
_data_edgeCellDst_X[ctr_1 +
(ctr_2 + 1) * (-ctr_3 + (1 << (level)) + 2) -
(((ctr_2 + 1) * (ctr_2 + 2)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6)) -
1];
const double xi_87 =
-xi_25 *
_data_edgeCellDst_X[ctr_1 +
(ctr_2 + 1) * (-ctr_3 + (1 << (level)) + 1) -
(((ctr_2 + 1) * (ctr_2 + 2)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
1];
const double xi_88 =
-xi_26 *
_data_edgeCellDst_X[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6))];
const double xi_89 =
-xi_27 *
_data_edgeCellDst_X[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level))) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) - 1) *
(-ctr_3 + (1 << (level)) + 1)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6))];
const double xi_90 =
-xi_28 *
_data_edgeCellDst_X[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 + 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6))];
const double xi_91 =
-xi_29 *
_data_edgeCellDst_X[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6))];
const double xi_93 =
-xi_30 *
_data_edgeCellDst_YZ[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 + 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6)) -
1];
const double xi_94 =
-xi_31 *
_data_edgeCellDst_YZ[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
1];
const double xi_95 =
-xi_32 *
_data_edgeCellDst_YZ[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 - 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6))];
const double xi_96 =
-xi_33 *
_data_edgeCellDst_YZ[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6))];
const double xi_97 =
-xi_34 *
_data_edgeCellDst_YZ[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 + 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6))];
const double xi_98 =
-xi_35 *
_data_edgeCellDst_YZ[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6))];
const double xi_99 =
-xi_36 *
_data_edgeCellDst_YZ[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 - 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6)) +
1];
const double xi_100 =
-xi_37 *
_data_edgeCellDst_YZ[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) +
1];
const double xi_101 =
-xi_38 *
_data_edgeCellDst_Y[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
1];
const double xi_102 =
-xi_39 *
_data_edgeCellDst_Y[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6))];
const double xi_104 =
-xi_40 *
_data_edgeCellDst_Y[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level))) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) - 1) *
(-ctr_3 + (1 << (level)) + 1)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6))];
const double xi_105 =
-xi_41 *
_data_edgeCellDst_Y[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 + 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6))];
const double xi_106 =
-xi_42 *
_data_edgeCellDst_Y[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6))];
const double xi_107 =
-xi_43 *
_data_edgeCellDst_Y[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) +
1];
const double xi_108 =
-xi_44 *
_data_edgeCellDst_Z[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
1];
const double xi_109 =
-xi_45 *
_data_edgeCellDst_Z[ctr_1 +
(ctr_2 + 1) * (-ctr_3 + (1 << (level)) + 2) -
(((ctr_2 + 1) * (ctr_2 + 2)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6)) -
1];
const double xi_110 =
-xi_46 *
_data_edgeCellDst_Z[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6))];
const double xi_111 =
-xi_47 *
_data_edgeCellDst_Z[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 + 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6))];
const double xi_112 =
-xi_48 *
_data_edgeCellDst_Z[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6))];
const double xi_113 =
-xi_49 *
_data_edgeCellDst_Z[ctr_1 +
(ctr_2 + 1) * (-ctr_3 + (1 << (level)) + 2) -
(((ctr_2 + 1) * (ctr_2 + 2)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6))];
const double xi_115 =
-xi_50 *
_data_edgeCellDst_Z[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) +
1];
const double xi_116 =
-xi_51 *
_data_edgeCellDst_Z[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 + 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
(1 << (level))) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6)) +
1];
const double xi_117 =
-xi_52 *
_data_vertexCellDst[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 + 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
((1 << (level)) + 3)) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6)) -
1];
const double xi_118 =
-xi_53 *
_data_vertexCellDst[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
((1 << (level)) + 3)) /
(6)) -
1];
const double xi_119 =
-xi_54 *
_data_vertexCellDst[ctr_1 +
(ctr_2 + 1) * (-ctr_3 + (1 << (level)) + 3) -
(((ctr_2 + 1) * (ctr_2 + 2)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
((1 << (level)) + 3)) /
(6)) -
(((-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3) *
(-ctr_3 + (1 << (level)) + 4)) /
(6)) -
1];
const double xi_120 =
-xi_55 *
_data_vertexCellDst[ctr_1 +
(ctr_2 + 1) * (-ctr_3 + (1 << (level)) + 2) -
(((ctr_2 + 1) * (ctr_2 + 2)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
((1 << (level)) + 3)) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6)) -
1];
const double xi_121 =
-xi_56 *
_data_vertexCellDst[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 - 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
((1 << (level)) + 3)) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6))];
const double xi_122 =
-xi_57 *
_data_vertexCellDst[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
((1 << (level)) + 3)) /
(6))];
const double xi_123 =
-xi_58 *
_data_vertexCellDst[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 3) -
((ctr_2 * (ctr_2 + 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
((1 << (level)) + 3)) /
(6)) -
(((-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3) *
(-ctr_3 + (1 << (level)) + 4)) /
(6))];
const double xi_124 =
-xi_59 *
_data_vertexCellDst[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 + 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
((1 << (level)) + 3)) /
(6))];
const double xi_126 =
-xi_60 *
_data_vertexCellDst[ctr_1 +
(ctr_2 + 1) * (-ctr_3 + (1 << (level)) + 3) -
(((ctr_2 + 1) * (ctr_2 + 2)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
((1 << (level)) + 3)) /
(6)) -
(((-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3) *
(-ctr_3 + (1 << (level)) + 4)) /
(6))];
const double xi_127 =
-xi_61 *
_data_vertexCellDst[ctr_1 +
(ctr_2 + 1) * (-ctr_3 + (1 << (level)) + 2) -
(((ctr_2 + 1) * (ctr_2 + 2)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
((1 << (level)) + 3)) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6))];
const double xi_128 =
-xi_62 *
_data_vertexCellDst[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 - 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
((1 << (level)) + 3)) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6)) +
1];
const double xi_129 =
-xi_63 *
_data_vertexCellDst[ctr_1 +
(ctr_2 - 1) * (-ctr_3 + (1 << (level)) + 1) -
((ctr_2 * (ctr_2 - 1)) / (2)) -
(((-ctr_3 + (1 << (level))) *
(-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2)) /
(6)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
((1 << (level)) + 3)) /
(6)) +
1];
const double xi_130 =
-xi_64 *
_data_vertexCellDst[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 3) -
((ctr_2 * (ctr_2 + 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
((1 << (level)) + 3)) /
(6)) -
(((-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3) *
(-ctr_3 + (1 << (level)) + 4)) /
(6)) +
1];
const double xi_131 =
-xi_65 *
_data_vertexCellDst[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 + 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
((1 << (level)) + 3)) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6)) +
1];
_data_vertexCellDst[ctr_1 + ctr_2 * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 + 1)) / (2)) +
((((1 << (level)) + 1) * ((1 << (level)) + 2) *
((1 << (level)) + 3)) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6))] =
relax * xi_67 *
(xi_100 + xi_101 + xi_102 + xi_103 + xi_104 + xi_105 + xi_106 +
xi_107 + xi_108 + xi_109 + xi_110 + xi_111 + xi_112 + xi_113 +
xi_114 + xi_115 + xi_116 + xi_117 + xi_118 + xi_119 + xi_120 +
xi_121 + xi_122 + xi_123 + xi_124 + xi_125 + xi_126 + xi_127 +
xi_128 + xi_129 + xi_130 + xi_131 + xi_132 + xi_133 + xi_134 +
xi_135 + xi_71 + xi_72 + xi_73 + xi_74 + xi_75 + xi_76 +
xi_77 + xi_78 + xi_79 + xi_80 + xi_81 + xi_82 + xi_83 + xi_84 +
xi_85 + xi_86 + xi_87 + xi_88 + xi_89 + xi_90 + xi_91 + xi_92 +
xi_93 + xi_94 + xi_95 + xi_96 + xi_97 + xi_98 + xi_99) +
(xi_69 + xi_70) *
_data_vertexCellDst[ctr_1 +
ctr_2 * (-ctr_3 + (1 << (level)) + 2) -
((ctr_2 * (ctr_2 + 1)) / (2)) +
((((1 << (level)) + 1) *
((1 << (level)) + 2) *
((1 << (level)) + 3)) /
(6)) -
(((-ctr_3 + (1 << (level)) + 1) *
(-ctr_3 + (1 << (level)) + 2) *
(-ctr_3 + (1 << (level)) + 3)) /
(6))];
}
}
}
}
kernel.h 0 → 100644
// RESTRICT expands to the compiler-specific spelling of the "restrict"
// pointer qualifier where one is known here, and to nothing otherwise.
#if defined(__GNUC__)
#define RESTRICT __restrict__
#elif defined(_MSC_VER)
#define RESTRICT __restrict
#else
#define RESTRICT
#endif

// Declaration of the relaxation kernel defined in kernel.cpp.
// The seven edge arrays, the right-hand side and the weights are read-only;
// _data_vertexCellDst is the only array written to.
void kernel(
    double const *RESTRICT const _data_edgeCellDst_X,
    double const *RESTRICT const _data_edgeCellDst_XY,
    double const *RESTRICT const _data_edgeCellDst_XYZ,
    double const *RESTRICT const _data_edgeCellDst_XZ,
    double const *RESTRICT const _data_edgeCellDst_Y,
    double const *RESTRICT const _data_edgeCellDst_YZ,
    double const *RESTRICT const _data_edgeCellDst_Z,
    double *RESTRICT _data_vertexCellDst,
    double const *RESTRICT const _data_vertexCellRhs,
    double const *RESTRICT const _data_xi,
    unsigned level,
    double relax);
\ No newline at end of file
#!/usr/bin/env bash
# Environment setup for the benchmark. The generated job scripts pull this
# file in with "source load_modules.sh", so the settings below apply to the
# shell that launches the benchmark.

# Replace the currently loaded devEnv module by its GCC variant.
module unload devEnv
module load devEnv/GCC

# Additional modules.
# NOTE(review): none of the sources shown include boost or PETSc headers -
# confirm whether these two modules are actually required.
module load boost
module load petsc

# Point tools that honour CC/CXX (e.g. CMake on a fresh build directory)
# to the GNU compilers.
export CC=gcc
export CXX=g++
main.cpp 0 → 100644
#include "dummy.h"
#include "kernel.h"
#include <chrono>
#include <fstream>
#include <iostream>
#include <mpi.h>
#include <sstream>
#include <vector>
// Returns the current reading of std::chrono::high_resolution_clock as the
// number of seconds elapsed since that clock's epoch. The value is obtained
// in nanosecond resolution and then scaled to seconds.
double get_wc_time() {
  using clock_type = std::chrono::high_resolution_clock;

  const auto sinceEpoch = clock_type::now().time_since_epoch();
  const auto nanoseconds =
      std::chrono::duration_cast<std::chrono::nanoseconds>(sinceEpoch).count();

  return static_cast<double>(nanoseconds) * 1e-9;
}
// Runs the kernel benchmark on every rank of MPI_COMM_WORLD, gathers the
// per-measurement timings on rank 0, prints a summary there and writes all
// timings to a CSV file.
//
// Parameters:
//   level                 - array sizes are derived from (1 << level); also
//                           forwarded to kernel()
//   numCellsPerProcess    - number of separately allocated data sets ("cells")
//                           that are timed one after another on each rank
//   numOuterSORIterations - number of timed measurements per cell
//   numInnerSORIterations - number of kernel() calls inside one measurement
//   dbFile                - path of the CSV file written by rank 0
//
// This is a collective operation (MPI_Barrier / MPI_Gather): every rank must
// call it with the same arguments, between MPI_Init and MPI_Finalize.
// If the output file cannot be opened, rank 0 reports the error on std::cerr
// and the timings are not written.
void runBenchmark(unsigned level, unsigned numCellsPerProcess,
                  unsigned numOuterSORIterations,
                  unsigned numInnerSORIterations, const std::string &dbFile) {
  int numProcesses;
  MPI_Comm_size(MPI_COMM_WORLD, &numProcesses);
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  const bool isRoot = (rank == 0);

  if (isRoot) {
    std::cout << "Parameters:" << std::endl;
    std::cout << " - num processes: " << numProcesses << std::endl;
    std::cout << " - level: " << level << std::endl;
    std::cout << " - cells / process: " << numCellsPerProcess << std::endl;
    std::cout << " - outer iterations: " << numOuterSORIterations << std::endl;
    std::cout << " - inner iterations: " << numInnerSORIterations << std::endl;
    std::cout << "" << std::endl;
  }

  // Array extents: each is w * (w + 1) * (w + 2) / 6 for a width w derived
  // from the level.
  const unsigned v_width = (1u << level) + 1;
  const unsigned v_size = ((v_width + 2) * (v_width + 1) * v_width) / 6;
  const unsigned e_width = (1u << level);
  const unsigned e_offset = ((e_width + 2) * (e_width + 1) * e_width) / 6;
  const unsigned e_width_xyz = (1u << level) - 1;
  const unsigned e_offset_xyz =
      ((e_width_xyz + 2) * (e_width_xyz + 1) * e_width_xyz) / 6;

  // Six blocks of e_offset entries followed by one block of e_offset_xyz.
  // Computed in std::size_t so the byte count below cannot wrap around in
  // 32-bit arithmetic for large levels.
  const std::size_t e_size =
      6 * static_cast<std::size_t>(e_offset) + e_offset_xyz;
  const std::size_t size_arrays_per_cell_in_byte =
      (2 * static_cast<std::size_t>(v_size) + e_size) * sizeof(double);

  if (isRoot) {
    std::cout << "size of destination array (slightly larger than number of "
                 "updated points per cell and iteration): "
              << v_size << std::endl;
    std::cout << "size of arrays that are allocated per cell and loaded per "
                 "kernel call: "
              << v_size << " + " << v_size << " + 6 * " << e_offset << " + "
              << e_offset_xyz << " (~ "
              << size_arrays_per_cell_in_byte / 1000000. << " MB)" << std::endl;
    std::cout << "" << std::endl;
  }

  // One independent set of arrays per cell, all filled with the same value.
  std::vector<std::vector<double>> v_dst(numCellsPerProcess,
                                         std::vector<double>(v_size, 42.));
  std::vector<std::vector<double>> v_rhs(numCellsPerProcess,
                                         std::vector<double>(v_size, 42.));
  std::vector<std::vector<double>> e_src(numCellsPerProcess,
                                         std::vector<double>(e_size, 42.));

  std::vector<double> xi(100);
  for (unsigned i = 0; i < 100; i++) {
    xi[i] = 0.1 * (double)i;
  }

  // sorTimings[ cellID ][ outerIter ] = timing;
  std::vector<std::vector<double>> sorTimings(
      numCellsPerProcess, std::vector<double>(numOuterSORIterations));

  if (isRoot) {
    std::cout << "Running benchmark ..." << std::endl;
    std::cout << "" << std::endl;
  }

  const double relax = 1.1;
  for (unsigned cellID = 0; cellID < numCellsPerProcess; cellID++) {
    // Pointer arithmetic on data() instead of &vec[i]: the XYZ block starts at
    // 6 * e_offset, which is one past the end when e_offset_xyz is zero, and
    // operator[] must not be used with that index.
    const double *const edges = e_src[cellID].data();
    const std::size_t stride = e_offset;

    for (unsigned outer = 0; outer < numOuterSORIterations; outer++) {
      // Align all ranks so that every rank times the same measurement.
      MPI_Barrier(MPI_COMM_WORLD);
      const double start = get_wc_time();
      for (unsigned inner = 0; inner < numInnerSORIterations; inner++) {
        kernel(edges,               // X
               edges + 3 * stride,  // XY
               edges + 6 * stride,  // XYZ
               edges + 4 * stride,  // XZ
               edges + 1 * stride,  // Y
               edges + 5 * stride,  // YZ
               edges + 2 * stride,  // Z
               v_dst[cellID].data(), v_rhs[cellID].data(), xi.data(), level,
               relax);
      }
      sorTimings[cellID][outer] = get_wc_time() - start;
    }

    // NOTE(review): dummy() is defined elsewhere; presumably an opaque call
    // that keeps the compiler from optimizing the kernel work away - confirm.
    dummy(v_dst[cellID].data());
    dummy(v_rhs[cellID].data());
    dummy(e_src[cellID].data());
  }

  // allSorTimings[ cellID ][ outerIter ][ rank ] = timing;
  // Only rank 0 receives data, so only rank 0 allocates the innermost vectors.
  std::vector<std::vector<std::vector<double>>> allSorTimings(
      numCellsPerProcess,
      std::vector<std::vector<double>>(numOuterSORIterations));
  for (unsigned id = 0; id < numCellsPerProcess; id++) {
    for (unsigned it = 0; it < numOuterSORIterations; it++) {
      void *recv_buffer = nullptr;
      if (isRoot) {
        allSorTimings[id][it].resize(static_cast<std::size_t>(numProcesses));
        recv_buffer = allSorTimings[id][it].data();
      }
      MPI_Gather(&sorTimings[id][it], 1, MPI_DOUBLE, recv_buffer, 1, MPI_DOUBLE,
                 0, MPI_COMM_WORLD);
    }
  }

  if (!isRoot) {
    return;
  }

  // ---- rank 0 only: CSV assembly, summary and file output ----
  std::stringstream csv;
  // header
  csv << "data_point,"
         "num_processes,"
         "outer_iterations,"
         "inner_iterations,"
         "cell_per_process,"
         "level,"
         "dofs_per_cell,"
         "rank,"
         "outer_iteration,"
         "cell_id,"
         "time\n";
  // global data (data_point 0): run parameters, measurement columns left empty
  csv << "0," << numProcesses << "," << numOuterSORIterations << ","
      << numInnerSORIterations << "," << numCellsPerProcess << "," << level
      << "," << v_size << ",,,,\n";
  // one row per measurement (data_point 1): parameter columns left empty
  for (unsigned id = 0; id < numCellsPerProcess; id++) {
    for (unsigned it = 0; it < numOuterSORIterations; it++) {
      for (int r = 0; r < numProcesses; r++) {
        csv << "1,,,,,,," << r << "," << it << "," << id << ","
            << allSorTimings[id][it][r] << "\n";
      }
    }
  }

  // Find the measurement whose slowest rank deviates most from the average
  // over all ranks (ties resolve to the last such measurement).
  double worstMin = std::numeric_limits<double>::max();
  double worstMax = 0;
  double worstAvg = 0;
  double maxDiff = 0;
  unsigned maxDiffCellID = 0;
  unsigned maxDiffIteration = 0;
  for (unsigned id = 0; id < numCellsPerProcess; id++) {
    for (unsigned it = 0; it < numOuterSORIterations; it++) {
      double itMin = std::numeric_limits<double>::max();
      double itMax = 0;
      double itSum = 0;
      for (int r = 0; r < numProcesses; r++) {
        const double time = allSorTimings[id][it][r];
        if (itMin > time)
          itMin = time;
        if (itMax < time)
          itMax = time;
        itSum += time;
      }
      const double itAvg = itSum / static_cast<double>(numProcesses);
      const double itDiff = itMax - itAvg;
      if (maxDiff <= itDiff) {
        maxDiff = itDiff;
        worstAvg = itAvg;
        worstMax = itMax;
        worstMin = itMin;
        maxDiffCellID = id;
        maxDiffIteration = it;
      }
    }
  }

  std::cout << "Measurement with largest difference (max(rank) - average(all "
               "ranks)):"
            << std::endl;
  std::cout << " - min: " << worstMin << std::endl;
  std::cout << " - max: " << worstMax << std::endl;
  std::cout << " - avg: " << worstAvg << std::endl;
  std::cout << " - max - avg: " << worstMax - worstAvg << " ("
            << (maxDiff / worstAvg) * 100. << "% of average)" << std::endl;
  std::cout << " - cell ID: " << maxDiffCellID << std::endl;
  std::cout << " - outer iteration: " << maxDiffIteration << std::endl;
  std::cout << "" << std::endl;

  std::cout << "Writing root data (global run data) ..." << std::endl;
  std::ofstream outputFile(dbFile);
  if (!outputFile) {
    // Previously a failed open was silently ignored and the data was lost.
    std::cerr << "Error: could not open output file '" << dbFile
              << "' for writing." << std::endl;
    return;
  }
  outputFile << csv.str();
  outputFile.close();
}
// Parses `text` as a base-10 integer in [0, INT_MAX].
// Returns true and stores the result in `value` on success; returns false
// (leaving `value` untouched) for empty input, trailing garbage, negative
// numbers or values that do not fit into an int.
static bool parseNonNegativeInt(const char *text, int &value) {
  if (text == nullptr || *text == '\0') {
    return false;
  }
  char *end = nullptr;
  errno = 0;
  const long parsed = std::strtol(text, &end, 10);
  if (errno != 0 || end == text || *end != '\0') {
    return false;
  }
  if (parsed < 0 || parsed > std::numeric_limits<int>::max()) {
    return false;
  }
  value = static_cast<int>(parsed);
  return true;
}

// Entry point: initializes MPI, validates the command line and runs the
// benchmark.
//
// Expected arguments:
//   <level> <numCellsPerProcess> <numOuterIterations> <numInnerIterations>
//   <outputFile.csv>
//
// Returns EXIT_SUCCESS after a completed run and after printing the usage text
// for a wrong argument count; returns EXIT_FAILURE when a numeric argument is
// not a valid non-negative integer or the level is out of range.
int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  if (rank == 0) {
    std::cout << "Parallel SOR benchmark." << std::endl;
    std::cout << "Performs stencil-based SOR iterations on structured refined "
                 "tetrahedral domains.\n"
              << std::endl;
  }

  if (argc != 6) {
    if (rank == 0) {
      std::cout << "Usage:" << std::endl;
      std::cout << " ./mpi-sor-benchmark <level> <numCellsPerProcess> "
                   "<numOuterIterations> <numInnerIterations> <outputFile.csv>"
                << std::endl;
      std::cout << "" << std::endl;
      std::cout << "Parameters:" << std::endl;
      std::cout << " - level: refinement level of the tet (6 "
                   "or 7 should be used for total memory allocation of ~3MB "
                   "and ~25MB per cell)"
                << std::endl;
      std::cout << " - numCellsPerProcess: number of different tetrahedrons "
                   "to process (to vary the memory addresses, each tetrahedron "
                   "is measured individually)"
                << std::endl;
      std::cout << " - numOuterSORIterations: number of measured iterations "
                   "per tetrahedron"
                << std::endl;
      std::cout << " - numInnerSORIterations: number of relaxation iterations "
                   "performed per measurement/outer iteration"
                << std::endl;
      std::cout << " - outputFile.csv: file to output the detailed "
                   "measurements per rank, cell and outer iteration"
                << std::endl;
    }
    MPI_Finalize();
    return EXIT_SUCCESS;
  }

  // runBenchmark() derives its array sizes from (1 << level) using 32-bit
  // unsigned arithmetic; the size products overflow for levels above 10.
  const int maxLevel = 10;

  int level = 0;
  int numCellsPerProcess = 0;
  int numOuterSORIterations = 0;
  int numInnerSORIterations = 0;
  const bool argsValid = parseNonNegativeInt(argv[1], level) &&
                         parseNonNegativeInt(argv[2], numCellsPerProcess) &&
                         parseNonNegativeInt(argv[3], numOuterSORIterations) &&
                         parseNonNegativeInt(argv[4], numInnerSORIterations) &&
                         level <= maxLevel;
  if (!argsValid) {
    // The values are converted to unsigned below; a negative or garbage
    // argument would otherwise turn into a huge count or an invalid shift.
    if (rank == 0) {
      std::cerr << "Error: <level>, <numCellsPerProcess>, <numOuterIterations> "
                   "and <numInnerIterations> must be non-negative integers, "
                   "and <level> must not exceed "
                << maxLevel << "." << std::endl;
    }
    MPI_Finalize();
    return EXIT_FAILURE;
  }

  const std::string outputFile(argv[5]);
  runBenchmark(static_cast<unsigned>(level),
               static_cast<unsigned>(numCellsPerProcess),
               static_cast<unsigned>(numOuterSORIterations),
               static_cast<unsigned>(numInnerSORIterations), outputFile);
  MPI_Finalize();
  return EXIT_SUCCESS;
}
\ No newline at end of file
import csv
import matplotlib.pyplot as plt
import sys
def read_csv(csv_file):
    """Read a benchmark CSV file written by the benchmark executable.

    The first data row (after the header) holds the global run information;
    every following row holds one measurement.

    Args:
        csv_file: path of the CSV file to read.

    Returns:
        A tuple ``(info, data)`` where ``info`` is the dict of the first data
        row and ``data`` is the list of dicts of all remaining rows.

    Raises:
        ValueError: if the file contains no data rows (previously this case
            failed with an UnboundLocalError on ``info``).
    """
    # newline='' is what the csv module expects for files handed to a reader.
    with open(csv_file, mode='r', newline='') as infile:
        rows = list(csv.DictReader(infile))
    if not rows:
        raise ValueError("no data rows found in '{}'".format(csv_file))
    return rows[0], rows[1:]
def plot_comparison(csv_file, cell_id, outer_iteration):
    """Show a bar chart of the timings of one measurement.

    Reads ``csv_file`` via ``read_csv``, keeps the rows whose
    ``outer_iteration`` and ``cell_id`` columns equal the string form of the
    given values, and plots their ``time`` column as one bar per selected row
    (in file order).

    Args:
        csv_file: path of the benchmark CSV file.
        cell_id: cell to select.
        outer_iteration: outer iteration to select.
    """
    info, data = read_csv(csv_file)
    num_processes = info['num_processes']

    # Collect the timings of the requested measurement.
    timings = []
    for row in data:
        if row["outer_iteration"] != str(outer_iteration):
            continue
        if row["cell_id"] != str(cell_id):
            continue
        timings.append(float(row["time"]))

    bar_positions = list(range(len(timings)))
    plt.bar(bar_positions, timings, width=1.0, align='center')

    title = "{} processes, cell {}, outer iteration {}".format(
        int(num_processes), cell_id, outer_iteration)
    plt.title(title)
    plt.xlabel("ranks")
    plt.ylabel("time in seconds")
    plt.show()
if __name__ == "__main__":
    # Command-line entry point: expects exactly three arguments after the
    # script name; anything else prints the usage text.
    if len(sys.argv) == 4:
        _, csv_path, cell_arg, iteration_arg = sys.argv
        plot_comparison(csv_path, int(cell_arg), int(iteration_arg))
    else:
        print("Plot benchmark csv data.\nUsage: plot_csv.py <data.csv> <cell_id> <outer_iteration>")
\ No newline at end of file
snoop_0_1.png

34.3 KiB

0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment