Skip to content
Snippets Groups Projects
Commit 27d858aa authored by Anna Wellmann's avatar Anna Wellmann
Browse files

Prettify timer output for multi-GPU runs

parent 343a5229
No related branches found
No related tags found
1 merge request: !104 "Add Communication Hiding to GPU version"
......@@ -231,6 +231,16 @@ std::vector<double> Communicator::gatherNUPS(double processNups)
return std::vector<double>();
}
// Sums the per-process NUPS value over all processes in commGPU.
//
// processNups: the NUPS value measured by the calling process.
// Returns:     the sum over all processes on rank 0 (the root of the reduction).
//              MPI_Reduce only delivers the result to the root, so every other
//              rank returns 0.0 and callers should only use the value on rank 0.
//
// This is a collective operation: every process in commGPU has to call it.
double Communicator::sumNups(double processNups)
{
    // Stack storage instead of malloc: nothing to free, and non-root ranks
    // return a defined value because MPI_Reduce leaves their receive buffer untouched.
    double nupsSum = 0.0;
    MPI_Reduce(&processNups, &nupsSum, 1, MPI_DOUBLE, MPI_SUM, 0, commGPU);
    return nupsSum;
}
void vf::gpu::Communicator::exchangeIndices(uint *rbuf, int count_r, int nb_rank_r, uint *sbuf, int count_s,
int nb_rank_s)
{
......
......@@ -58,6 +58,7 @@ public:
double getTime();
int mapCudaDevice(const int &rank, const int &size, const std::vector<unsigned int> &devices, const int &maxdev);
std::vector<double> gatherNUPS(double processNups);
double sumNups(double processNups);
//////////////////////////////////////////////////////////////////////////
void exchangeIndices(uint *rbuf, int count_r, int nb_rank_r, uint *sbuf, int count_s, int nb_rank_s);
private:
......
......@@ -706,7 +706,7 @@ void Simulation::run()
//////////////////////////////////////////////////////////////////////////
averageTimer->stopTimer();
averageTimer->outputPerformance(t, para.get());
averageTimer->outputPerformance(t, para.get(), communicator);
//////////////////////////////////////////////////////////////////////////
if( para->getPrintFiles() )
......
......@@ -31,7 +31,7 @@ void Timer::resetTimer()
this->totalElapsedTime = 0.0;
}
void Timer::outputPerformance(uint t, Parameter* para)
void Timer::outputPerformance(uint t, Parameter* para, vf::gpu::Communicator& communicator)
{
real fnups = 0.0;
real bandwidth = 0.0;
......@@ -42,25 +42,17 @@ void Timer::outputPerformance(uint t, Parameter* para)
bandwidth += (27.0+1.0) * 4.0 * 1000.0 * (t-para->getTStart()) * para->getParH(lev)->size_Mat_SP / (this->totalElapsedTime*1.0E9);
}
if(this->firstOutput)
if(this->firstOutput && communicator.getPID() == 0) //only display the legend once
{
VF_LOG_INFO(" --- {} --- Processing time (ms) \t Nups in Mio \t Bandwidth in GB/sec", this->name );
VF_LOG_INFO("PID \t --- {} --- Processing time (ms) \t Nups in Mio \t Bandwidth in GB/sec", this->name );
this->firstOutput = false;
}
VF_LOG_INFO(" --- {} --- {}/{} \t {} \t {}", this->name, this->elapsedTime, this->totalElapsedTime, fnups, bandwidth );
VF_LOG_INFO(" {} \t --- {} --- {}/{} \t {} \t {}", communicator.getPID(), this->name, this->elapsedTime, this->totalElapsedTime, fnups, bandwidth);
// When using multiple GPUs, get Nups of all processes
if (para->getMaxDev() > 1) {
vf::gpu::Communicator& comm=vf::gpu::Communicator::getInstance();
std::vector<double> nups = comm.gatherNUPS(fnups);
if (comm.getPID() == 0) {
double sum = 0;
for (uint pid = 0; pid < nups.size(); pid++) {
VF_LOG_INFO("Process {}: \t NUPS in Mio: {}", pid, nups[pid]);
sum += nups[pid];
}
VF_LOG_INFO("Sum of all processes: Nups in Mio: {}", sum);
}
}
// When using multiple GPUs, sum the nups of all processes
if (communicator.getNummberOfProcess() > 1) {
double nupsSum = communicator.sumNups(fnups);
VF_LOG_INFO("Sum of all {}\t processes: Nups in Mio: {}", communicator.getNummberOfProcess(), nupsSum);
}
}
\ No newline at end of file
......@@ -9,6 +9,10 @@
#include "logger/Logger.h"
#include "Parameter/Parameter.h"
// Forward declaration: this header only names vf::gpu::Communicator as a
// reference parameter type, so the full class definition is not needed here.
namespace vf::gpu
{
class Communicator;
} // namespace vf::gpu
class Timer
{
public:
......@@ -27,7 +31,7 @@ class Timer
void startTimer();
void stopTimer();
void resetTimer();
void outputPerformance(uint t, Parameter* para);
void outputPerformance(uint t, Parameter* para, vf::gpu::Communicator& communicator);
float getElapsedTime(){ return this->elapsedTime; }
float getTotalElapsedTime(){ return this->totalElapsedTime; }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment