diff --git a/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp b/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp
index a5296b1202103fc1732befe3b6d8bea238841fe6..155251a3273c8976c058eddad760b8808b451433 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp
+++ b/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp
@@ -231,6 +231,21 @@ std::vector<double> Communicator::gatherNUPS(double processNups)
     return std::vector<double>();
 }
 
+// Sums processNups over all processes of commGPU (MPI_Reduce with MPI_SUM, root 0).
+// Collective operation: every process of commGPU has to call it. Only the root
+// receives the sum; all other processes get 0.0 back.
+double Communicator::sumNups(double processNups)
+{
+    // Reduce into a local instead of a malloc'ed buffer: nothing to leak, and the
+    // return value is defined on non-root processes, where MPI_Reduce does not
+    // write to the receive buffer.
+    double nupsSum = 0.0;
+
+    MPI_Reduce(&processNups, &nupsSum, 1, MPI_DOUBLE, MPI_SUM, 0, commGPU);
+
+    return nupsSum;
+}
+
 void vf::gpu::Communicator::exchangeIndices(uint *rbuf, int count_r, int nb_rank_r, uint *sbuf, int count_s,
                                             int nb_rank_s)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Communication/Communicator.h b/src/gpu/VirtualFluids_GPU/Communication/Communicator.h
index 3308c6b3ae964144446777c62781c2c2ad3049cb..6227dbd8210ea27013ad252cf64f399c611a9d75 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/Communicator.h
+++ b/src/gpu/VirtualFluids_GPU/Communication/Communicator.h
@@ -58,6 +58,8 @@ public:
     double getTime();
     int mapCudaDevice(const int &rank, const int &size, const std::vector<unsigned int> &devices, const int &maxdev);
     std::vector<double> gatherNUPS(double processNups);
+    // Sums processNups over all processes (collective call); the sum is only valid on the root process.
+    double sumNups(double processNups);
     //////////////////////////////////////////////////////////////////////////
     void exchangeIndices(uint *rbuf, int count_r, int nb_rank_r, uint *sbuf, int count_s, int nb_rank_s);
 private:
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index 706f3da00fe2f98bedd1975951cc8d0a8f189a7d..342c9288ef82fb0a13a3e5e2b66db47db6b0b12e 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -706,7 +706,7 @@ void Simulation::run()
 
         //////////////////////////////////////////////////////////////////////////
         averageTimer->stopTimer();
-        averageTimer->outputPerformance(t, para.get());
+        averageTimer->outputPerformance(t, para.get(), communicator);
         //////////////////////////////////////////////////////////////////////////
 
         if( para->getPrintFiles() )
diff --git a/src/gpu/VirtualFluids_GPU/Output/Timer.cpp b/src/gpu/VirtualFluids_GPU/Output/Timer.cpp
index b08a4ea11e6227d460ef5913695ebebf2474a02b..4fb7b223ff8480af20075b54a7037a4d27022708 100644
--- a/src/gpu/VirtualFluids_GPU/Output/Timer.cpp
+++ b/src/gpu/VirtualFluids_GPU/Output/Timer.cpp
@@ -31,7 +31,9 @@ void Timer::resetTimer()
     this->totalElapsedTime = 0.0;
 }
 
-void Timer::outputPerformance(uint t, Parameter* para)
+// Logs this timer's elapsed times, Nups and bandwidth for the calling process and, when
+// running on more than one process, the Nups summed over all processes (on process 0 only).
+void Timer::outputPerformance(uint t, Parameter* para, vf::gpu::Communicator& communicator)
 {
     real fnups = 0.0;
     real bandwidth = 0.0;
@@ -42,25 +44,20 @@ void Timer::outputPerformance(uint t, Parameter* para)
         bandwidth += (27.0+1.0) * 4.0 * 1000.0 * (t-para->getTStart()) * para->getParH(lev)->size_Mat_SP / (this->totalElapsedTime*1.0E9);
     }
 
-    if(this->firstOutput)
+    if(this->firstOutput && communicator.getPID() == 0) //only display the legend once
     {
-        VF_LOG_INFO(" --- {} --- Processing time (ms) \t Nups in Mio \t Bandwidth in GB/sec", this->name );
+        VF_LOG_INFO("PID \t --- {} --- Processing time (ms) \t Nups in Mio \t Bandwidth in GB/sec", this->name );
         this->firstOutput = false;
     }
 
-    VF_LOG_INFO(" --- {} --- {}/{} \t {} \t {}", this->name, this->elapsedTime, this->totalElapsedTime, fnups, bandwidth );
+    VF_LOG_INFO(" {} \t --- {} --- {}/{} \t {} \t {}", communicator.getPID(), this->name, this->elapsedTime, this->totalElapsedTime, fnups, bandwidth);
 
-    // When using multiple GPUs, get Nups of all processes
-    if (para->getMaxDev() > 1) {
-        vf::gpu::Communicator& comm=vf::gpu::Communicator::getInstance();
-        std::vector<double> nups = comm.gatherNUPS(fnups);
-        if (comm.getPID() == 0) {
-            double sum = 0;
-            for (uint pid = 0; pid < nups.size(); pid++) {
-                VF_LOG_INFO("Process {}: \t NUPS in Mio: {}", pid, nups[pid]);
-                sum += nups[pid];
-            }
-            VF_LOG_INFO("Sum of all processes: Nups in Mio: {}", sum);
-        }
-    }
+    // When using multiple GPUs, sum the nups of all processes
+    if (communicator.getNummberOfProcess() > 1) {
+        // sumNups is a collective call, so every process has to make it; the sum is
+        // only meaningful on the root of the reduction, so only process 0 logs it.
+        double nupsSum = communicator.sumNups(fnups);
+        if (communicator.getPID() == 0)
+            VF_LOG_INFO("Sum of all {}\t processes: Nups in Mio: {}", communicator.getNummberOfProcess(), nupsSum);
+    }
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Output/Timer.h b/src/gpu/VirtualFluids_GPU/Output/Timer.h
index 6432b347458e68a5089aea3de625017d6facd34b..26be785c7f76b7695656c9600bdb586804dca251 100644
--- a/src/gpu/VirtualFluids_GPU/Output/Timer.h
+++ b/src/gpu/VirtualFluids_GPU/Output/Timer.h
@@ -9,6 +9,10 @@
 #include "logger/Logger.h"
 #include "Parameter/Parameter.h"
 
+namespace vf::gpu{
+    class Communicator;
+}
+
 class Timer
 {
 public:
@@ -27,7 +31,7 @@ class Timer
     void startTimer();
     void stopTimer();
     void resetTimer();
-    void outputPerformance(uint t, Parameter* para);
+    void outputPerformance(uint t, Parameter* para, vf::gpu::Communicator& communicator);
 
     float getElapsedTime(){ return this->elapsedTime; }
     float getTotalElapsedTime(){ return this->totalElapsedTime; }