diff --git a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp index 18f6432e2d13fb605c1de4a311b9af09e7822810..ded3d2f3315d651c94add505e142ee585063d13a 100644 --- a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp +++ b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp @@ -143,12 +143,12 @@ void multipleLevel(const std::string& configPath) { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - vf::gpu::Communicator* comm = vf::gpu::Communicator::getInstanz(); + vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance(); vf::basics::ConfigurationFile config; config.load(configPath); - SPtr<Parameter> para = std::make_shared<Parameter>(config, comm->getNummberOfProcess(), comm->getPID()); + SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID()); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -206,7 +206,7 @@ void multipleLevel(const std::string& configPath) SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager); - Simulation sim; + Simulation sim(communicator); SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter()); SPtr<KernelFactoryImp> kernelFactory = KernelFactoryImp::getInstance(); SPtr<PreProcessorFactoryImp> preProcessorFactory = PreProcessorFactoryImp::getInstance(); @@ -334,8 +334,6 @@ void multipleLevel(const std::string& configPath) int main( int argc, char* argv[]) { - MPI_Init(&argc, &argv); - try { vf::logging::Logger::initalizeLogger(); @@ -362,6 +360,5 @@ int main( int argc, char* argv[]) VF_LOG_CRITICAL("Unknown exception!"); } - MPI_Finalize(); return 0; } diff --git a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp b/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp index bc0fdfa440a1eb1fa466bccf3a68e6216a513fbb..88ec364ea0e7d6a9010d67dac26f4a442db45e8f 100644 --- a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp +++ b/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp @@ -54,6 +54,7 @@ #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h" +#include <logger/Logger.h> //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -187,12 +188,12 @@ void multipleLevel(const std::string& configPath) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - vf::gpu::Communicator* comm = vf::gpu::Communicator::getInstanz(); + vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance(); vf::basics::ConfigurationFile config; config.load(configPath); - SPtr<Parameter> para = std::make_shared<Parameter>(config, comm->getNummberOfProcess(), comm->getPID()); + SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID()); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// const real velocityLB = (real)0.0844; // LB units @@ -323,7 +324,7 @@ void 
multipleLevel(const std::string& configPath) SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager); - Simulation sim; + Simulation sim(communicator); SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter()); SPtr<KernelFactoryImp> kernelFactory = KernelFactoryImp::getInstance(); SPtr<PreProcessorFactoryImp> preProcessorFactory = PreProcessorFactoryImp::getInstance(); @@ -717,31 +718,31 @@ std::string chooseVariation() int main( int argc, char* argv[]) { - MPI_Init(&argc, &argv); - if ( argv != NULL ) + try { - try - { - // assuming that the config files is stored parallel to this file. - std::filesystem::path filePath = __FILE__; - filePath.replace_filename("configDrivenCavity.txt"); + vf::logging::Logger::initalizeLogger(); - multipleLevel(filePath.string()); - } - catch (const std::bad_alloc& e) - { - *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n"; - } - catch (const std::exception& e) - { - *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n"; - } - catch (...) - { - *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n"; - } + // assuming that the config file is stored parallel to this file. + std::filesystem::path filePath = __FILE__; + filePath.replace_filename("configDrivenCavity.txt"); + + multipleLevel(filePath.string()); + } + catch (const spdlog::spdlog_ex &ex) { + std::cout << "Log initialization failed: " << ex.what() << std::endl; + } + catch (const std::bad_alloc& e) + { + VF_LOG_CRITICAL("Bad Alloc: {}", e.what()); + } + catch (const std::exception& e) + { + VF_LOG_CRITICAL("exception: {}", e.what()); + } + catch (...) + { + VF_LOG_CRITICAL("Unknown exception!"); } - MPI_Finalize(); return 0; } diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index 9b90e03648c3485dc496dac86deadeb7247e6a58..71757d073fb30bc888173fa47adf7236e199f3a5 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -1,4 +1,4 @@ -#include "Calculation/UpdateGrid27.h" +#include "UpdateGrid27.h" #include <cuda_runtime.h> #include <helper_cuda.h> #include "Calculation/DragLift.h" @@ -9,7 +9,7 @@ #include "Kernel/Kernel.h" void updateGrid27(Parameter* para, - vf::gpu::Communicator* comm, + vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, std::vector<std::shared_ptr<PorousMedia>>& pm, int level, @@ -149,7 +149,7 @@ void collisionAdvectionDiffusion(Parameter* para, int level) } } -void exchangeMultiGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level) +void exchangeMultiGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level) { if (para->getNumprocs() > 1) { diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h index c66d6afd40e4261ce0a6800c6239071c81c95179..ac0d13d6454e4002117dc2874df98172f8e79a30 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h @@ -11,7 +11,7 @@ class Kernel; extern "C" void updateGrid27(Parameter* para, - vf::gpu::Communicator* comm, + vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, std::vector<std::shared_ptr<PorousMedia>>& pm, int level, @@ -24,7 +24,7 @@ extern "C" void collisionPorousMedia(Parameter* para, std::vector<std::shared_pt extern "C" void 
collisionAdvectionDiffusion(Parameter* para, int level); -extern "C" void exchangeMultiGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level); +extern "C" void exchangeMultiGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level); extern "C" void postCollisionBC(Parameter* para, int level, unsigned int t); diff --git a/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp b/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp index 32fd45109538101a35e253caff102c4f4df1a4a5..2743f454e321bf21cb4d0b7fd08aab8600a2bee8 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp +++ b/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp @@ -1,9 +1,9 @@ #include "Communicator.h" + #include <mpi.h> -#include <stdio.h> -#include <stdlib.h> #include <vector> -#include <string.h> + +#include <logger/Logger.h> #if defined (_WIN32) || defined (_WIN64) #include <Winsock2.h> @@ -12,13 +12,19 @@ #endif //lib for windows Ws2_32.lib -namespace vf -{ -namespace gpu +namespace vf::gpu { + Communicator::Communicator() { + int mpiInitialized = 0; // false + MPI_Initialized(&mpiInitialized); + if (!mpiInitialized) { + MPI_Init(NULL, NULL); + VF_LOG_TRACE("vf::gpu::Communicator(): MPI_Init"); + } + MPI_Comm_rank(MPI_COMM_WORLD, &PID); MPI_Comm_size(MPI_COMM_WORLD, &numprocs); @@ -29,21 +35,25 @@ Communicator::Communicator() // Get my position in this communicator, and my neighbors MPI_Cart_shift(comm1d, 0, 1, &nbrbottom, &nbrtop); } -// Crap by Martin Sch. -Communicator::Communicator(const int numberOfProcs) + +Communicator::~Communicator() { - MPI_Comm_rank(MPI_COMM_WORLD, &PID); - MPI_Comm_size(MPI_COMM_WORLD, &numprocs); - commGPU = MPI_COMM_WORLD; - requestGPU.resize(0); - rcount = 0; + // check if MPI has already been finalized + int _mpiFinalized = 0; // false + MPI_Finalized(&_mpiFinalized); + if (!_mpiFinalized) { + MPI_Finalize(); + VF_LOG_TRACE("vf::gpu::~Communicator(): MPI_Finalize"); + } } -Communicator *Communicator::instanz = 0; -Communicator *Communicator::getInstanz() + + +// C++11 thread-safe singleton implementation: +// https://stackoverflow.com/questions/1661529/is-meyers-implementation-of-the-singleton-pattern-thread-safe +Communicator& Communicator::getInstance() { - if (instanz == 0) - instanz = new Communicator(0); - return instanz; + static Communicator comm; + return comm; } void Communicator::exchngBottomToTop(float *sbuf, float *rbuf, int count) @@ -189,7 +199,7 @@ int Communicator::mapCudaDevice(const int &rank, const int &size, const std::vec counter++; } if (counter >= maxdev) { - fprintf(stderr, "More processes than GPUs!\n"); + VF_LOG_CRITICAL("More processes than GPUs!"); exit(1); } map[i] = devices[counter]; @@ -198,12 +208,11 @@ int Communicator::mapCudaDevice(const int &rank, const int &size, const std::vec MPI_Scatter(map, 1, MPI_UNSIGNED, &device, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD); - printf("Rank: %d runs on host: %s with GPU: %d\n", rank, hostname, device); + VF_LOG_INFO("Rank: {} runs on host: {} with GPU: {}", rank, hostname, device); free(map); free(host); return device; } -} // namespace GPU -} // namespace VF +} diff --git a/src/gpu/VirtualFluids_GPU/Communication/Communicator.h b/src/gpu/VirtualFluids_GPU/Communication/Communicator.h index 72c4a136ece03098c10ea65493ba02a0109ed95d..256dde87e8ff6b3a8c7abcae0ac31466cc68ba95 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/Communicator.h +++ b/src/gpu/VirtualFluids_GPU/Communication/Communicator.h @@ -3,14 +3,11 @@ #include <vector> - - 
- #include <mpi.h> #include "VirtualFluids_GPU_export.h" -#include "LBM/LB.h" +#include <basics/Core/DataTypes.h> ////////////////////////////////// #ifdef VF_DOUBLE_ACCURACY @@ -21,48 +18,46 @@ ////////////////////////////////// -namespace vf -{ -namespace gpu +namespace vf::gpu { class VIRTUALFLUIDS_GPU_EXPORT Communicator { public: - static Communicator* getInstanz(); - static Communicator* getInstanz(const int numberOfProcs); - void exchngBottomToTop(float* sbuf, float* rbuf, int count); - void exchngTopToBottom(float* sbuf, float* rbuf, int count); - void waitAll(); - void distributeGeometry(unsigned int* dataRoot, unsigned int* dataNode, int dataSizePerNode); - int getPID() const; - int getNummberOfProcess() const; - int getNeighbourTop(); - int getNeighbourBottom(); - void exchngData(float* sbuf_t, float* rbuf_t, float* sbuf_b, float* rbuf_b, int count); - void exchngDataNB(float* sbuf_t, int count_st, float* rbuf_t, int count_rt, float* sbuf_b, int count_sb, float* rbuf_b, int count_rb); - ////////////////////////////////////////////////////////////////////////// - void exchngDataGPU(real* sbuf, int count_s, real* rbuf, int count_r, int nb_rank); - void sendRecvGPU(real* sbuf, int count_s, real* rbuf, int count_r, int nb_rank); - void nbRecvDataGPU( real* rbuf, int count_r, int nb_rank ); - void nbSendDataGPU( real* sbuf, int count_s, int nb_rank ); - void waitallGPU(); - void sendDataGPU( real* sbuf, int count_s, int nb_rank ); - void waitGPU(int id); - void resetRequest(); - void barrierGPU(); - void barrier(); - ////////////////////////////////////////////////////////////////////////// - void exchngDataGeo(int* sbuf_t, int* rbuf_t, int* sbuf_b, int* rbuf_b, int count); - MPI_Comm getCommunicator(); - void startTimer(); - void stopTimer(); - double getTime(); - int mapCudaDevice(const int &rank, const int &size, const std::vector<unsigned int> &devices, const int &maxdev); -protected: + static Communicator& getInstance(); + Communicator(const Communicator&) = delete; + Communicator& operator=(const Communicator&) = delete; + + void exchngBottomToTop(float* sbuf, float* rbuf, int count); + void exchngTopToBottom(float* sbuf, float* rbuf, int count); + void waitAll(); + void distributeGeometry(unsigned int* dataRoot, unsigned int* dataNode, int dataSizePerNode); + int getPID() const; + int getNummberOfProcess() const; + int getNeighbourTop(); + int getNeighbourBottom(); + void exchngData(float* sbuf_t, float* rbuf_t, float* sbuf_b, float* rbuf_b, int count); + void exchngDataNB(float* sbuf_t, int count_st, float* rbuf_t, int count_rt, float* sbuf_b, int count_sb, float* rbuf_b, int count_rb); + ////////////////////////////////////////////////////////////////////////// + void exchngDataGPU(real* sbuf, int count_s, real* rbuf, int count_r, int nb_rank); + void sendRecvGPU(real* sbuf, int count_s, real* rbuf, int count_r, int nb_rank); + void nbRecvDataGPU( real* rbuf, int count_r, int nb_rank ); + void nbSendDataGPU( real* sbuf, int count_s, int nb_rank ); + void waitallGPU(); + void sendDataGPU( real* sbuf, int count_s, int nb_rank ); + void waitGPU(int id); + void resetRequest(); + void barrierGPU(); + void barrier(); + ////////////////////////////////////////////////////////////////////////// + void exchngDataGeo(int* sbuf_t, int* rbuf_t, int* sbuf_b, int* rbuf_b, int count); + MPI_Comm getCommunicator(); + void startTimer(); + void stopTimer(); + double getTime(); + int mapCudaDevice(const int &rank, const int &size, const std::vector<unsigned int> &devices, const int &maxdev); 
private: - static Communicator* instanz; int numprocs, PID; int nbrbottom, nbrtop; MPI_Comm comm1d, commGPU; @@ -75,12 +70,10 @@ private: double starttime; double endtime; Communicator(); - Communicator(const int numberOfProcs); - Communicator(const Communicator&); + ~Communicator(); }; -} // namespace GPU -} // namespace VF +} #endif diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp index 8f89656ac6feb7dfe2644a2b6d604ccec510c3cb..d91e86c3140bb08aa2d8ef28d7cc147b23a2b804 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp +++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp @@ -7,7 +7,7 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // X //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level) +void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level) { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host @@ -30,7 +30,7 @@ void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborX[i].f[0], + comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborX[i].f[0], para->getParH(level)->recvProcessNeighborX[i].numberOfFs, para->getParH(level)->recvProcessNeighborX[i].rankNeighbor); } @@ -38,7 +38,7 @@ void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud ////start non blocking MPI send //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) //{ - // comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], + // comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], // para->getParH(level)->sendProcessNeighborX[i].numberOfFs, // para->getParH(level)->sendProcessNeighborX[i].rankNeighbor); //} @@ -46,13 +46,13 @@ void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud ////Waitall //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) //{ - // comm->waitallGPU(); + // comm.waitallGPU(); //} ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->sendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], + comm.sendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], para->getParH(level)->sendProcessNeighborX[i].numberOfFs, para->getParH(level)->sendProcessNeighborX[i].rankNeighbor); } @@ -60,13 +60,13 @@ void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -88,7 +88,7 @@ void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level) +void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level) { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host @@ -111,7 +111,7 @@ void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborX[i].f[0], + comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborX[i].f[0], para->getParH(level)->recvProcessNeighborX[i].numberOfFs, para->getParH(level)->recvProcessNeighborX[i].rankNeighbor); } @@ -119,7 +119,7 @@ void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu ////start non blocking MPI send //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) //{ - // comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], + // comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], // para->getParH(level)->sendProcessNeighborX[i].numberOfFs, // para->getParH(level)->sendProcessNeighborX[i].rankNeighbor); //} @@ -127,13 +127,13 @@ void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu ////Waitall //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) //{ - // comm->waitallGPU(); + // comm.waitallGPU(); //} ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->sendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], + comm.sendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], para->getParH(level)->sendProcessNeighborX[i].numberOfFs, para->getParH(level)->sendProcessNeighborX[i].rankNeighbor); } @@ -141,13 +141,13 @@ void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -176,7 +176,7 @@ void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Y //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level) +void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level) { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host @@ -199,7 +199,7 @@ void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborY[i].f[0], + comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborY[i].f[0], para->getParH(level)->recvProcessNeighborY[i].numberOfFs, para->getParH(level)->recvProcessNeighborY[i].rankNeighbor); } @@ -207,7 +207,7 @@ void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud ////start non blocking MPI send //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) //{ - // comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], + // comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], // para->getParH(level)->sendProcessNeighborY[i].numberOfFs, // para->getParH(level)->sendProcessNeighborY[i].rankNeighbor); //} @@ -215,13 +215,13 @@ void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud ////Waitall //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))) //{ - // comm->waitallGPU(); + // comm.waitallGPU(); //} ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->sendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], + comm.sendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], para->getParH(level)->sendProcessNeighborY[i].numberOfFs, para->getParH(level)->sendProcessNeighborY[i].rankNeighbor); } @@ -229,13 +229,13 @@ void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -257,7 +257,7 @@ void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level) +void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level) { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host @@ -280,7 +280,7 @@ void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborY[i].f[0], + comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborY[i].f[0], para->getParH(level)->recvProcessNeighborY[i].numberOfFs, para->getParH(level)->recvProcessNeighborY[i].rankNeighbor); } @@ -288,7 +288,7 @@ void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu ////start non blocking MPI send //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) //{ - // comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], + // comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], // para->getParH(level)->sendProcessNeighborY[i].numberOfFs, // para->getParH(level)->sendProcessNeighborY[i].rankNeighbor); //} @@ -296,13 +296,13 @@ void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu ////Waitall //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))) //{ - // comm->waitallGPU(); + // comm.waitallGPU(); //} ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->sendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], + comm.sendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], para->getParH(level)->sendProcessNeighborY[i].numberOfFs, para->getParH(level)->sendProcessNeighborY[i].rankNeighbor); } @@ -310,13 +310,13 @@ void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -345,7 +345,7 @@ void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Z //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void 
exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level) +void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level) { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host @@ -368,7 +368,7 @@ void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) { - comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborZ[i].f[0], + comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborZ[i].f[0], para->getParH(level)->recvProcessNeighborZ[i].numberOfFs, para->getParH(level)->recvProcessNeighborZ[i].rankNeighbor); } @@ -376,7 +376,7 @@ void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud ////start non blocking MPI send //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) //{ - // comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], + // comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], // para->getParH(level)->sendProcessNeighborZ[i].numberOfFs, // para->getParH(level)->sendProcessNeighborZ[i].rankNeighbor); //} @@ -384,13 +384,13 @@ void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud ////Waitall //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))) //{ - // comm->waitallGPU(); + // comm.waitallGPU(); //} ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) { - comm->sendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], + comm.sendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], para->getParH(level)->sendProcessNeighborZ[i].numberOfFs, para->getParH(level)->sendProcessNeighborZ[i].rankNeighbor); } @@ -398,13 +398,13 @@ void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -426,7 +426,7 @@ void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level) +void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level) { 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host @@ -449,7 +449,7 @@ void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) { - comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborZ[i].f[0], + comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborZ[i].f[0], para->getParH(level)->recvProcessNeighborZ[i].numberOfFs, para->getParH(level)->recvProcessNeighborZ[i].rankNeighbor); } @@ -457,7 +457,7 @@ void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu ////start non blocking MPI send //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) //{ - // comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], + // comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], // para->getParH(level)->sendProcessNeighborZ[i].numberOfFs, // para->getParH(level)->sendProcessNeighborZ[i].rankNeighbor); //} @@ -465,13 +465,13 @@ void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu ////Waitall //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))) //{ - // comm->waitallGPU(); + // comm.waitallGPU(); //} ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) { - comm->sendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], + comm.sendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], para->getParH(level)->sendProcessNeighborZ[i].numberOfFs, para->getParH(level)->sendProcessNeighborZ[i].rankNeighbor); } @@ -479,13 +479,13 @@ void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -529,7 +529,7 @@ void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //1D domain decomposition //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level) +void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level) { for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighbors(level, "send")); i++) { @@ -547,7 +547,7 @@ void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cuda 
////////////////////////////////////////////////////////////////////////// cudaManager->cudaCopyProcessNeighborFsDH(level, i); ////////////////////////////////////////////////////////////////////////// - comm->exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], + comm.exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], para->getParH(level)->sendProcessNeighbor[i].numberOfFs, para->getParH(level)->recvProcessNeighbor[i].f[0], para->getParH(level)->recvProcessNeighbor[i].numberOfFs, @@ -574,7 +574,7 @@ void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cuda //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level) +void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level) { for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighbors(level, "send")); i++) { @@ -592,7 +592,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud ////////////////////////////////////////////////////////////////////////// cudaManager->cudaCopyProcessNeighborFsDH(level, i); ////////////////////////////////////////////////////////////////////////// - comm->exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], + comm.exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], para->getParH(level)->sendProcessNeighbor[i].numberOfFs, para->getParH(level)->recvProcessNeighbor[i].f[0], para->getParH(level)->recvProcessNeighbor[i].numberOfFs, @@ -623,7 +623,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //// X ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, int level) +//void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, int level) //{ // for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) // { @@ -641,7 +641,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud // ////////////////////////////////////////////////////////////////////////// // para->cudaCopyProcessNeighborXFsDH(level, i); // ////////////////////////////////////////////////////////////////////////// -// comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], +// comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], // para->getParH(level)->sendProcessNeighborX[i].numberOfFs, // para->getParH(level)->recvProcessNeighborX[i].f[0], // para->getParH(level)->recvProcessNeighborX[i].numberOfFs, @@ -663,7 +663,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud // } //} ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, int level) +//void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, int level) //{ // for (unsigned int i = 0; i < (unsigned 
int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) // { @@ -681,7 +681,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud // ////////////////////////////////////////////////////////////////////////// // para->cudaCopyProcessNeighborXFsDH(level, i); // ////////////////////////////////////////////////////////////////////////// -// comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], +// comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], // para->getParH(level)->sendProcessNeighborX[i].numberOfFs, // para->getParH(level)->recvProcessNeighborX[i].f[0], // para->getParH(level)->recvProcessNeighborX[i].numberOfFs, @@ -710,7 +710,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //// Y ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, int level) +//void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, int level) //{ // for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) // { @@ -728,7 +728,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud // ////////////////////////////////////////////////////////////////////////// // para->cudaCopyProcessNeighborYFsDH(level, i); // ////////////////////////////////////////////////////////////////////////// -// comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], +// comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], // para->getParH(level)->sendProcessNeighborY[i].numberOfFs, // para->getParH(level)->recvProcessNeighborY[i].f[0], // para->getParH(level)->recvProcessNeighborY[i].numberOfFs, @@ -750,7 +750,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud // } //} ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, int level) +//void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, int level) //{ // for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) // { @@ -768,7 +768,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud // ////////////////////////////////////////////////////////////////////////// // para->cudaCopyProcessNeighborYFsDH(level, i); // ////////////////////////////////////////////////////////////////////////// -// comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], +// comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], // para->getParH(level)->sendProcessNeighborY[i].numberOfFs, // para->getParH(level)->recvProcessNeighborY[i].f[0], // para->getParH(level)->recvProcessNeighborY[i].numberOfFs, @@ -797,7 +797,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //// Z 
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, int level) +//void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, int level) //{ // for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) // { @@ -815,7 +815,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud // ////////////////////////////////////////////////////////////////////////// // para->cudaCopyProcessNeighborZFsDH(level, i); // ////////////////////////////////////////////////////////////////////////// -// comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], +// comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], // para->getParH(level)->sendProcessNeighborZ[i].numberOfFs, // para->getParH(level)->recvProcessNeighborZ[i].f[0], // para->getParH(level)->recvProcessNeighborZ[i].numberOfFs, @@ -837,7 +837,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud // } //} ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, int level) +//void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, int level) //{ // for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) // { @@ -855,7 +855,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud // ////////////////////////////////////////////////////////////////////////// // para->cudaCopyProcessNeighborZFsDH(level, i); // ////////////////////////////////////////////////////////////////////////// -// comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], +// comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], // para->getParH(level)->sendProcessNeighborZ[i].numberOfFs, // para->getParH(level)->recvProcessNeighborZ[i].f[0], // para->getParH(level)->recvProcessNeighborZ[i].numberOfFs, @@ -932,7 +932,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // X //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level) +void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level) { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host @@ -955,7 +955,7 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, C //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0], + comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0], para->getParH(level)->recvProcessNeighborADX[i].numberOfFs, 
para->getParH(level)->recvProcessNeighborADX[i].rankNeighbor); } @@ -963,7 +963,7 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, C ////start non blocking MPI send //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) //{ - // comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0], + // comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0], // para->getParH(level)->sendProcessNeighborADX[i].numberOfFs, // para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor); //} @@ -971,13 +971,13 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, C ////Waitall //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) //{ - // comm->waitallGPU(); + // comm.waitallGPU(); //} ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0], + comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0], para->getParH(level)->sendProcessNeighborADX[i].numberOfFs, para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor); } @@ -985,13 +985,13 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, C //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -1013,7 +1013,7 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, C /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level) +void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level) { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host @@ -1036,7 +1036,7 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0], + comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0], para->getParH(level)->recvProcessNeighborADX[i].numberOfFs, para->getParH(level)->recvProcessNeighborADX[i].rankNeighbor); } @@ -1044,7 +1044,7 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, ////start non blocking MPI send //for (unsigned int i = 0; i < (unsigned 
int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) //{ - // comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0], + // comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0], // para->getParH(level)->sendProcessNeighborADX[i].numberOfFs, // para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor); //} @@ -1052,13 +1052,13 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, ////Waitall //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) //{ - // comm->waitallGPU(); + // comm.waitallGPU(); //} ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0], + comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0], para->getParH(level)->sendProcessNeighborADX[i].numberOfFs, para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor); } @@ -1066,13 +1066,13 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -1101,7 +1101,7 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Y //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level) +void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level) { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host @@ -1124,7 +1124,7 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, C //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0], + comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0], para->getParH(level)->recvProcessNeighborADY[i].numberOfFs, para->getParH(level)->recvProcessNeighborADY[i].rankNeighbor); } @@ -1132,7 +1132,7 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, C ////start non blocking MPI send //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) //{ - // comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0], + // comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0], // 
para->getParH(level)->sendProcessNeighborADY[i].numberOfFs, // para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor); //} @@ -1140,13 +1140,13 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, C ////Waitall //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))) //{ - // comm->waitallGPU(); + // comm.waitallGPU(); //} ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0], + comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0], para->getParH(level)->sendProcessNeighborADY[i].numberOfFs, para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor); } @@ -1154,13 +1154,13 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, C //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -1182,7 +1182,7 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, C /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level) +void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level) { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host @@ -1205,7 +1205,7 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0], + comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0], para->getParH(level)->recvProcessNeighborADY[i].numberOfFs, para->getParH(level)->recvProcessNeighborADY[i].rankNeighbor); } @@ -1213,7 +1213,7 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, ////start non blocking MPI send //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) //{ - // comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0], + // comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0], // para->getParH(level)->sendProcessNeighborADY[i].numberOfFs, // para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor); //} @@ -1221,13 +1221,13 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, ////Waitall //if (0 < (unsigned 
int)(para->getNumberOfProcessNeighborsY(level, "send"))) //{ - // comm->waitallGPU(); + // comm.waitallGPU(); //} ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0], + comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0], para->getParH(level)->sendProcessNeighborADY[i].numberOfFs, para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor); } @@ -1235,13 +1235,13 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, //Wait for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { - comm->waitGPU(i); + comm.waitGPU(i); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //reset the request array if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))) { - comm->resetRequest(); + comm.resetRequest(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Host to Device @@ -1270,7 +1270,7 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Z //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level) +void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level) { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host @@ -1293,7 +1293,7 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, C //start non blocking MPI receive for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) { - comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0], + comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0], para->getParH(level)->recvProcessNeighborADZ[i].numberOfFs, para->getParH(level)->recvProcessNeighborADZ[i].rankNeighbor); } @@ -1301,7 +1301,7 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, C ////start non blocking MPI send //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) //{ - // comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0], + // comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0], // para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs, // para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor); //} @@ -1309,13 +1309,13 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, C ////Waitall //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))) //{ - // comm->waitallGPU(); + // comm.waitallGPU(); //} ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start 
     //start blocking MPI send
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     {
-        comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
+        comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
                          para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs,
                          para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor);
     }
@@ -1323,13 +1323,13 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, C
     //Wait
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     {
-        comm->waitGPU(i);
+        comm.waitGPU(i);
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //reset the request array
     if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
     {
-        comm->resetRequest();
+        comm.resetRequest();
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Host to Device
@@ -1351,7 +1351,7 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, C
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Device to Host
@@ -1374,7 +1374,7 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm,
     //start non blocking MPI receive
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     {
-        comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0],
+        comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0],
                            para->getParH(level)->recvProcessNeighborADZ[i].numberOfFs,
                            para->getParH(level)->recvProcessNeighborADZ[i].rankNeighbor);
     }
@@ -1382,7 +1382,7 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm,
     ////start non blocking MPI send
     //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     //{
-    //    comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
+    //    comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
    //                        para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs,
    //                        para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor);
    //}
@@ -1390,13 +1390,13 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm,
     ////Waitall
     //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
     //{
-    //    comm->waitallGPU();
+    //    comm.waitallGPU();
     //}
     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //start blocking MPI send
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     {
-        comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
+        comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
                          para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs,
                          para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor);
     }
@@ -1404,13 +1404,13 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm,
     //Wait
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     {
-        comm->waitGPU(i);
+        comm.waitGPU(i);
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //reset the request array
     if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
     {
-        comm->resetRequest();
+        comm.resetRequest();
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Host to Device
@@ -1486,7 +1486,7 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm,
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // X
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Device to Host
@@ -1510,7 +1510,7 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
     //start non blocking MPI receive
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
     {
-        comm->nbRecvDataGPU(
+        comm.nbRecvDataGPU(
            para->getParH(level)->recvProcessNeighborF3X[i].g[0],
            para->getParH(level)->recvProcessNeighborF3X[i].numberOfGs,
            para->getParH(level)->recvProcessNeighborF3X[i].rankNeighbor);
@@ -1519,7 +1519,7 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
     //start blocking MPI send
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
     {
-        comm->sendDataGPU(
+        comm.sendDataGPU(
            para->getParH(level)->sendProcessNeighborF3X[i].g[0],
            para->getParH(level)->sendProcessNeighborF3X[i].numberOfGs,
            para->getParH(level)->sendProcessNeighborF3X[i].rankNeighbor);
@@ -1528,13 +1528,13 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
     //Wait
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
     {
-        comm->waitGPU(i);
+        comm.waitGPU(i);
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //reset the request array
     if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
     {
-        comm->resetRequest();
+        comm.resetRequest();
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Host to Device
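
Each of these exchange routines repeats the same template: copy the halo data from device to host, post the non-blocking receives first, issue blocking sends, wait on the receive requests, reset the request array, and copy the received data back to the device. The sketch below shows that communication template in plain MPI. It is only an illustration of the pattern; the assumption that nbRecvDataGPU/sendDataGPU/waitGPU wrap MPI_Irecv/MPI_Send/MPI_Wait is mine, and the buffer and function names are hypothetical.

    // Illustrative sketch of the "post receives, then send, then wait" halo exchange.
    #include <mpi.h>
    #include <vector>

    void exchangeWithNeighbors(const std::vector<int>& neighborRanks,
                               std::vector<std::vector<double>>& recvBuffers,
                               const std::vector<std::vector<double>>& sendBuffers)
    {
        std::vector<MPI_Request> requests(neighborRanks.size());

        // start non-blocking receives first so the matching sends cannot deadlock
        for (std::size_t i = 0; i < neighborRanks.size(); i++)
            MPI_Irecv(recvBuffers[i].data(), static_cast<int>(recvBuffers[i].size()), MPI_DOUBLE,
                      neighborRanks[i], 0, MPI_COMM_WORLD, &requests[i]);

        // blocking sends to the same neighbors
        for (std::size_t i = 0; i < neighborRanks.size(); i++)
            MPI_Send(sendBuffers[i].data(), static_cast<int>(sendBuffers[i].size()), MPI_DOUBLE,
                     neighborRanks[i], 0, MPI_COMM_WORLD);

        // wait for every receive; the code above waits per request and then
        // clears ("resets") its request array so it can be reused next step
        MPI_Waitall(static_cast<int>(requests.size()), requests.data(), MPI_STATUSES_IGNORE);
    }
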
@@ -1564,7 +1564,7 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Y
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Device to Host
@@ -1588,7 +1588,7 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
     //start non blocking MPI receive
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
     {
-        comm->nbRecvDataGPU(
+        comm.nbRecvDataGPU(
            para->getParH(level)->recvProcessNeighborF3Y[i].g[0],
            para->getParH(level)->recvProcessNeighborF3Y[i].numberOfGs,
            para->getParH(level)->recvProcessNeighborF3Y[i].rankNeighbor);
@@ -1597,7 +1597,7 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
     //start blocking MPI send
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
     {
-        comm->sendDataGPU(
+        comm.sendDataGPU(
            para->getParH(level)->sendProcessNeighborF3Y[i].g[0],
            para->getParH(level)->sendProcessNeighborF3Y[i].numberOfGs,
            para->getParH(level)->sendProcessNeighborF3Y[i].rankNeighbor);
@@ -1606,13 +1606,13 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
     //Wait
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
     {
-        comm->waitGPU(i);
+        comm.waitGPU(i);
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //reset the request array
     if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
     {
-        comm->resetRequest();
+        comm.resetRequest();
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Host to Device
@@ -1642,7 +1642,7 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Z
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Device to Host
@@ -1666,7 +1666,7 @@ void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
     //start non blocking MPI receive
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     {
-        comm->nbRecvDataGPU(
+        comm.nbRecvDataGPU(
            para->getParH(level)->recvProcessNeighborF3Z[i].g[0],
            para->getParH(level)->recvProcessNeighborF3Z[i].numberOfGs,
            para->getParH(level)->recvProcessNeighborF3Z[i].rankNeighbor);
@@ -1675,7 +1675,7 @@ void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
     //start blocking MPI send
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     {
-        comm->sendDataGPU(
+        comm.sendDataGPU(
            para->getParH(level)->sendProcessNeighborF3Z[i].g[0],
            para->getParH(level)->sendProcessNeighborF3Z[i].numberOfGs,
            para->getParH(level)->sendProcessNeighborF3Z[i].rankNeighbor);
@@ -1684,13 +1684,13 @@ void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
     //Wait
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     {
-        comm->waitGPU(i);
+        comm.waitGPU(i);
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //reset the request array
     if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
     {
-        comm->resetRequest();
+        comm.resetRequest();
    }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Host to Device
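
Changing the parameter from vf::gpu::Communicator* to vf::gpu::Communicator& is purely a calling-convention change inside these functions: member access switches from -> to ., and callers can no longer hand in a null pointer. A minimal sketch of the idea, with illustrative type and function names that are not taken from the library:

    #include <iostream>

    struct Channel
    {
        void send(int value) { std::cout << "sent " << value << '\n'; }
    };

    // Reference parameter: the channel must exist for the whole call; access uses '.'
    void publish(Channel& channel, int value)
    {
        channel.send(value);
    }

    // Legacy pointer-based callers only need to dereference once at the boundary.
    void publishThroughPointer(Channel* channel, int value)
    {
        publish(*channel, value);
    }

    int main()
    {
        Channel channel;
        publish(channel, 1);
        publishThroughPointer(&channel, 2);
        return 0;
    }
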
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
index 34c9cba801c675bdf4c2cd39daca3be2d7918dbe..82662cdc55e8b0ff5f4afe7d31a6563579b45559 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
@@ -9,31 +9,31 @@
 //////////////////////////////////////////////////////////////////////////
 //1D domain decomposition
-extern "C" void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
 //////////////////////////////////////////////////////////////////////////
 //3D domain decomposition
-extern "C" void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
 //////////////////////////////////////////////////////////////////////////
 //3D domain decomposition convection diffusion
-extern "C" void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
 //////////////////////////////////////////////////////////////////////////
 //3D domain decomposition F3 - K18/K20
-extern "C" void exchangeCollDataF3XGPU( Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangeCollDataF3YGPU( Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangeCollDataF3ZGPU( Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangeCollDataF3XGPU( Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangeCollDataF3YGPU( Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangeCollDataF3ZGPU( Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
 //////////////////////////////////////////////////////////////////////////
-extern "C" void barrierGPU(vf::gpu::Communicator* comm);
+extern "C" void barrierGPU(vf::gpu::Communicator& comm);
 //////////////////////////////////////////////////////////////////////////
 #endif
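
With these declarations switched to vf::gpu::Communicator&, a call site that owns the communicator passes the object itself. A hypothetical wrapper, not part of the repository (the include path is assumed from the file location above), that runs the F3 exchange in all three directions on one grid level could look like this:

    #include "VirtualFluids_GPU/Communication/ExchangeData27.h"

    // Hypothetical helper: exchange the F3 distributions across the X-, Y- and
    // Z-faces of the given level using the shared communicator reference.
    void exchangeF3AllDirections(Parameter* para, vf::gpu::Communicator& communicator,
                                 CudaMemoryManager* cudaManager, int level)
    {
        exchangeCollDataF3XGPU(para, communicator, cudaManager, level);
        exchangeCollDataF3YGPU(para, communicator, cudaManager, level);
        exchangeCollDataF3ZGPU(para, communicator, cudaManager, level);
    }
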
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index 897f9b4af238dcdc98083d14ebd10c42523ba9d6..62d3a944176617996abd90ce6ba9b19973afeb04 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -50,6 +50,10 @@
 #include <logger/Logger.h>
 
+Simulation::Simulation(vf::gpu::Communicator& communicator) : communicator(communicator)
+{
+
+}
 std::string getFileName(const std::string& fname, int step, int myID)
 {
@@ -80,10 +84,9 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std
     this->gridProvider = gridProvider;
     this->cudaManager = cudaManager;
     gridProvider->initalGridInformations();
-    comm = vf::gpu::Communicator::getInstanz();
     this->para = para;
 
-    vf::cuda::verifyAndSetDevice(comm->mapCudaDevice(para->getMyID(), para->getNumprocs(), para->getDevices(), para->getMaxDev()));
+    vf::cuda::verifyAndSetDevice(communicator.mapCudaDevice(para->getMyID(), para->getNumprocs(), para->getDevices(), para->getMaxDev()));
 
     para->initLBMSimulationParameter();
@@ -251,7 +254,7 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std
     //////////////////////////////////////////////////////////////////////////
     //output << "define the Grid..." ;
-    //defineGrid(para, comm);
+    //defineGrid(para, communicator);
     ////allocateMemory();
     //output << "done.\n";
@@ -414,7 +417,7 @@ void Simulation::run()
     ////////////////////////////////////////////////////////////////////////////////
     for(t=para->getTStart();t<=para->getTEnd();t++)
     {
-        updateGrid27(para.get(), comm, cudaManager.get(), pm, 0, t, kernels);
+        updateGrid27(para.get(), communicator, cudaManager.get(), pm, 0, t, kernels);
 
         ////////////////////////////////////////////////////////////////////////////////
         //Particles
@@ -429,7 +432,7 @@ void Simulation::run()
         // run Analyzers for kinetic energy and enstrophy for TGV in 3D
         // these analyzers only work on level 0
         ////////////////////////////////////////////////////////////////////////////////
-        if( this->kineticEnergyAnalyzer || this->enstrophyAnalyzer ) exchangeMultiGPU(para.get(), comm, cudaManager.get(), 0);
+        if( this->kineticEnergyAnalyzer || this->enstrophyAnalyzer ) exchangeMultiGPU(para.get(), communicator, cudaManager.get(), 0);
 
         if( this->kineticEnergyAnalyzer ) this->kineticEnergyAnalyzer->run(t);
         if( this->enstrophyAnalyzer ) this->enstrophyAnalyzer->run(t);
@@ -623,7 +626,7 @@ void Simulation::run()
         ////////////////////////////////////////////////////////////////////////////////
         // File IO
         ////////////////////////////////////////////////////////////////////////////////
-        //comm->startTimer();
+        //communicator.startTimer();
         if(para->getTOut()>0 && t%para->getTOut()==0 && t>para->getTStartOut())
         {
             //////////////////////////////////////////////////////////////////////////////////
@@ -669,7 +672,7 @@ void Simulation::run()
             {
                 //////////////////////////////////////////////////////////////////////////
                 //exchange data for valid post process
-                exchangeMultiGPU(para.get(), comm, cudaManager.get(), lev);
+                exchangeMultiGPU(para.get(), communicator, cudaManager.get(), lev);
                 //////////////////////////////////////////////////////////////////////////
                 //if (para->getD3Qxx()==19)
                 //{
@@ -1282,8 +1285,4 @@ void Simulation::free()
             cudaManager->cudaFreeGeomNormals(lev);
         }
     }
-    //////////////////////////////////////////////////////////////////////////
-
-    delete comm;
-
 }
\ No newline at end of file
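
Storing the communicator as vf::gpu::Communicator& (instead of fetching a pointer from the singleton) is what forces the new explicit constructor: a reference member can only be bound in the member-initializer list, is guaranteed to refer to an object, and cannot be reseated afterwards, which also makes the class non-copy-assignable. A stripped-down illustration with generic names, not the library's:

    class Service
    {
    public:
        void doWork() { /* ... */ }
    };

    class Worker
    {
    public:
        explicit Worker(Service& service) : service(service) {} // reference must be bound here

        void step() { service.doWork(); } // '.' access, no null check needed

    private:
        Service& service; // must outlive the Worker; cannot be rebound later
    };

    // Usage sketch: the Service is created by the caller and injected,
    // mirroring how the communicator is now handed to Simulation.
    // Service service;
    // Worker worker(service);
    // worker.step();
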
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.h b/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
index 924667e678fbb1d5e05fc601086642e31088ccc5..72c86140258b01aec3b3ed00d59c271f1824d514 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
@@ -13,9 +13,7 @@
 
 #include "VirtualFluids_GPU_export.h"
 
-namespace vf::gpu {
-class Communicator;
-}
+namespace vf::gpu { class Communicator; }
 
 class CudaMemoryManager;
 class Parameter;
@@ -34,6 +32,7 @@ class TrafficMovementFactory;
 class VIRTUALFLUIDS_GPU_EXPORT Simulation
 {
 public:
+    Simulation(vf::gpu::Communicator& communicator);
     void run();
     void init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std::shared_ptr<DataWriter> dataWriter, std::shared_ptr<CudaMemoryManager> cudaManager);
     void free();
@@ -63,7 +62,7 @@ protected:
     LogWriter output;
 
-    vf::gpu::Communicator* comm;
+    vf::gpu::Communicator& communicator;
     SPtr<Parameter> para;
     SPtr<GridProvider> gridProvider;
     SPtr<DataWriter> dataWriter;
diff --git a/src/logger/Logger.h b/src/logger/Logger.h
index 594decaf5bd85913335e6d1659b6d89cad6d0610..adb7796135a989843ef8de1f778c9901f3ad17c8 100644
--- a/src/logger/Logger.h
+++ b/src/logger/Logger.h
@@ -48,7 +48,7 @@
 #define VF_LOG_TRACE(...) spdlog::trace(__VA_ARGS__)
 #define VF_LOG_DEBUG(...) spdlog::debug(__VA_ARGS__)
 #define VF_LOG_INFO(...) spdlog::info(__VA_ARGS__)
-#define VF_LOG_WARNING(...) spdlog::warning(__VA_ARGS__)
+#define VF_LOG_WARNING(...) spdlog::warn(__VA_ARGS__)
 #define VF_LOG_CRITICAL(...) spdlog::critical(__VA_ARGS__)
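
The macro fix is needed because spdlog's free function for the warning level is spdlog::warn; spdlog::warning does not exist, so any use of VF_LOG_WARNING previously failed to compile. The macros forward fmt-style format strings, for example (the helper function and values below are made up for illustration):

    #include <logger/Logger.h>

    // Hypothetical reporting helper showing the fmt-style syntax the macros expect.
    void reportStep(unsigned int timestep, double mlups)
    {
        VF_LOG_INFO("time step {} finished ({:.2f} MLUPS)", timestep, mlups);

        if (mlups < 1.0)
            VF_LOG_WARNING("unusually low performance in time step {}", timestep);
    }
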