From c5a25580df768d48a6a9a69b17dde2ec549e6a1d Mon Sep 17 00:00:00 2001
From: Anna Wellmann <a.wellmann@tu-braunschweig.de>
Date: Thu, 2 Sep 2021 12:36:39 +0200
Subject: [PATCH] Fix corner node communication for MultiGPU

Some nodes appear both in a receive list of one direction (X or Y) and in
a send list of a later direction (Y or Z). During Simulation::init, when
more than two devices are configured, Parameter::findCornerNodesCommMultiGPU
records for every level the (process neighbor, node) positions of such
nodes in both lists (cornerNodesXtoY, cornerNodesXtoZ, cornerNodesYtoZ).

exchangeCollDataYGPU27 and exchangeCollDataZGPU27 use these positions to
copy the value received in the earlier direction into the send buffer of
the current direction (on the getParH parameter set), after the stream
synchronization and before the blocking MPI send.
---
Review notes (ignored by git am):
 - findCornerNodesCommMultiGPU called findCornerNodesXY twice and never
   findCornerNodesXZ / findCornerNodesYZ, so cornerNodesXtoZ and
   cornerNodesYtoZ were never filled. It now calls XY, XZ and YZ once each.
 - findIndexInSendNodesXY/XZ/YZ bounded the inner loop with
   sendProcessNeighbor?[l].numberOfNodes, i.e. indexed the neighbor list
   with the node counter. The bound now uses the neighbor counter k.
 - NOTE(review): the exchange functions copy a single value f[0][pos] per
   corner node. Please confirm that one value per node is what has to be
   forwarded for this buffer layout.
 - NOTE(review): leading indentation and whitespace-only context lines in
   this file were reconstructed; verify against the target tree.

 .../Communication/ExchangeData27.cpp          | 31 ++++++++
 src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp  |  9 ++-
 .../VirtualFluids_GPU/Parameter/Parameter.cpp | 77 ++++++++++++++++++-
 .../VirtualFluids_GPU/Parameter/Parameter.h   | 17 +++++
 4 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
index 4a0cae9b2..bd5b08c43 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
@@ -160,6 +160,16 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
 
     if (para->getUseStreams()) cudaStreamSynchronize(stream);
     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // copy corner received node values from x
+    if (para->getNumberOfProcessNeighborsX(level, "receive") > 0) {
+        for (uint i = 0; i < para->getParH(level)->cornerNodesXtoY.recvPos.size(); i++) {
+            std::pair<int, int> & recvPosX = para->getParH(level)->cornerNodesXtoY.recvPos[i];
+            std::pair<int, int> & sendPosY = para->getParH(level)->cornerNodesXtoY.sendPos[i];
+            real &f = para->getParH(level)->recvProcessNeighborX[recvPosX.first].f[0][recvPosX.second];
+            para->getParH(level)->sendProcessNeighborY[sendPosY.first].f[0][sendPosY.second] = f;
+        }
+    }
+    /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //
     //start blocking MPI send
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
@@ -258,6 +268,27 @@ void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
 
     // wait for memcopy device to host to finish before sending data
     if (para->getUseStreams()) cudaStreamSynchronize(stream);
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // copy corner received node values from x
+    if (para->getNumberOfProcessNeighborsX(level, "receive") > 0) {
+        for (uint i = 0; i < para->getParH(level)->cornerNodesXtoZ.recvPos.size(); i++) {
+            std::pair<int, int> &recvPosX = para->getParH(level)->cornerNodesXtoZ.recvPos[i];
+            std::pair<int, int> &sendPosZ = para->getParH(level)->cornerNodesXtoZ.sendPos[i];
+            real &f = para->getParH(level)->recvProcessNeighborX[recvPosX.first].f[0][recvPosX.second];
+            para->getParH(level)->sendProcessNeighborZ[sendPosZ.first].f[0][sendPosZ.second] = f;
+        }
+    }
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // copy corner received node values from y
+    if (para->getNumberOfProcessNeighborsY(level, "receive") > 0) {
+        for (uint i = 0; i < para->getParH(level)->cornerNodesYtoZ.recvPos.size(); i++) {
+            std::pair<int, int> &recvPosY = para->getParH(level)->cornerNodesYtoZ.recvPos[i];
+            std::pair<int, int> &sendPosZ = para->getParH(level)->cornerNodesYtoZ.sendPos[i];
+            real &f = para->getParH(level)->recvProcessNeighborY[recvPosY.first].f[0][recvPosY.second];
+            para->getParH(level)->sendProcessNeighborZ[sendPosZ.first].f[0][sendPosZ.second] = f;
+        }
+    }
+    /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //start blocking MPI send
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index 30555cde1..6a4761963 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -299,7 +299,14 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std
 
     //findPressQShip(para);
     //output << "done.\n";
-
+    //////////////////////////////////////////////////////////////////////////
+    // find indices of corner nodes for multiGPU communication
+    //////////////////////////////////////////////////////////////////////////
+    if (para->getDevices().size() > 2) {
+        output << "Find indices of corner nodes for multiGPU communication ...";
+        para->findCornerNodesCommMultiGPU();
+        output << "done.\n";
+    }
     //////////////////////////////////////////////////////////////////////////
     //Memory alloc for CheckPoint / Restart
     //////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
index 6a75de3b0..2ed642ec5 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
@@ -2492,7 +2492,80 @@ void Parameter::setUseStreams() {
 
 bool Parameter::getUseStreams() { return this->useStreams; }
 
-CudaStreamManager &Parameter::getStreamManager()
-{ return this->cudaStreamManager; }
+CudaStreamManager &Parameter::getStreamManager() { return this->cudaStreamManager; }
+
+void Parameter::findCornerNodesCommMultiGPU() {
+    for (uint level = 0; level < parH.size(); level++) {
+        findCornerNodesXY(level);
+        findCornerNodesXZ(level);
+        findCornerNodesYZ(level);
+    }
+}
+
+void Parameter::findCornerNodesXY(int level)
+{
+    for (uint i = 0; i < (unsigned int)(this->getNumberOfProcessNeighborsX(level, "receive")); i++)
+        for (int j = 0; j < parH[level]->recvProcessNeighborX[i].numberOfNodes; j++) {
+            int index       = parH[level]->recvProcessNeighborX[i].index[j];
+            bool foundIndex = findIndexInSendNodesXY(level, index);
+            if (foundIndex)
+                this->parH[level]->cornerNodesXtoY.recvPos.push_back(std::pair(i, j));
+        }
+}
+
+bool Parameter::findIndexInSendNodesXY(int level, int index)
+{
+    for (uint k = 0; k < (unsigned int)(this->getNumberOfProcessNeighborsY(level, "send")); k++)
+        for (int l = 0; l < parH[level]->sendProcessNeighborY[k].numberOfNodes; l++)
+            if (parH[level]->sendProcessNeighborY[k].index[l] == index) {
+                this->parH[level]->cornerNodesXtoY.sendPos.push_back(std::pair(k, l));
+                return true;
+            }
+    return false;
+}
+
+void Parameter::findCornerNodesXZ(int level)
+{
+    for (uint i = 0; i < (unsigned int)(this->getNumberOfProcessNeighborsX(level, "receive")); i++)
+        for (int j = 0; j < parH[level]->recvProcessNeighborX[i].numberOfNodes; j++) {
+            int index       = parH[level]->recvProcessNeighborX[i].index[j];
+            bool foundIndex = findIndexInSendNodesXZ(level, index);
+            if (foundIndex)
+                this->parH[level]->cornerNodesXtoZ.recvPos.push_back(std::pair(i, j));
+        }
+}
+
+bool Parameter::findIndexInSendNodesXZ(int level, int index)
+{
+    for (uint k = 0; k < (unsigned int)(this->getNumberOfProcessNeighborsZ(level, "send")); k++)
+        for (int l = 0; l < parH[level]->sendProcessNeighborZ[k].numberOfNodes; l++)
+            if (parH[level]->sendProcessNeighborZ[k].index[l] == index) {
+                this->parH[level]->cornerNodesXtoZ.sendPos.push_back(std::pair(k, l));
+                return true;
+            }
+    return false;
+}
+
+void Parameter::findCornerNodesYZ(int level)
+{
+    for (uint i = 0; i < (unsigned int)(this->getNumberOfProcessNeighborsY(level, "receive")); i++)
+        for (int j = 0; j < parH[level]->recvProcessNeighborY[i].numberOfNodes; j++) {
+            int index       = parH[level]->recvProcessNeighborY[i].index[j];
+            bool foundIndex = findIndexInSendNodesYZ(level, index);
+            if (foundIndex)
+                this->parH[level]->cornerNodesYtoZ.recvPos.push_back(std::pair(i, j));
+        }
+}
+
+bool Parameter::findIndexInSendNodesYZ(int level, int index)
+{
+    for (uint k = 0; k < (unsigned int)(this->getNumberOfProcessNeighborsZ(level, "send")); k++)
+        for (int l = 0; l < parH[level]->sendProcessNeighborZ[k].numberOfNodes; l++)
+            if (parH[level]->sendProcessNeighborZ[k].index[l] == index) {
+                this->parH[level]->cornerNodesYtoZ.sendPos.push_back(std::pair(k, l));
+                return true;
+            }
+    return false;
+}
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
index 782ca95f7..66f36122f 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
@@ -308,7 +308,16 @@ struct LBMSimulationParameter
     std::vector<ProcessNeighborF3> recvProcessNeighborF3Y;
     std::vector<ProcessNeighborF3> recvProcessNeighborF3Z;
     ////////////////////////////////////////////////////////////////////////////
+    // 3D domain decomposition: position (index in array) of corner nodes in ProcessNeighbor27
+    struct cornerNodePostions {
+        std::vector<std::pair<int, int>> recvPos;
+        std::vector<std::pair<int, int>> sendPos;
+    };
+    cornerNodePostions cornerNodesXtoY;
+    cornerNodePostions cornerNodesXtoZ;
+    cornerNodePostions cornerNodesYtoZ;
 
+    ///////////////////////////////////////////////////////
     uint *fluidNodeIndices;
     uint numberOfFluidNodes;
     uint *fluidNodeIndicesBorder;
@@ -854,6 +863,14 @@ public:
     void setUseStreams();
     bool getUseStreams();
     CudaStreamManager &getStreamManager();
+
+    void findCornerNodesCommMultiGPU();
+    void findCornerNodesXY(int level);
+    bool findIndexInSendNodesXY(int level, int index);
+    void findCornerNodesXZ(int level);
+    bool findIndexInSendNodesXZ(int level, int index);
+    void findCornerNodesYZ(int level);
+    bool findIndexInSendNodesYZ(int level, int index);
 };
 
 #endif
-- 
GitLab