diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp index 4a0cae9b223f0170ae1831406787bd5b777729a5..bd5b08c431bba65e460bbe984860a38774ea6395 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp +++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp @@ -160,6 +160,16 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe if (para->getUseStreams()) cudaStreamSynchronize(stream); ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // copy corner received node values from x + if (para->getNumberOfProcessNeighborsX(level, "receive") > 0) { + for (uint i = 0; i < para->getParH(level)->cornerNodesXtoY.recvPos.size(); i++) { + std::pair<int, int> & recvPosX = para->getParH(level)->cornerNodesXtoY.recvPos[i]; + std::pair<int, int> & sendPosY = para->getParH(level)->cornerNodesXtoY.sendPos[i]; + real &f = para->getParH(level)->recvProcessNeighborX[recvPosX.first].f[0][recvPosX.second]; + para->getParH(level)->sendProcessNeighborY[sendPosY.first].f[0][sendPosY.second] = f; + } + } + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) @@ -258,6 +268,27 @@ void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe // wait for memcopy device to host to finish before sending data if (para->getUseStreams()) cudaStreamSynchronize(stream); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // copy corner received node values from x + if (para->getNumberOfProcessNeighborsX(level, "receive") > 0) { + for (uint i = 0; i < para->getParH(level)->cornerNodesXtoZ.recvPos.size(); 
i++) { + std::pair<int, int> &recvPosX = para->getParH(level)->cornerNodesXtoZ.recvPos[i]; + std::pair<int, int> &sendPosZ = para->getParH(level)->cornerNodesXtoZ.sendPos[i]; + real &f = para->getParH(level)->recvProcessNeighborX[recvPosX.first].f[0][recvPosX.second]; + para->getParH(level)->sendProcessNeighborZ[sendPosZ.first].f[0][sendPosZ.second] = f; + } + } + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // copy corner received node values from y + if (para->getNumberOfProcessNeighborsY(level, "receive") > 0) { + for (uint i = 0; i < para->getParH(level)->cornerNodesYtoZ.recvPos.size(); i++) { + std::pair<int, int> &recvPosY = para->getParH(level)->cornerNodesYtoZ.recvPos[i]; + std::pair<int, int> &sendPosZ = para->getParH(level)->cornerNodesYtoZ.sendPos[i]; + real &f = para->getParH(level)->recvProcessNeighborY[recvPosY.first].f[0][recvPosY.second]; + para->getParH(level)->sendProcessNeighborZ[sendPosZ.first].f[0][sendPosZ.second] = f; + } + } + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp index 30555cde10abaca923a9b8768466111e5b675146..6a4761963c3dd2e8928097d4690a1941c8387d9b 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp +++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp @@ -299,7 +299,14 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std //findPressQShip(para); //output << "done.\n"; - + ////////////////////////////////////////////////////////////////////////// + // find indices of corner nodes for 
multiGPU communication
+    //////////////////////////////////////////////////////////////////////////
+    if (para->getDevices().size() > 2) {
+        output << "Find indices of corner nodes for multiGPU communication ...";
+        para->findCornerNodesCommMultiGPU();
+        output << "done.\n";
+    }
     //////////////////////////////////////////////////////////////////////////
     //Memory alloc for CheckPoint / Restart
     //////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
index 6a75de3b0bfb61869cd8d29009cbe133ba9eaaab..2ed642ec5f7e7a324b1d87aa8d95318657f89623 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
@@ -2492,7 +2492,94 @@ void Parameter::setUseStreams() {
 
 bool Parameter::getUseStreams() { return this->useStreams; }
 
-CudaStreamManager &Parameter::getStreamManager()
-{ return this->cudaStreamManager; }
+CudaStreamManager &Parameter::getStreamManager() { return this->cudaStreamManager; }
+
+// For every level in parH, fills cornerNodesXtoY, cornerNodesXtoZ and cornerNodesYtoZ with the
+// buffer positions of nodes that occur in both a receive buffer and a later send buffer.
+void Parameter::findCornerNodesCommMultiGPU() {
+    for (uint level = 0; level < parH.size(); level++) {
+        findCornerNodesXY(level);
+        findCornerNodesXZ(level);
+        findCornerNodesYZ(level);
+    }
+}
+
+// Scans all X receive buffers of `level`; for each node index that is also found in a Y send
+// buffer, appends (X neighbor, position in its buffer) to cornerNodesXtoY.recvPos.
+void Parameter::findCornerNodesXY(int level)
+{
+    for (uint i = 0; i < (unsigned int)(this->getNumberOfProcessNeighborsX(level, "receive")); i++)
+        for (int j = 0; j < parH[level]->recvProcessNeighborX[i].numberOfNodes; j++) {
+            int index = parH[level]->recvProcessNeighborX[i].index[j];
+            bool foundIndex = findIndexInSendNodesXY(level, index);
+            if (foundIndex)
+                this->parH[level]->cornerNodesXtoY.recvPos.push_back(std::pair(i, j));
+        }
+}
+
+// Searches the Y send buffers of `level` for `index` (inner bound: node count of neighbor k). On
+// the first match appends (k, l) to cornerNodesXtoY.sendPos and returns true, otherwise false.
+bool Parameter::findIndexInSendNodesXY(int level, int index)
+{
+    for (uint k = 0; k < (unsigned int)(this->getNumberOfProcessNeighborsY(level, "send")); k++)
+        for (int l = 0; l < parH[level]->sendProcessNeighborY[k].numberOfNodes; l++)
+            if (parH[level]->sendProcessNeighborY[k].index[l] == index) {
+                this->parH[level]->cornerNodesXtoY.sendPos.push_back(std::pair(k, l));
+                return true;
+            }
+    return false;
+}
+
+// Scans all X receive buffers of `level`; for each node index that is also found in a Z send
+// buffer, appends (X neighbor, position in its buffer) to cornerNodesXtoZ.recvPos.
+void Parameter::findCornerNodesXZ(int level)
+{
+    for (uint i = 0; i < (unsigned int)(this->getNumberOfProcessNeighborsX(level, "receive")); i++)
+        for (int j = 0; j < parH[level]->recvProcessNeighborX[i].numberOfNodes; j++) {
+            int index = parH[level]->recvProcessNeighborX[i].index[j];
+            bool foundIndex = findIndexInSendNodesXZ(level, index);
+            if (foundIndex)
+                this->parH[level]->cornerNodesXtoZ.recvPos.push_back(std::pair(i, j));
+        }
+}
+
+// Searches the Z send buffers of `level` for `index` (inner bound: node count of neighbor k). On
+// the first match appends (k, l) to cornerNodesXtoZ.sendPos and returns true, otherwise false.
+bool Parameter::findIndexInSendNodesXZ(int level, int index)
+{
+    for (uint k = 0; k < (unsigned int)(this->getNumberOfProcessNeighborsZ(level, "send")); k++)
+        for (int l = 0; l < parH[level]->sendProcessNeighborZ[k].numberOfNodes; l++)
+            if (parH[level]->sendProcessNeighborZ[k].index[l] == index) {
+                this->parH[level]->cornerNodesXtoZ.sendPos.push_back(std::pair(k, l));
+                return true;
+            }
+    return false;
+}
+
+// Scans all Y receive buffers of `level`; for each node index that is also found in a Z send
+// buffer, appends (Y neighbor, position in its buffer) to cornerNodesYtoZ.recvPos.
+void Parameter::findCornerNodesYZ(int level)
+{
+    for (uint i = 0; i < (unsigned int)(this->getNumberOfProcessNeighborsY(level, "receive")); i++)
+        for (int j = 0; j < parH[level]->recvProcessNeighborY[i].numberOfNodes; j++) {
+            int index = parH[level]->recvProcessNeighborY[i].index[j];
+            bool foundIndex = findIndexInSendNodesYZ(level, index);
+            if (foundIndex)
+                this->parH[level]->cornerNodesYtoZ.recvPos.push_back(std::pair(i, j));
+        }
+}
+
+// Searches the Z send buffers of `level` for `index` (inner bound: node count of neighbor k). On
+// the first match appends (k, l) to cornerNodesYtoZ.sendPos and returns true, otherwise false.
+bool Parameter::findIndexInSendNodesYZ(int level, int index)
+{
+    for (uint k = 0; k < (unsigned int)(this->getNumberOfProcessNeighborsZ(level, "send")); k++)
+        for (int l = 0; l < parH[level]->sendProcessNeighborZ[k].numberOfNodes; l++)
+            if (parH[level]->sendProcessNeighborZ[k].index[l] == index) {
+                this->parH[level]->cornerNodesYtoZ.sendPos.push_back(std::pair(k, l));
+                return true;
+            }
+    return false;
+}
 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h index 782ca95f72b1a534d82a7dc434af43609a6c5fa0..66f36122f08da3910b42ade203cb6ef7fdc628e5 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h @@ -308,7 +308,16 @@ struct LBMSimulationParameter std::vector<ProcessNeighborF3> recvProcessNeighborF3Y; std::vector<ProcessNeighborF3> recvProcessNeighborF3Z; //////////////////////////////////////////////////////////////////////////// + // 3D domain decomposition: position (index in array) of corner nodes in ProcessNeighbor27 + struct cornerNodePostions { + std::vector<std::pair<int, int>> recvPos; + std::vector<std::pair<int, int>> sendPos; + }; + cornerNodePostions cornerNodesXtoY; + cornerNodePostions cornerNodesXtoZ; + cornerNodePostions cornerNodesYtoZ; + /////////////////////////////////////////////////////// uint *fluidNodeIndices; uint numberOfFluidNodes; uint *fluidNodeIndicesBorder; @@ -854,6 +863,14 @@ public: void setUseStreams(); bool getUseStreams(); CudaStreamManager &getStreamManager(); + + void findCornerNodesCommMultiGPU(); + void findCornerNodesXY(int level); + bool findIndexInSendNodesXY(int level, int index); + void findCornerNodesXZ(int level); + bool findIndexInSendNodesXZ(int level, int index); + void findCornerNodesYZ(int level); + bool findIndexInSendNodesYZ(int level, int index); }; #endif