diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index 324e0dbb4f37788f83c42614b7aeb0923e86a0f8..d62e9607d1325aad734458e33f032faaa0251697 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -86,9 +86,10 @@ void updateGrid27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManage if (para->useReducedCommunicationAfterFtoC) { prepareExchangeMultiGPU(para, level, -1); // TODO exchangeMultiGPU(para, comm, cudaManager, level, -1); // TODO + } else { + prepareExchangeMultiGPU(para, level, -1); + exchangeMultiGPU(para, comm, cudaManager, level, -1); } - prepareExchangeMultiGPU(para, level, -1); - exchangeMultiGPU(para, comm, cudaManager, level, -1); coarseToFine(para, level); } else { @@ -216,6 +217,14 @@ void prepareExchangeMultiGPU(Parameter *para, int level, int streamIndex) prepareExchangeCollDataXGPU27(para, level, streamIndex); prepareExchangeCollDataYGPU27(para, level, streamIndex); prepareExchangeCollDataZGPU27(para, level, streamIndex); + } +} + +void prepareExchangeMultiGPUAfterFtoC(Parameter *para, int level, int streamIndex) { + if (para->getNumprocs() > 1) { + prepareExchangeCollDataXGPU27(para, level, streamIndex); + prepareExchangeCollDataYGPU27(para, level, streamIndex, true); + prepareExchangeCollDataZGPU27(para, level, streamIndex); } } diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h index 7a58feb622ad7ed51604b96f7d005d0957fe741e..20da69d3245f746e18d526ae3e47b30ada30bf53 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h @@ -27,6 +27,7 @@ extern "C" void collisionPorousMedia(Parameter* para, std::vector<std::shared_pt extern "C" void collisionAdvectionDiffusion(Parameter* para, int level); extern "C" void prepareExchangeMultiGPU(Parameter *para, int 
level, int streamIndex); +extern "C" void prepareExchangeMultiGPUAfterFtoC(Parameter *para, int level, int streamIndex); extern "C" void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, int streamIndex); diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp index 20b100f035164b3d283921366f9dea6021731dd8..29e5cb4890b9cb24cfbf2c15d98f65eb00cab1c5 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp +++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp @@ -108,14 +108,20 @@ void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Y //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex) +void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex, bool useReducedCommunicationAfterFtoC) { cudaStream_t stream = (streamIndex == -1) ? 
CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); + std::vector<ProcessNeighbor27> *sendProcessNeighbor; + if (useReducedCommunicationAfterFtoC) + sendProcessNeighbor = &para->getParD(level)->sendProcessNeighborsAfterFtoCY; + else + sendProcessNeighbor = &para->getParD(level)->sendProcessNeighborY; + for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) GetSendFsPostDev27(para->getParD(level)->d0SP.f[0], para->getParD(level)->sendProcessNeighborY[i].f[0], para->getParD(level)->sendProcessNeighborY[i].index, - para->getParD(level)->sendProcessNeighborY[i].numberOfNodes, + (*sendProcessNeighbor)[i].numberOfNodes, para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, @@ -133,6 +139,7 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe //copy Device to Host for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) cudaManager->cudaCopyProcessNeighborYFsDH(level, i, streamIndex); + // todo: vorher pointer auf para->getParD(level)->sendProcessNeighborY[i].f[0] für sendProcessNeighborsAfterFtoCY übernehmen /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start non blocking MPI receive diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h index 5b494c258e428aafcddfc60903c9962fbbc30469..6b537242ceddc6e33ac4f3fe65e466411a1b46f1 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h +++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h @@ -17,7 +17,7 @@ extern "C" void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator extern "C" void prepareExchangeCollDataXGPU27(Parameter *para, int level, int streamIndex); extern "C" void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, 
CudaMemoryManager *cudaManager, int level, int streamIndex); -extern "C" void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex); +extern "C" void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex, bool useReducedCommunicationAfterFtoC = false); extern "C" void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, int streamIndex); extern "C" void prepareExchangeCollDataZGPU27(Parameter *para, int level, int streamIndex); diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h index 4f3a56b875567f036c0907ecf6ce3df249d38374..a33b3b792cd451307825fd0b2c8716e942440582 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/LB.h +++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h @@ -235,11 +235,6 @@ typedef struct PN27{ int numberOfFs; }ProcessNeighbor27; -typedef struct PN27AfterFtoC { - uint memsizeFs; - int numberOfNodes; -} ProcessNeighbor27AfterFtoC; - typedef struct PN_F3 { real* g[6]; uint memsizeGs; diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp index 4c702b6bdf52a751c8f7e3880e1542de6fef725f..88531ea387d898986e72216164b6d4be8b205770 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp @@ -1436,7 +1436,7 @@ void Parameter::setIsNeighborZ(bool isNeigbor) void Parameter::setSendProcessNeighborsAfterFtoCX(int numberOfNodes, int level, int arrayIndex) { this->getParH(level)->sendProcessNeighborsAfterFtoCX[arrayIndex].numberOfNodes = numberOfNodes; this->getParD(level)->sendProcessNeighborsAfterFtoCX[arrayIndex].numberOfNodes = numberOfNodes; - this->getParH(level)->sendProcessNeighborsAfterFtoCX[arrayIndex].memsizeFs = sizeof(real) *numberOfNodes; + this->getParH(level)->sendProcessNeighborsAfterFtoCX[arrayIndex].memsizeFs = sizeof(real) * numberOfNodes; 
this->getParD(level)->sendProcessNeighborsAfterFtoCX[arrayIndex].memsizeFs = sizeof(real) * numberOfNodes; } void Parameter::setSendProcessNeighborsAfterFtoCY(int numberOfNodes, int level, int arrayIndex) diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h index 6bd504aae241289a6862d9ab800a775df634532b..d05da93f3cdb401365c7953211158123f731f5a9 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h @@ -294,12 +294,12 @@ struct LBMSimulationParameter std::vector<ProcessNeighbor27> recvProcessNeighborY; std::vector<ProcessNeighbor27> recvProcessNeighborZ; - std::vector<ProcessNeighbor27AfterFtoC> sendProcessNeighborsAfterFtoCX; - std::vector<ProcessNeighbor27AfterFtoC> sendProcessNeighborsAfterFtoCY; - std::vector<ProcessNeighbor27AfterFtoC> sendProcessNeighborsAfterFtoCZ; - std::vector<ProcessNeighbor27AfterFtoC> recvProcessNeighborsAfterFtoCX; - std::vector<ProcessNeighbor27AfterFtoC> recvProcessNeighborsAfterFtoCY; - std::vector<ProcessNeighbor27AfterFtoC> recvProcessNeighborsAfterFtoCZ; + std::vector<ProcessNeighbor27> sendProcessNeighborsAfterFtoCX; + std::vector<ProcessNeighbor27> sendProcessNeighborsAfterFtoCY; + std::vector<ProcessNeighbor27> sendProcessNeighborsAfterFtoCZ; + std::vector<ProcessNeighbor27> recvProcessNeighborsAfterFtoCX; + std::vector<ProcessNeighbor27> recvProcessNeighborsAfterFtoCY; + std::vector<ProcessNeighbor27> recvProcessNeighborsAfterFtoCZ; /////////////////////////////////////////////////////// // 3D domain decomposition convection diffusion std::vector<ProcessNeighbor27> sendProcessNeighborADX;