diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
index b7de4658fee51be3ca4838974b8749e7c49b5915..6e73c1e88faca72d0c96af53f60d4d87ae5309f3 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
@@ -196,9 +196,9 @@ void collisionAdvectionDiffusion(Parameter* para, int level)
 void prepareExchangeMultiGPU(Parameter *para, int level, int streamIndex)
 {
     if (para->getNumprocs() > 1) {
-        prepareExchangePostCollDataXGPU27(para, level, streamIndex);
-        prepareExchangePostCollDataYGPU27(para, level, streamIndex);
-        prepareExchangePostCollDataZGPU27(para, level, streamIndex);
+        prepareExchangeCollDataXGPU27(para, level, streamIndex);
+        prepareExchangeCollDataYGPU27(para, level, streamIndex);
+        prepareExchangeCollDataZGPU27(para, level, streamIndex);
     }
 }
 
@@ -211,9 +211,9 @@ void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryMa
 
     //////////////////////////////////////////////////////////////////////////
     //3D domain decomposition
-    exchangePostCollDataXGPU27(para, comm, cudaManager, level, streamIndex);
-    exchangePostCollDataYGPU27(para, comm, cudaManager, level, streamIndex);
-    exchangePostCollDataZGPU27(para, comm, cudaManager, level, streamIndex);
+    exchangeCollDataXGPU27(para, comm, cudaManager, level, streamIndex);
+    exchangeCollDataYGPU27(para, comm, cudaManager, level, streamIndex);
+    exchangeCollDataZGPU27(para, comm, cudaManager, level, streamIndex);
     //////////////////////////////////////////////////////////////////////////
     //3D domain decomposition convection diffusion
 
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
index 4bbc63378c81bc6b97f2a217edc92d24d2f75a9d..4a0cae9b223f0170ae1831406787bd5b777729a5 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
@@ -7,88 +7,7 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // X
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
-//{
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //copy Device to Host
-//    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
-//    {
-//        GetSendFsPreDev27(para->getParD(level)->d0SP.f[0],
-//                          para->getParD(level)->sendProcessNeighborX[i].f[0],
-//                          para->getParD(level)->sendProcessNeighborX[i].index,
-//                          para->getParD(level)->sendProcessNeighborX[i].numberOfNodes,
-//                          para->getParD(level)->neighborX_SP,
-//                          para->getParD(level)->neighborY_SP,
-//                          para->getParD(level)->neighborZ_SP,
-//                          para->getParD(level)->size_Mat_SP,
-//                          para->getParD(level)->evenOrOdd,
-//                          para->getParD(level)->numberofthreads);
-//        //////////////////////////////////////////////////////////////////////////
-//        cudaManager->cudaCopyProcessNeighborXFsDH(level, i);
-//    }
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //start non blocking MPI receive
-//    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
-//    {
-//        comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborX[i].f[0],
-//                            para->getParH(level)->recvProcessNeighborX[i].numberOfFs,
-//                            para->getParH(level)->recvProcessNeighborX[i].rankNeighbor);
-//    }
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    ////start non blocking MPI send
-//    //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
-//    //{
-//    //    comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
-//    //                        para->getParH(level)->sendProcessNeighborX[i].numberOfFs,
-//    //                        para->getParH(level)->sendProcessNeighborX[i].rankNeighbor);
-//    //}
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    ////Waitall
-//    //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
-//    //{
-//    //    comm->waitallGPU();
-//    //}
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //start blocking MPI send
-//    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
-//    {
-//        comm->sendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
-//                          para->getParH(level)->sendProcessNeighborX[i].numberOfFs,
-//                          para->getParH(level)->sendProcessNeighborX[i].rankNeighbor);
-//    }
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //Wait
-//    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
-//    {
-//        comm->waitGPU(i);
-//    }
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //reset the request array
-//    if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
-//    {
-//        comm->resetRequest();
-//    }
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //copy Host to Device
-//    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
-//    {
-//        cudaManager->cudaCopyProcessNeighborXFsHD(level, i);
-//        //////////////////////////////////////////////////////////////////////////
-//        SetRecvFsPreDev27(para->getParD(level)->d0SP.f[0],
-//                          para->getParD(level)->recvProcessNeighborX[i].f[0],
-//                          para->getParD(level)->recvProcessNeighborX[i].index,
-//                          para->getParD(level)->recvProcessNeighborX[i].numberOfNodes,
-//                          para->getParD(level)->neighborX_SP,
-//                          para->getParD(level)->neighborY_SP,
-//                          para->getParD(level)->neighborZ_SP,
-//                          para->getParD(level)->size_Mat_SP,
-//                          para->getParD(level)->evenOrOdd,
-//                          para->getParD(level)->numberofthreads);
-//    }
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//}
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void prepareExchangePostCollDataXGPU27(Parameter *para, int level, int streamIndex)
+void prepareExchangeCollDataXGPU27(Parameter *para, int level, int streamIndex)
 {
     cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex);
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
@@ -105,7 +24,7 @@ void prepareExchangePostCollDataXGPU27(Parameter *para, int level, int streamInd
                            stream);
 }
 
-void exchangePostCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level,
+void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level,
                             int streamIndex)
 {
     cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex);
@@ -188,100 +107,7 @@ void exchangePostCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, Cu
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Y
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePreCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level,
-//                               int streamIndex)
-//{
-//    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex);
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //copy Device to Host
-//    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
-//    {
-//        GetSendFsPreDev27(para->getParD(level)->d0SP.f[0],
-//                          para->getParD(level)->sendProcessNeighborY[i].f[0],
-//                          para->getParD(level)->sendProcessNeighborY[i].index,
-//                          para->getParD(level)->sendProcessNeighborY[i].numberOfNodes,
-//                          para->getParD(level)->neighborX_SP,
-//                          para->getParD(level)->neighborY_SP,
-//                          para->getParD(level)->neighborZ_SP,
-//                          para->getParD(level)->size_Mat_SP,
-//                          para->getParD(level)->evenOrOdd,
-//                          para->getParD(level)->numberofthreads,
-//                          stream);
-//        //////////////////////////////////////////////////////////////////////////);
-//        cudaManager->cudaCopyProcessNeighborYFsDH(level, i, streamIndex);
-//    }
-//
-//    //if (para->getUseStreams() && startBulkKernel!=nullptr)
-//    //    cudaEventRecord(*startBulkKernel, stream);
-//
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //start non blocking MPI receive
-//    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
-//    {
-//        comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborY[i].f[0],
-//                            para->getParH(level)->recvProcessNeighborY[i].numberOfFs,
-//                            para->getParH(level)->recvProcessNeighborY[i].rankNeighbor);
-//    }
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    ////start non blocking MPI send
-//    //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
-//    //{
-//    //    comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
-//    //                        para->getParH(level)->sendProcessNeighborY[i].numberOfFs,
-//    //                        para->getParH(level)->sendProcessNeighborY[i].rankNeighbor);
-//    //}
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    ////Waitall
-//    //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
-//    //{
-//    //    comm->waitallGPU();
-//    //}
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    // wait for memcopy host to device to finish before sending data
-//    if (para->getUseStreams())
-//        cudaStreamSynchronize(stream);
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //start blocking MPI send
-//    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
-//    {
-//        comm->sendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
-//                          para->getParH(level)->sendProcessNeighborY[i].numberOfFs,
-//                          para->getParH(level)->sendProcessNeighborY[i].rankNeighbor);
-//    }
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //Wait
-//    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
-//    {
-//        comm->waitGPU(i);
-//    }
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //reset the request array
-//    if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
-//    {
-//        comm->resetRequest();
-//    }
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //copy Host to Device
-//    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
-//    {
-//        cudaManager->cudaCopyProcessNeighborYFsHD(level, i, streamIndex);
-//        //////////////////////////////////////////////////////////////////////////
-//        SetRecvFsPreDev27(para->getParD(level)->d0SP.f[0],
-//                          para->getParD(level)->recvProcessNeighborY[i].f[0],
-//                          para->getParD(level)->recvProcessNeighborY[i].index,
-//                          para->getParD(level)->recvProcessNeighborY[i].numberOfNodes,
-//                          para->getParD(level)->neighborX_SP,
-//                          para->getParD(level)->neighborY_SP,
-//                          para->getParD(level)->neighborZ_SP,
-//                          para->getParD(level)->size_Mat_SP,
-//                          para->getParD(level)->evenOrOdd,
-//                          para->getParD(level)->numberofthreads,
-//                          stream);
-//    }
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//}
-
-void prepareExchangePostCollDataYGPU27(Parameter *para, int level, int streamIndex)
+void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex)
 {
     cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex);
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
@@ -298,7 +124,7 @@ void prepareExchangePostCollDataYGPU27(Parameter *para, int level, int streamInd
                            stream);
 }
 
-void exchangePostCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level,
+void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level,
                             int streamIndex)
 {
     cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex);
@@ -382,88 +208,7 @@ void exchangePostCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, Cu
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Z
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
-//{
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //copy Device to Host
-//    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
-//    {
-//        GetSendFsPreDev27(para->getParD(level)->d0SP.f[0],
-//                          para->getParD(level)->sendProcessNeighborZ[i].f[0],
-//                          para->getParD(level)->sendProcessNeighborZ[i].index,
-//                          para->getParD(level)->sendProcessNeighborZ[i].numberOfNodes,
-//                          para->getParD(level)->neighborX_SP,
-//                          para->getParD(level)->neighborY_SP,
-//                          para->getParD(level)->neighborZ_SP,
-//                          para->getParD(level)->size_Mat_SP,
-//                          para->getParD(level)->evenOrOdd,
-//                          para->getParD(level)->numberofthreads);
-//        //////////////////////////////////////////////////////////////////////////
-//        cudaManager->cudaCopyProcessNeighborZFsDH(level, i);
-//    }
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //start non blocking MPI receive
-//    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
-//    {
-//        comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborZ[i].f[0],
-//                            para->getParH(level)->recvProcessNeighborZ[i].numberOfFs,
-//                            para->getParH(level)->recvProcessNeighborZ[i].rankNeighbor);
-//    }
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    ////start non blocking MPI send
-//    //for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
-//    //{
-//    //    comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
-//    //                        para->getParH(level)->sendProcessNeighborZ[i].numberOfFs,
-//    //                        para->getParH(level)->sendProcessNeighborZ[i].rankNeighbor);
-//    //}
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    ////Waitall
-//    //if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
-//    //{
-//    //    comm->waitallGPU();
-//    //}
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //start blocking MPI send
-//    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
-//    {
-//        comm->sendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
-//                          para->getParH(level)->sendProcessNeighborZ[i].numberOfFs,
-//                          para->getParH(level)->sendProcessNeighborZ[i].rankNeighbor);
-//    }
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //Wait
-//    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
-//    {
-//        comm->waitGPU(i);
-//    }
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //reset the request array
-//    if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
-//    {
-//        comm->resetRequest();
-//    }
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//    //copy Host to Device
-//    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
-//    {
-//        cudaManager->cudaCopyProcessNeighborZFsHD(level, i);
-//        //////////////////////////////////////////////////////////////////////////
-//        SetRecvFsPreDev27(para->getParD(level)->d0SP.f[0],
-//                          para->getParD(level)->recvProcessNeighborZ[i].f[0],
-//                          para->getParD(level)->recvProcessNeighborZ[i].index,
-//                          para->getParD(level)->recvProcessNeighborZ[i].numberOfNodes,
-//                          para->getParD(level)->neighborX_SP,
-//                          para->getParD(level)->neighborY_SP,
-//                          para->getParD(level)->neighborZ_SP,
-//                          para->getParD(level)->size_Mat_SP,
-//                          para->getParD(level)->evenOrOdd,
-//                          para->getParD(level)->numberofthreads);
-//    }
-//    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//}
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void prepareExchangePostCollDataZGPU27(Parameter *para, int level, int streamIndex) {
+void prepareExchangeCollDataZGPU27(Parameter *para, int level, int streamIndex) {
     cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex);
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
         GetSendFsPostDev27(para->getParD(level)->d0SP.f[0],
@@ -479,7 +224,7 @@ void prepareExchangePostCollDataZGPU27(Parameter *para, int level, int streamInd
                            stream);
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level,
+void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level,
                             int streamIndex)
 {
     cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex);
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
index d268d096c4b8eadb373106ea1e33c45f3f641aa6..5b494c258e428aafcddfc60903c9962fbbc30469 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
@@ -13,20 +13,16 @@ extern "C" void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator*
 extern "C" void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
 
 //////////////////////////////////////////////////////////////////////////
 //3D domain decomposition
-// unused functions - should be removed?
-// extern "C" void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-// extern "C" void exchangePreCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level);
-// extern "C" void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void prepareExchangePostCollDataXGPU27(Parameter *para, int level, int streamIndex);
-extern "C" void exchangePostCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
-                                           int level, int streamIndex);
-extern "C" void prepareExchangePostCollDataYGPU27(Parameter *para, int level, int streamIndex);
-extern "C" void exchangePostCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
-                                           int level, int streamIndex);
-extern "C" void prepareExchangePostCollDataZGPU27(Parameter *para, int level, int streamIndex);
-extern "C" void exchangePostCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
-                                           int level, int streamIndex);
+extern "C" void prepareExchangeCollDataXGPU27(Parameter *para, int level, int streamIndex);
+extern "C" void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
+                                       int level, int streamIndex);
+extern "C" void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex);
+extern "C" void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
+                                       int level, int streamIndex);
+extern "C" void prepareExchangeCollDataZGPU27(Parameter *para, int level, int streamIndex);
+extern "C" void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
+                                       int level, int streamIndex);
 //////////////////////////////////////////////////////////////////////////
 //3D domain decomposition convection diffusion
 extern "C" void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);