diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index 3ee6f3189f0864b4924280b8bd0d30334d988e8b..be56085e3cc003c9fc7ed68b9a4550493d351cb5 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -87,8 +87,7 @@ void UpdateGrid27::refinementAndExchange_noStreams_onlyExchangeInterface(int lev { fineToCoarse(para.get(), level); - prepareExchangeMultiGPUAfterFtoC(para.get(), level, -1); - exchangeMultiGPUAfterFtoC(para.get(), comm, cudaManager.get(), level, -1); + exchangeMultiGPU_noStreams_withPrepare(para.get(), comm, cudaManager.get(), level, true); coarseToFine(para.get(), level); } @@ -97,8 +96,7 @@ void UpdateGrid27::refinementAndExchange_noStreams_completeExchange(int level) { fineToCoarse(para.get(), level); - prepareExchangeMultiGPU(para.get(), level, -1); - exchangeMultiGPU(para.get(), comm, cudaManager.get(), level, -1); + exchangeMultiGPU_noStreams_withPrepare(para.get(), comm, cudaManager.get(), level, false); coarseToFine(para.get(), level); } @@ -113,15 +111,13 @@ void UpdateGrid27::collisionAndExchange_noStreams_indexKernel(int level, unsigne { collisionUsingIndex(para.get(), pm, level, t, kernels, para->getParD(level)->fluidNodeIndices, para->getParD(level)->numberOfFluidNodes, -1); - prepareExchangeMultiGPU(para.get(), level, -1); - exchangeMultiGPU(para.get(), comm, cudaManager.get(), level, -1); + exchangeMultiGPU_noStreams_withPrepare(para.get(), comm, cudaManager.get(), level, false); } void UpdateGrid27::collisionAndExchange_noStreams_oldKernel(int level, unsigned int t) { collision(para.get(), pm, level, t, kernels); - prepareExchangeMultiGPU(para.get(), level, -1); - exchangeMultiGPU(para.get(), comm, cudaManager.get(), level, -1); + exchangeMultiGPU_noStreams_withPrepare(para.get(), comm, cudaManager.get(), level, false); } void UpdateGrid27::collisionAndExchange_streams(int level, unsigned int t) @@ -300,6 +296,48 @@ void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryMa // 1D domain decomposition // exchangePostCollDataGPU27(para, comm, level); } +void exchangeMultiGPU_noStreams_withPrepare(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, bool useReducedComm) +{ + ////////////////////////////////////////////////////////////////////////// + // 3D domain decomposition + if (useReducedComm) { + // X + prepareExchangeCollDataXGPU27AfterFtoC(para, level, -1); + exchangeCollDataXGPU27AfterFtoC(para, comm, cudaManager, level, -1); + scatterNodesFromRecvBufferXGPU27AfterFtoC(para, level, -1); + // Y + prepareExchangeCollDataYGPU27AfterFtoC(para, level, -1); + exchangeCollDataYGPU27AfterFtoC(para, comm, cudaManager, level, -1); + scatterNodesFromRecvBufferYGPU27AfterFtoC(para, level, -1); + // Z + prepareExchangeCollDataZGPU27AfterFtoC(para, level, -1); + exchangeCollDataZGPU27AfterFtoC(para, comm, cudaManager, level, -1); + scatterNodesFromRecvBufferZGPU27AfterFtoC(para, level, -1); + } else { + // X + prepareExchangeCollDataXGPU27AllNodes(para, level, -1); + exchangeCollDataXGPU27AllNodes(para, comm, cudaManager, level, -1); + scatterNodesFromRecvBufferXGPU27AllNodes(para, level, -1); + // Y + prepareExchangeCollDataYGPU27AllNodes(para, level, -1); + exchangeCollDataYGPU27AllNodes(para, comm, cudaManager, level, -1); + scatterNodesFromRecvBufferYGPU27AllNodes(para, level, -1); + // Z + prepareExchangeCollDataZGPU27AllNodes(para, level, -1); + exchangeCollDataZGPU27AllNodes(para, comm, cudaManager, level, -1); + scatterNodesFromRecvBufferZGPU27AllNodes(para, level, -1); + } + + ////////////////////////////////////////////////////////////////////////// + // 3D domain decomposition convection diffusion + if (para->getDiffOn()) { + if (para->getUseStreams()) + std::cout << "Warning: Cuda streams not yet implemented for convection diffusion" << std::endl; + exchangePostCollDataADXGPU27(para, comm, cudaManager, level); + exchangePostCollDataADYGPU27(para, comm, cudaManager, level); + exchangePostCollDataADZGPU27(para, comm, cudaManager, level); + } +} void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, int streamIndex) { diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h index 3a00940735e505d1a48184d3ae4bf7517dd3de3f..82577339825158895f3d4c2c218ece8c68346904 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h @@ -65,6 +65,8 @@ extern "C" void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, C int level, int streamIndex); extern "C" void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, int streamIndex); +extern "C" void exchangeMultiGPU_noStreams_withPrepare(Parameter *para, vf::gpu::Communicator *comm, + CudaMemoryManager *cudaManager, int level, bool useReducedComm); extern "C" void postCollisionBC(Parameter* para, int level, unsigned int t); diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp index 587d2bd71786d27ef5e3c7643e1a3d7a110e56bb..30d9e7a3d69e16e938d2e8e884c163aca0ef2329 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp +++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp @@ -220,7 +220,7 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe if (para->getUseStreams()) cudaStreamSynchronize(stream); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // edge nodes: copy received node values from x - if (para->getNumberOfProcessNeighborsX(level, "recv") > 0) { + if (para->getUseStreams() && para->getNumberOfProcessNeighborsX(level, "recv") > 0) { uint indexInSubdomainX = 0; uint indexInSubdomainY = 0; uint numNodesInBufferX = 0; @@ -323,7 +323,7 @@ void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe if (para->getUseStreams()) cudaStreamSynchronize(stream); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // edge nodes: copy received node values from x - if (para->getNumberOfProcessNeighborsX(level, "recv") > 0) { + if (para->getUseStreams() && para->getNumberOfProcessNeighborsX(level, "recv") > 0) { uint indexInSubdomainX = 0; uint indexInSubdomainZ = 0; uint numNodesInBufferX = 0; @@ -343,7 +343,7 @@ void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // edge nodes: copy received node values from y - if (para->getNumberOfProcessNeighborsY(level, "recv") > 0) { + if (para->getUseStreams() && para->getNumberOfProcessNeighborsY(level, "recv") > 0) { uint indexInSubdomainY = 0; uint indexInSubdomainZ = 0; uint numNodesInBufferY = 0;