# Patch summary (descriptive header only; patch tools skip text that precedes the first "diff --git" line).
#
# Files touched: UpdateGrid27.cpp and UpdateGrid27.h under src/gpu/VirtualFluids_GPU/Calculation/.
#
# 1. Rename: UpdateGrid27::refinementAndExchange_streams becomes
#    UpdateGrid27::refinementAndExchange_streams_onlyExchangeInterface
#    (definition, declaration, the function-pointer assignment and the name printed to std::cout).
#
# 2. New method UpdateGrid27::refinementAndExchange_streams_completeExchange(int level).
#    Order of calls as written in the added hunk:
#      a. fineToCoarseWithStream on intFCBorder, issued with borderStreamIndex
#      b. prepareExchangeMultiGPU with borderStreamIndex, then triggerStartBulkKernel(borderStreamIndex)
#         guarded by para->getUseStreams()
#      c. waitOnStartBulkKernelEvent(bulkStreamIndex), then fineToCoarseWithStream on intFCBulk and
#         coarseToFineWithStream on intCFBulk (offset argument offCFBulk), both with bulkStreamIndex
#      d. exchangeMultiGPU with borderStreamIndex
#      e. coarseToFineWithStream on intCFBorder (offset argument offCF) with borderStreamIndex
#      f. cudaDeviceSynchronize()
#
# 3. chooseFunctionForRefinementAndExchange():
#      - the streams_onlyExchangeInterface branch tests the same three conditions as before, reordered
#        (getNumprocs() > 1 first);
#      - new branch: getNumprocs() > 1 && getUseStreams() && !useReducedCommunicationAfterFtoC
#        selects refinementAndExchange_streams_completeExchange;
#      - the noStreams_onlyExchangeInterface branch now additionally requires !getUseStreams();
#      - the final else still selects refinementAndExchange_noStreams_completeExchange.
#
# 4. UpdateGrid27.h: declarations updated to match; the declaration of
#    refinementAndExchange_noRefinementAndExchange moves below refinementAndExchange_noStreams_completeExchange.
#
# NOTE(review): within the selection chain shown here, streams_completeExchange is only chosen when
#   getUseStreams() is true, so the getUseStreams() guard around triggerStartBulkKernel looks redundant,
#   while the matching waitOnStartBulkKernelEvent call is unguarded. Confirm there are no other callers
#   before simplifying.
# NOTE(review): the added branch is written "else if(...){" whereas the neighbouring branches use
#   "else if (...) {". Consider matching the surrounding spacing.
# NOTE(review): the final "-"/"+" pair before "} else {" appears to be a whitespace-only change. Confirm it
#   does not introduce trailing whitespace.
# NOTE(review): the text below has lost its original line breaks and indentation; restore them from the
#   originating commit before attempting to apply this patch.
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index c47c14521efc7833058fc7816e298aeb4d333d7f..cd53553174ee74de2b3ff32b1ff0356ec79cc436 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -48,7 +48,7 @@ void UpdateGrid27::updateGrid(int level, unsigned int t) void UpdateGrid27::refinementAndExchange_noRefinementAndExchange(int level) {} -void UpdateGrid27::refinementAndExchange_streams(int level) +void UpdateGrid27::refinementAndExchange_streams_onlyExchangeInterface(int level) { int borderStreamIndex = para->getStreamManager()->getBorderStreamIndex(); int bulkStreamIndex = para->getStreamManager()->getBulkStreamIndex(); @@ -82,6 +82,40 @@ void UpdateGrid27::refinementAndExchange_streams(int level) cudaDeviceSynchronize(); } +void UpdateGrid27::refinementAndExchange_streams_completeExchange(int level) +{ + int borderStreamIndex = para->getStreamManager()->getBorderStreamIndex(); + int bulkStreamIndex = para->getStreamManager()->getBulkStreamIndex(); + + // fine to coarse border + fineToCoarseWithStream(para.get(), level, para->getParD(level)->intFCBorder.ICellFCC, + para->getParD(level)->intFCBorder.ICellFCF, para->getParD(level)->intFCBorder.kFC, + borderStreamIndex); + + // prepare exchange and trigger bulk kernel when finished + prepareExchangeMultiGPU(para.get(), level, borderStreamIndex); + if (para->getUseStreams()) + para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex); + + // launch bulk kernels (f to c and c to f) + para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex); + fineToCoarseWithStream(para.get(), level, para->getParD(level)->intFCBulk.ICellFCC, + para->getParD(level)->intFCBulk.ICellFCF, para->getParD(level)->intFCBulk.kFC, + bulkStreamIndex); + coarseToFineWithStream(para.get(), level, para->getParD(level)->intCFBulk.ICellCFC, 
para->getParD(level)->intCFBulk.ICellCFF, para->getParD(level)->intCFBulk.kCF, para->getParD(level)->offCFBulk, + bulkStreamIndex); + + // exchange + exchangeMultiGPU(para.get(), comm, cudaManager.get(), level, borderStreamIndex); + + // coarse to fine border + coarseToFineWithStream(para.get(), level, para->getParD(level)->intCFBorder.ICellCFC, + para->getParD(level)->intCFBorder.ICellCFF, para->getParD(level)->intCFBorder.kCF, para->getParD(level)->offCF, + borderStreamIndex); + cudaDeviceSynchronize(); +} + void UpdateGrid27::refinementAndExchange_noStreams_onlyExchangeInterface(int level) { fineToCoarse(para.get(), level); @@ -1533,14 +1567,18 @@ void UpdateGrid27::chooseFunctionForRefinementAndExchange() this->refinementAndExchange = &UpdateGrid27::refinementAndExchange_noExchange; std::cout << "refinementAndExchange_noExchange()" << std::endl; - } else if (para->getUseStreams() && para->getNumprocs() > 1 && para->useReducedCommunicationAfterFtoC) { - this->refinementAndExchange = &UpdateGrid27::refinementAndExchange_streams; - std::cout << "refinementAndExchange_streams()" << std::endl; + } else if (para->getNumprocs() > 1 && para->getUseStreams() && para->useReducedCommunicationAfterFtoC) { + this->refinementAndExchange = &UpdateGrid27::refinementAndExchange_streams_onlyExchangeInterface; + std::cout << "refinementAndExchange_streams_onlyExchangeInterface()" << std::endl; + + } else if(para->getNumprocs() > 1 && para->getUseStreams() && !para->useReducedCommunicationAfterFtoC){ + this->refinementAndExchange = &UpdateGrid27::refinementAndExchange_streams_completeExchange; + std::cout << "refinementAndExchange_streams_completeExchange()" << std::endl; - } else if (para->getNumprocs() > 1 && para->useReducedCommunicationAfterFtoC) { + } else if (para->getNumprocs() > 1 && !para->getUseStreams() && para->useReducedCommunicationAfterFtoC) { this->refinementAndExchange = &UpdateGrid27::refinementAndExchange_noStreams_onlyExchangeInterface; std::cout << 
"refinementAndExchange_noStreams_onlyExchangeInterface()" << std::endl; - + } else { this->refinementAndExchange = &UpdateGrid27::refinementAndExchange_noStreams_completeExchange; std::cout << "refinementAndExchange_noStreams_completeExchange()" << std::endl; diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h index d55e4ee7283c1cf0436f5f418804912c426be151..086b39fcb2745cbd66467d1316c25482dd2ab323 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h @@ -32,10 +32,11 @@ private: void collisionAndExchange_streams(int level, unsigned int t); // functions for refinement and exchange - void refinementAndExchange_noRefinementAndExchange(int level); - void refinementAndExchange_streams(int level); + void refinementAndExchange_streams_onlyExchangeInterface(int level); + void refinementAndExchange_streams_completeExchange(int level); void refinementAndExchange_noStreams_onlyExchangeInterface(int level); void refinementAndExchange_noStreams_completeExchange(int level); + void refinementAndExchange_noRefinementAndExchange(int level); void refinementAndExchange_noExchange(int level);