diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index 59c656812e542066b1232e7a9b3c8c2a97bdbf2b..c17895df364695dfd498a2ae7b2224bb60d62732 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -22,7 +22,7 @@ void UpdateGrid27::updateGrid(Parameter *para, vf::gpu::Communicator *comm, Cuda ////////////////////////////////////////////////////////////////////////// - this->collisionAndExchange(para, pm, level, t, kernels, comm, cudaManager); + this->collisionAndExchange(level, t); ////////////////////////////////////////////////////////////////////////// @@ -44,112 +44,103 @@ void UpdateGrid27::updateGrid(Parameter *para, vf::gpu::Communicator *comm, Cuda ////////////////////////////////////////////////////////////////////////// if( level != para->getFine() ) { - this->refinementAndExchange(para, level, comm, cudaManager); + this->refinementAndExchange(level); } } -void refinementAndExchange_streams(Parameter *para, int level, vf::gpu::Communicator *comm, - CudaMemoryManager *cudaManager) +void UpdateGrid27::refinementAndExchange_streams(int level) { int borderStreamIndex = para->getStreamManager()->getBorderStreamIndex(); int bulkStreamIndex = para->getStreamManager()->getBulkStreamIndex(); // fine to coarse border - fineToCoarseWithStream(para, level, para->getParD(level)->intFCBorder.ICellFCC, + fineToCoarseWithStream(para.get(), level, para->getParD(level)->intFCBorder.ICellFCC, para->getParD(level)->intFCBorder.ICellFCF, para->getParD(level)->intFCBorder.kFC, borderStreamIndex); // prepare exchange and trigger bulk kernel when finished - prepareExchangeMultiGPUAfterFtoC(para, level, borderStreamIndex); + prepareExchangeMultiGPUAfterFtoC(para.get(), level, borderStreamIndex); if (para->getUseStreams()) para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex); // launch bulk kernels (f to c and c to f) para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex); - fineToCoarseWithStream(para, level, para->getParD(level)->intFCBulk.ICellFCC, + fineToCoarseWithStream(para.get(), level, para->getParD(level)->intFCBulk.ICellFCC, para->getParD(level)->intFCBulk.ICellFCF, para->getParD(level)->intFCBulk.kFC, bulkStreamIndex); - coarseToFineWithStream(para, level, para->getParD(level)->intCFBulk.ICellCFC, + coarseToFineWithStream(para.get(), level, para->getParD(level)->intCFBulk.ICellCFC, para->getParD(level)->intCFBulk.ICellCFF, para->getParD(level)->intCFBulk.kCF, para->getParD(level)->offCFBulk, bulkStreamIndex); // exchange - exchangeMultiGPUAfterFtoC(para, comm, cudaManager, level, borderStreamIndex); + exchangeMultiGPUAfterFtoC(para.get(), comm, cudaManager.get(), level, borderStreamIndex); // coarse to fine border - coarseToFineWithStream(para, level, para->getParD(level)->intCFBorder.ICellCFC, + coarseToFineWithStream(para.get(), level, para->getParD(level)->intCFBorder.ICellCFC, para->getParD(level)->intCFBorder.ICellCFF, para->getParD(level)->intCFBorder.kCF, para->getParD(level)->offCF, borderStreamIndex); } -void refinementAndExchange_noStreams_onlyExchangeInterface(Parameter *para, int level, vf::gpu::Communicator *comm, - CudaMemoryManager *cudaManager) +void UpdateGrid27::refinementAndExchange_noStreams_onlyExchangeInterface(int level) { - fineToCoarse(para, level); + fineToCoarse(para.get(), level); - prepareExchangeMultiGPUAfterFtoC(para, level, -1); - exchangeMultiGPUAfterFtoC(para, comm, cudaManager, level, -1); + prepareExchangeMultiGPUAfterFtoC(para.get(), level, -1); + exchangeMultiGPUAfterFtoC(para.get(), comm, cudaManager.get(), level, -1); - coarseToFine(para, level); + coarseToFine(para.get(), level); } -void refinementAndExchange_noStreams_completeExchange(Parameter *para, int level, vf::gpu::Communicator *comm, - CudaMemoryManager *cudaManager) +void UpdateGrid27::refinementAndExchange_noStreams_completeExchange(int level) { - fineToCoarse(para, level); + fineToCoarse(para.get(), level); - prepareExchangeMultiGPU(para, level, -1); - exchangeMultiGPU(para, comm, cudaManager, level, -1); + prepareExchangeMultiGPU(para.get(), level, -1); + exchangeMultiGPU(para.get(), comm, cudaManager.get(), level, -1); - coarseToFine(para, level); + coarseToFine(para.get(), level); } -void refinementAndExchange_noExchange(Parameter *para, int level, vf::gpu::Communicator *comm, - CudaMemoryManager *cudaManager) +void UpdateGrid27::refinementAndExchange_noExchange(int level) { - fineToCoarse(para, level); - coarseToFine(para, level); + fineToCoarse(para.get(), level); + coarseToFine(para.get(), level); } -void collisionAndExchange_noStreams_indexKernel(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, - int level, unsigned int t, std::vector<SPtr<Kernel>> &kernels, - vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager) +void UpdateGrid27::collisionAndExchange_noStreams_indexKernel(int level, unsigned int t) { - collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndices, + collisionUsingIndex(para.get(), pm, level, t, kernels, para->getParD(level)->fluidNodeIndices, para->getParD(level)->numberOfFluidNodes, -1); - prepareExchangeMultiGPU(para, level, -1); - exchangeMultiGPU(para, comm, cudaManager, level, -1); + prepareExchangeMultiGPU(para.get(), level, -1); + exchangeMultiGPU(para.get(), comm, cudaManager.get(), level, -1); } -void collisionAndExchange_noStreams_oldKernel(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, - int level, unsigned int t, std::vector<SPtr<Kernel>> &kernels, - vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager) +void UpdateGrid27::collisionAndExchange_noStreams_oldKernel(int level, unsigned int t) { - collision(para, pm, level, t, kernels); - prepareExchangeMultiGPU(para, level, -1); - exchangeMultiGPU(para, comm, cudaManager, level, -1); + collision(para.get(), pm, level, t, kernels); + prepareExchangeMultiGPU(para.get(), level, -1); + exchangeMultiGPU(para.get(), comm, cudaManager.get(), level, -1); } -void collisionAndExchange_streams(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, - unsigned int t, std::vector<SPtr<Kernel>> &kernels, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager) +void UpdateGrid27::collisionAndExchange_streams(int level, unsigned int t) { int borderStreamIndex = para->getStreamManager()->getBorderStreamIndex(); int bulkStreamIndex = para->getStreamManager()->getBulkStreamIndex(); // launch border kernel - collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndicesBorder, + collisionUsingIndex(para.get(), pm, level, t, kernels, para->getParD(level)->fluidNodeIndicesBorder, para->getParD(level)->numberOffluidNodesBorder, borderStreamIndex); // prepare exchange and trigger bulk kernel when finished - prepareExchangeMultiGPU(para, level, borderStreamIndex); + prepareExchangeMultiGPU(para.get(), level, borderStreamIndex); if (para->getUseStreams()) para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex); // launch bulk kernel para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex); - collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndices, + collisionUsingIndex(para.get(), pm, level, t, kernels, para->getParD(level)->fluidNodeIndices, para->getParD(level)->numberOfFluidNodes, bulkStreamIndex); - exchangeMultiGPU(para, comm, cudaManager, level, borderStreamIndex); + exchangeMultiGPU(para.get(), comm, cudaManager.get(), level, borderStreamIndex); } void collision(Parameter* para, std::vector<std::shared_ptr<PorousMedia>>& pm, int level, unsigned int t, std::vector < SPtr< Kernel>>& kernels) @@ -1449,21 +1440,20 @@ void coarseToFineWithStream(Parameter *para, int level, uint *iCellCFC, uint *iC } -UpdateGrid27::UpdateGrid27(Parameter *para) { - chooseFunctionForCollisionAndExchange(para); - chooseFunctionForRefinementAndExchange(para); +UpdateGrid27::UpdateGrid27(SPtr<Parameter> para, vf::gpu::Communicator *comm, SPtr<CudaMemoryManager> cudaManager, + std::vector<std::shared_ptr<PorousMedia>> &pm, std::vector<SPtr<Kernel>> &kernels) + : para(para), comm(comm), cudaManager(cudaManager), pm(pm), kernels(kernels) +{ + chooseFunctionForCollisionAndExchange(); + chooseFunctionForRefinementAndExchange(); } -void UpdateGrid27::chooseFunctionForCollisionAndExchange(Parameter *para) +void UpdateGrid27::chooseFunctionForCollisionAndExchange() { std::cout << "Function used for collisionAndExchange: "; if (para->getUseStreams() && para->getNumprocs() > 1 && para->getKernelNeedsFluidNodeIndicesToRun()) { - this->collisionAndExchange = [](Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, - unsigned int t, std::vector<SPtr<Kernel>> &kernels, vf::gpu::Communicator *comm, - CudaMemoryManager *cudaManager) { - collisionAndExchange_streams(para, pm, level, t, kernels, comm, cudaManager); - }; + this->collisionAndExchange = [this](int level, unsigned int t) { collisionAndExchange_streams(level, t); }; std::cout << "collisionAndExchange_streams()" << std::endl; } else if (para->getUseStreams() && !para->getKernelNeedsFluidNodeIndicesToRun()) { @@ -1471,58 +1461,47 @@ void UpdateGrid27::chooseFunctionForCollisionAndExchange(Parameter *para) } else if (para->getUseStreams() && para->getNumprocs() <= 1) { std::cout << "Cuda Streams can only be used with multiple MPI processes." << std::endl; - + } else if (!para->getUseStreams() && para->getKernelNeedsFluidNodeIndicesToRun()) { - this->collisionAndExchange = [](Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, - unsigned int t, std::vector<SPtr<Kernel>> &kernels, vf::gpu::Communicator *comm, - CudaMemoryManager *cudaManager) { - collisionAndExchange_noStreams_indexKernel(para, pm, level, t, kernels, comm, cudaManager); + this->collisionAndExchange = [this](int level, unsigned int t) { + collisionAndExchange_noStreams_indexKernel(level, t); }; std::cout << "collisionAndExchange_noStreams_indexKernel()" << std::endl; - + } else if (!para->getUseStreams() && !para->getKernelNeedsFluidNodeIndicesToRun()) { - this->collisionAndExchange = [](Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, - unsigned int t, std::vector<SPtr<Kernel>> &kernels, vf::gpu::Communicator *comm, - CudaMemoryManager *cudaManager) { - collisionAndExchange_noStreams_oldKernel(para, pm, level, t, kernels, comm, cudaManager); + this->collisionAndExchange = [this](int level, unsigned int t) { + collisionAndExchange_noStreams_oldKernel(level, t); }; std::cout << "collisionAndExchange_noStreams_oldKernel()" << std::endl; - + } else { std::cout << "Invalid Configuration for collision and exchange" << std::endl; } } -void UpdateGrid27::chooseFunctionForRefinementAndExchange(Parameter *para) +void UpdateGrid27::chooseFunctionForRefinementAndExchange() { std::cout << "Function used for refinementAndExchange: "; if (para->getMaxLevel() == 0) { - this->refinementAndExchange = [](Parameter *para, int level, vf::gpu::Communicator *comm, - CudaMemoryManager *cudaManager) {}; + this->refinementAndExchange = [](int level) {}; std::cout << "only one level - no function needed." << std::endl; - } else if (para->getNumprocs() == 1){ - this->refinementAndExchange = [](Parameter *para, int level, vf::gpu::Communicator *comm, - CudaMemoryManager *cudaManager) { - refinementAndExchange_noExchange(para, level, comm, cudaManager); - }; + + } else if (para->getNumprocs() == 1) { + this->refinementAndExchange = [this](int level) { this->refinementAndExchange_noExchange(level); }; std::cout << "refinementAndExchange_noExchange()" << std::endl; + } else if (para->getUseStreams() && para->getNumprocs() > 1 && para->useReducedCommunicationAfterFtoC) { - this->refinementAndExchange = [](Parameter *para, int level, vf::gpu::Communicator *comm, - CudaMemoryManager *cudaManager) { - refinementAndExchange_streams(para, level, comm, cudaManager); - }; + this->refinementAndExchange = [this](int level) { refinementAndExchange_streams(level); }; std::cout << "refinementAndExchange_streams()" << std::endl; + } else if (para->getNumprocs() > 1 && para->useReducedCommunicationAfterFtoC) { - this->refinementAndExchange = [](Parameter *para, int level, vf::gpu::Communicator *comm, - CudaMemoryManager *cudaManager) { - refinementAndExchange_noStreams_onlyExchangeInterface(para, level, comm, cudaManager); + this->refinementAndExchange = [this](int level) { + refinementAndExchange_noStreams_onlyExchangeInterface(level); }; std::cout << "refinementAndExchange_noStreams_onlyExchangeInterface()" << std::endl; + } else { - this->refinementAndExchange = [](Parameter *para, int level, vf::gpu::Communicator *comm, - CudaMemoryManager *cudaManager) { - refinementAndExchange_noStreams_completeExchange(para, level, comm, cudaManager); - }; + this->refinementAndExchange = [this](int level) { refinementAndExchange_noStreams_completeExchange(level); }; std::cout << "refinementAndExchange_noStreams_completeExchange()" << std::endl; } } \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h index 56e8d7ea18091ec3227f81f123afe496013e2f87..cb0fb720c19853dfd4e7e69bc546dd802338ecba 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h @@ -13,34 +13,39 @@ class Kernel; class UpdateGrid27 { public: - UpdateGrid27(Parameter *para); + UpdateGrid27(SPtr<Parameter> para, vf::gpu::Communicator *comm, SPtr<CudaMemoryManager> cudaManager, + std::vector<std::shared_ptr<PorousMedia>> &pm, std::vector<SPtr<Kernel>> &kernels); void updateGrid(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, unsigned int t, std::vector<SPtr<Kernel>> &kernels); private: - std::function<void(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, unsigned int t, - std::vector<SPtr<Kernel>> &kernels, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager)> - collisionAndExchange = nullptr; - std::function<void(Parameter *para, int level, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager)> - refinementAndExchange = nullptr; + std::function<void(int level, unsigned int t)> collisionAndExchange = nullptr; + std::function<void(int level)> refinementAndExchange = nullptr; - void chooseFunctionForCollisionAndExchange(Parameter *para); - void chooseFunctionForRefinementAndExchange(Parameter *para); + void chooseFunctionForCollisionAndExchange(); + void chooseFunctionForRefinementAndExchange(); + // functions for collision and exchange + void collisionAndExchange_noStreams_indexKernel(int level, unsigned int t); + void collisionAndExchange_noStreams_oldKernel(int level, unsigned int t); + void collisionAndExchange_streams(int level, unsigned int t); + + // functions for refinement and exchange + void refinementAndExchange_streams(int level); + void refinementAndExchange_noStreams_onlyExchangeInterface(int level); + void refinementAndExchange_noStreams_completeExchange(int level); + void refinementAndExchange_noExchange(int level); -}; -extern "C" void collisionAndExchange_noStreams_indexKernel(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, - int level, unsigned int t, std::vector<SPtr<Kernel>> &kernels, - vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager); + SPtr<Parameter> para; + vf::gpu::Communicator *comm; + SPtr<CudaMemoryManager> cudaManager; + std::vector<std::shared_ptr<PorousMedia>> pm; + std::vector<SPtr<Kernel>> kernels; +}; -extern "C" void collisionAndExchange_noStreams_oldKernel(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, - int level, unsigned int t, std::vector<SPtr<Kernel>> &kernels, - vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager); -extern "C" void collisionAndExchange_streams(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, - unsigned int t, std::vector<SPtr<Kernel>> &kernels, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager); extern "C" void collision(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, unsigned int t, std::vector<SPtr<Kernel>> &kernels); @@ -73,15 +78,6 @@ extern "C" void coarseToFine(Parameter* para, int level); extern "C" void coarseToFineWithStream(Parameter *para, int level, uint *iCellCFC, uint *iCellCFF, uint k_CF, OffCF &offCF, int streamIndex); -extern "C" void refinementAndExchange_streams(Parameter *para, int level, vf::gpu::Communicator *comm, - CudaMemoryManager *cudaManager); -extern "C" void refinementAndExchange_noStreams_onlyExchangeInterface(Parameter *para, int level, - vf::gpu::Communicator *comm, - CudaMemoryManager *cudaManager); -extern "C" void refinementAndExchange_noStreams_completeExchange(Parameter *para, int level, - vf::gpu::Communicator *comm, - CudaMemoryManager *cudaManager); -extern "C" void refinementAndExchange_noExchange(Parameter *para, int level, vf::gpu::Communicator *comm, - CudaMemoryManager *cudaManager); + #endif diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp index 3c3f159d4886c77dea0ad6b3d35538999e431a9e..9ca582a0eb480f2180c2968a0e5713ad4cc41426 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp +++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp @@ -361,7 +361,7 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std ////////////////////////////////////////////////////////////////////////// // Init UpdateGrid ////////////////////////////////////////////////////////////////////////// - this->updateGrid27 = std::make_unique<UpdateGrid27>(para.get()); + this->updateGrid27 = std::make_unique<UpdateGrid27>(para, comm, cudaManager, pm, kernels); ////////////////////////////////////////////////////////////////////////// //Print Init