diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index fd3bc19b2e0965ba8219bada9c3a8bbe27d4fb00..dbb568df74a06d813a7bc3c6d05a3aed0cf62198 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -9,7 +9,7 @@ #include "Kernel/Kernel.h" #include "Parameter/CudaStreamManager.h" -void updateGrid27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, +void UpdateGrid27::updateGrid27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, unsigned int t, std::vector<SPtr<Kernel>> &kernels) { @@ -26,33 +26,7 @@ void updateGrid27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManage ////////////////////////////////////////////////////////////////////////// - if (para->getUseStreams() && para->getNumprocs() > 1) { - // launch border kernel - collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndicesBorder, - para->getParD(level)->numberOffluidNodesBorder, borderStreamIndex); - - // prepare exchange and trigger bulk kernel when finished - prepareExchangeMultiGPU(para, level, borderStreamIndex); - if (para->getUseStreams()) - para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex); - - // launch bulk kernel - para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex); - collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndices, - para->getParD(level)->numberOfFluidNodes, bulkStreamIndex); - - exchangeMultiGPU(para, comm, cudaManager, level, borderStreamIndex); - } else { - if (para->getKernelNeedsFluidNodeIndicesToRun()) - collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndices, - para->getParD(level)->numberOfFluidNodes, -1); - else - collision(para, pm, level, t, kernels); - - 
prepareExchangeMultiGPU(para, level, -1); - exchangeMultiGPU(para, comm, cudaManager, level, -1); - } - + collisionAndExchange(para, pm, level, t, kernels, comm, cudaManager); ////////////////////////////////////////////////////////////////////////// postCollisionBC(para, level, t); @@ -104,6 +78,50 @@ void updateGrid27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManage } } +// Runs collisionUsingIndex on fluidNodeIndices, then prepares and performs the multi-GPU exchange; every call gets stream index -1. +void collisionAndExchange_noStreams_indexKernel(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, + int level, unsigned int t, std::vector<SPtr<Kernel>> &kernels, + vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager) +{ + collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndices, + para->getParD(level)->numberOfFluidNodes, -1); + prepareExchangeMultiGPU(para, level, -1); + exchangeMultiGPU(para, comm, cudaManager, level, -1); +} + +// Runs collision(), then prepares and performs the multi-GPU exchange with stream index -1. +void collisionAndExchange_noStreams_oldKernel(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, + int level, unsigned int t, std::vector<SPtr<Kernel>> &kernels, + vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager) +{ + collision(para, pm, level, t, kernels); + prepareExchangeMultiGPU(para, level, -1); + exchangeMultiGPU(para, comm, cudaManager, level, -1); +} + +// Border nodes run on stream index 1 and fluidNodeIndices on stream index 0; the bulk launch waits on the start-bulk-kernel event triggered after prepareExchangeMultiGPU. +void collisionAndExchange_streams(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, + unsigned int t, std::vector<SPtr<Kernel>> &kernels, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager) +{ + int borderStreamIndex = 1; + int bulkStreamIndex = 0; + // launch border kernel + collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndicesBorder, + para->getParD(level)->numberOffluidNodesBorder, borderStreamIndex); + + // prepare exchange and trigger bulk kernel when finished + prepareExchangeMultiGPU(para, level, borderStreamIndex); + if (para->getUseStreams()) + para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex); + + // launch bulk kernel + 
para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex); + collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndices, + para->getParD(level)->numberOfFluidNodes, bulkStreamIndex); + + exchangeMultiGPU(para, comm, cudaManager, level, borderStreamIndex); +} + void collision(Parameter* para, std::vector<std::shared_ptr<PorousMedia>>& pm, int level, unsigned int t, std::vector < SPtr< Kernel>>& kernels) { kernels.at(level)->run(); @@ -1417,3 +1435,43 @@ void coarseToFine(Parameter* para, int level) } } + +UpdateGrid27::UpdateGrid27() = default; +UpdateGrid27::~UpdateGrid27() = default; +UpdateGrid27::UpdateGrid27(const UpdateGrid27 &updateGrid) = default; +UpdateGrid27::UpdateGrid27(UpdateGrid27 &&updateGrid27) = default; + +UpdateGrid27::UpdateGrid27(Parameter *para) { + chooseFunctionForCollisionAndExchange(para); } + + +// Picks the collisionAndExchange routine once from the run configuration in para. Cuda streams need +// multiple MPI processes and a kernel that runs on fluid node indices; every other configuration uses +// the matching stream-less routine, so collisionAndExchange is never left empty. +void UpdateGrid27::chooseFunctionForCollisionAndExchange(Parameter *para) +{ + const bool useStreams = para->getUseStreams(); + const bool indexKernel = para->getKernelNeedsFluidNodeIndicesToRun(); + + std::cout << "Function used for collisionAndExchange: "; + if (useStreams && indexKernel && para->getNumprocs() > 1) { + this->collisionAndExchange = collisionAndExchange_streams; + std::cout << "collisionAndExchange_streams()" << std::endl; + return; + } + + // Streams were requested but cannot be used: say why, then fall back. Leaving the std::function empty + // would make the first call in updateGrid27 throw std::bad_function_call. + if (useStreams && !indexKernel) + std::cout << "Cuda Streams can only be used with kernels which run using fluidNodesIndices. Falling back to "; + else if (useStreams) + std::cout << "Cuda Streams can only be used with multiple MPI processes. Falling back to "; 
+ + if (indexKernel) { + this->collisionAndExchange = collisionAndExchange_noStreams_indexKernel; + std::cout << "collisionAndExchange_noStreams_indexKernel()" << std::endl; + } else { + this->collisionAndExchange = collisionAndExchange_noStreams_oldKernel; + std::cout << "collisionAndExchange_noStreams_oldKernel()" << std::endl; + } +} diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h index 4f4bf21731b4f5d229e18cb76fe02e84027b3e2f..aa39b81592ffa824a1b56baa48607b4d88d2b553 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h @@ -10,13 +10,37 @@ class Kernel; -extern "C" void updateGrid27(Parameter* para, - vf::gpu::Communicator* comm, - CudaMemoryManager* cudaManager, - std::vector<std::shared_ptr<PorousMedia>>& pm, - int level, - unsigned int t, - std::vector < SPtr< Kernel>>& kernels); +// Wraps updateGrid27(); the collisionAndExchange routine it calls is chosen once in the constructor from para. +class UpdateGrid27 +{ +public: + explicit UpdateGrid27(Parameter *para); + ~UpdateGrid27(); + UpdateGrid27(const UpdateGrid27 &updateGrid); + UpdateGrid27(UpdateGrid27 &&updateGrid27); + + void updateGrid27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, + 
std::vector<std::shared_ptr<PorousMedia>> &pm, int level, unsigned int t, + std::vector<SPtr<Kernel>> &kernels); + +private: + UpdateGrid27(); + std::function<void(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, unsigned int t, + std::vector<SPtr<Kernel>> &kernels, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager)> + collisionAndExchange; + void chooseFunctionForCollisionAndExchange(Parameter *para); +}; + +extern "C" void collisionAndExchange_noStreams_indexKernel(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, + int level, unsigned int t, std::vector<SPtr<Kernel>> &kernels, + vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager); + +extern "C" void collisionAndExchange_noStreams_oldKernel(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, + int level, unsigned int t, std::vector<SPtr<Kernel>> &kernels, + vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager); + +extern "C" void collisionAndExchange_streams(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, + unsigned int t, std::vector<SPtr<Kernel>> &kernels, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager); extern "C" void collision(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, unsigned int t, std::vector<SPtr<Kernel>> &kernels); diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp index 584e919c33730143cf7577008099006c2836a508..2831c741448a7051415562831075291981334e9f 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp +++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp @@ -358,6 +358,11 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std output << "done.\n"; } + ////////////////////////////////////////////////////////////////////////// + // Init UpdateGrid + ////////////////////////////////////////////////////////////////////////// + this->updateGrid27 = 
std::make_unique<UpdateGrid27>(para.get()); + ////////////////////////////////////////////////////////////////////////// //Print Init ////////////////////////////////////////////////////////////////////////// @@ -435,7 +440,7 @@ void Simulation::run() //////////////////////////////////////////////////////////////////////////////// for(t=para->getTStart();t<=para->getTEnd();t++) { - updateGrid27(para.get(), comm, cudaManager.get(), pm, 0, t, kernels); + this->updateGrid27->updateGrid27(para.get(), comm, cudaManager.get(), pm, 0, t, kernels); //////////////////////////////////////////////////////////////////////////////// //Particles diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.h b/src/gpu/VirtualFluids_GPU/LBM/Simulation.h index ea7b59daa732217faa8abc0683fb19e4ebe73f3f..75c979107f71798ba62f1ce29b4085effca03e18 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.h +++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.h @@ -34,6 +34,7 @@ class KernelFactory; class PreProcessor; class PreProcessorFactory; class TrafficMovementFactory; +class UpdateGrid27; class VIRTUALFLUIDS_GPU_EXPORT Simulation { @@ -111,5 +112,6 @@ protected: SPtr<EnstrophyAnalyzer> enstrophyAnalyzer; //////////////////////////////////////////////////////////////////////////// + UPtr<UpdateGrid27> updateGrid27; }; #endif diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp index a3e7f5a663db274d398235e54f26a7a0bd3b2366..f4a48d82f160524c8547e0725e565fa59fdca137 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp +++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp @@ -57,7 +57,9 @@ void CudaStreamManager::createCudaEvents() checkCudaErrors(cudaEventCreateWithFlags(&startBulkKernel, cudaEventDisableTiming)); } -void CudaStreamManager::destroyCudaEvents() {checkCudaErrors(cudaEventDestroy(startBulkKernel)); +void CudaStreamManager::destroyCudaEvents() +{ + 
checkCudaErrors(cudaEventDestroy(startBulkKernel)); } void CudaStreamManager::triggerStartBulkKernel(int streamIndex)