diff --git a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp index 2b9ce1f112882daff7a22e55b8206ab0b980c34c..30fbd5ec8106fd6fef221d62b5a8d09b70b9af86 100644 --- a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp +++ b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp @@ -109,7 +109,7 @@ void multipleLevel(const std::string& configPath) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// bool useGridGenerator = true; - bool useMultiGPU = false; + bool useMultiGPU = true; bool useStreams= true; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -132,8 +132,8 @@ void multipleLevel(const std::string& configPath) *logging::out << logging::Logger::INFO_HIGH << "velocity real [m/s] = " << vxLB * para->getVelocityRatio()<< " \n"; *logging::out << logging::Logger::INFO_HIGH << "viscosity real [m^2/s] = " << viscosityLB * para->getViscosityRatio() << "\n"; - para->setTOut(5000); - para->setTEnd(50000); + para->setTOut(100); + para->setTEnd(100); para->setCalcDragLift(false); para->setUseWale(false); @@ -148,7 +148,7 @@ void multipleLevel(const std::string& configPath) //para->setMainKernel("CumulantK17CompChim"); para->useStreams = useStreams; para->setMainKernel("CumulantK17CompChimSparse"); - *logging::out << logging::Logger::INFO_HIGH << "Kernel: " << para->getMainKernel() << "\n"; + *logging::out << logging::Logger::INFO_HIGH << "Kernel: " << para->getMainKernel() << "\n"; if (useMultiGPU) { para->setDevices(std::vector<uint>{ (uint)0, (uint)1 }); @@ -186,8 +186,8 @@ void multipleLevel(const std::string& configPath) TriangularMesh *bivalveSTL = TriangularMesh::make("C:/Users/Master/Documents/MasterAnna/STL/" + bivalveType + ".stl"); - TriangularMesh* bivalveRef_1_STL = - TriangularMesh::make("C:/Users/Master/Documents/MasterAnna/STL/" + bivalveType + 
"_Level1.stl"); + //TriangularMesh* bivalveRef_1_STL = + // TriangularMesh::make("C:/Users/Master/Documents/MasterAnna/STL/" + bivalveType + "_Level1.stl"); if (useMultiGPU) { const uint generatePart = vf::gpu::Communicator::getInstanz()->getPID(); @@ -204,8 +204,8 @@ void multipleLevel(const std::string& configPath) xGridMax, yGridMax, zGridMax, dxGrid); } - gridBuilder->setNumberOfLayers(6, 8); - gridBuilder->addGrid(bivalveRef_1_STL, 1); + //gridBuilder->setNumberOfLayers(6, 8); + //gridBuilder->addGrid(bivalveRef_1_STL, 1); gridBuilder->addGeometry(bivalveSTL); @@ -222,8 +222,6 @@ void multipleLevel(const std::string& configPath) gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!! - gridBuilder->findFluidNodes(useStreams); - if (generatePart == 0) { gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM); gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 1); @@ -244,6 +242,8 @@ void multipleLevel(const std::string& configPath) gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0); ////////////////////////////////////////////////////////////////////////// + gridBuilder->findFluidNodes(useStreams); + //gridBuilder->writeGridsToVtk(path + "/" + bivalveType + "/grid/part" + std::to_string(generatePart) + "_"); //gridBuilder->writeGridsToVtk(path + "/" + bivalveType + "/" + std::to_string(generatePart) + "/grid/"); //gridBuilder->writeArrows(path + "/" + bivalveType + "/" + std::to_string(generatePart) + " /arrow"); @@ -262,7 +262,6 @@ void multipleLevel(const std::string& configPath) gridBuilder->setPeriodicBoundaryCondition(false, false, true); gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!! 
- gridBuilder->findFluidNodes(useStreams); ////////////////////////////////////////////////////////////////////////// gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0); @@ -273,6 +272,8 @@ void multipleLevel(const std::string& configPath) gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0); ////////////////////////////////////////////////////////////////////////// + gridBuilder->findFluidNodes(useStreams); + // gridBuilder->writeGridsToVtk("E:/temp/MusselOyster/" + bivalveType + "/grid/"); // gridBuilder->writeArrows ("E:/temp/MusselOyster/" + bivalveType + "/arrow"); diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index 9d3bd9030b5f82e100d5d913906b60b257df2e4e..bf2be2cbd47becb90ce2e6fef7024bd06a00e336 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -27,17 +27,20 @@ void updateGrid27(Parameter* para, ////////////////////////////////////////////////////////////////////////// if (para->useStreams) { - collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndices, - para->getParD(level)->numberOfFluidNodes, 0); collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndicesBorder, para->getParD(level)->numberOffluidNodesBorder, 1); + collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndices, + para->getParD(level)->numberOfFluidNodes, 0); } else collision(para, pm, level, t, kernels); ////////////////////////////////////////////////////////////////////////// - exchangeMultiGPU(para, comm, cudaManager, level); + if (para->useStreams) + exchangeMultiGPU(para, comm, cudaManager, level, 1); + else + exchangeMultiGPU(para, comm, cudaManager, level, -1); ////////////////////////////////////////////////////////////////////////// @@ -62,7 +65,10 @@ void updateGrid27(Parameter* para, { 
fineToCoarse(para, level); - exchangeMultiGPU(para, comm, cudaManager, level); + if (para->useStreams) + exchangeMultiGPU(para, comm, cudaManager, level, 1); + else + exchangeMultiGPU(para, comm, cudaManager, level, -1); coarseToFine(para, level); } @@ -177,7 +183,8 @@ void collisionAdvectionDiffusion(Parameter* para, int level) } } -void exchangeMultiGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level) +void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, + int streamIndex) { if (para->getNumprocs() > 1) { @@ -186,7 +193,7 @@ void exchangeMultiGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryMa ////////////////////////////////////////////////////////////////////////// //3D domain decomposition exchangePostCollDataXGPU27(para, comm, cudaManager, level); - exchangePostCollDataYGPU27(para, comm, cudaManager, level); + exchangePostCollDataYGPU27(para, comm, cudaManager, level, streamIndex); exchangePostCollDataZGPU27(para, comm, cudaManager, level); ////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h index 44b02d36fa4a428e3b0ba299df73430ef491ca7a..8e9fbff0414cf5e211957822089f8b2274e2cba0 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h @@ -26,7 +26,8 @@ extern "C" void collisionPorousMedia(Parameter* para, std::vector<std::shared_pt extern "C" void collisionAdvectionDiffusion(Parameter* para, int level); -extern "C" void exchangeMultiGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level); +extern "C" void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, + int level, int streamIndex); extern "C" void postCollisionBC(Parameter* para, int level, unsigned int t); 
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp index 8f89656ac6feb7dfe2644a2b6d604ccec510c3cb..474f5327f8348efc84c838ff478edb171329686d 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp +++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp @@ -176,7 +176,7 @@ void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Y //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level) +void exchangePreCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, int streamIndex) { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host @@ -192,8 +192,10 @@ void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud para->getParD(level)->size_Mat_SP, para->getParD(level)->evenOrOdd, para->getParD(level)->numberofthreads); - ////////////////////////////////////////////////////////////////////////// - cudaManager->cudaCopyProcessNeighborYFsDH(level, i); + ////////////////////////////////////////////////////////////////////////// + cudaManager->cudaCopyProcessNeighborYFsDH(level, i, streamIndex); + if (streamIndex != -1) + cudaStreamSynchronize(para->getStream(streamIndex)); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start non blocking MPI receive @@ -257,7 +259,8 @@ void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level) +void exchangePostCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, + int streamIndex) { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host @@ -274,7 +277,7 @@ void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu para->getParD(level)->evenOrOdd, para->getParD(level)->numberofthreads); ////////////////////////////////////////////////////////////////////////// - cudaManager->cudaCopyProcessNeighborYFsDH(level, i); + cudaManager->cudaCopyProcessNeighborYFsDH(level, i, streamIndex); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start non blocking MPI receive diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h index 34c9cba801c675bdf4c2cd39daca3be2d7918dbe..b0dd8d53b647fbe60b93c6282cd999bcfd810529 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h +++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h @@ -14,10 +14,11 @@ extern "C" void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator ////////////////////////////////////////////////////////////////////////// //3D domain decomposition extern "C" void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level); -extern "C" void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, 
CudaMemoryManager* cudaManager, int level); +extern "C" void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level, int streamIndex = -1); extern "C" void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level); -extern "C" void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level); -extern "C" void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level); +extern "C" void exchangePostCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level); +extern "C" void exchangePostCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, + int level, int streamIndex = -1); extern "C" void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level); ////////////////////////////////////////////////////////////////////////// //3D domain decomposition convection diffusion diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp index e4fe89cb9b68ea2464832f182cfb74ac3bb1e3b3..827746a0636e1100623255e232142c7aca04da32 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp +++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp @@ -570,12 +570,19 @@ void CudaMemoryManager::cudaCopyProcessNeighborYFsHD(int lev, unsigned int proce parameter->getD3Qxx() * parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].memsizeFs, cudaMemcpyHostToDevice)); } -void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor) -{ - checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->sendProcessNeighborY[processNeighbor].f[0], - parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0], - parameter->getD3Qxx() * 
parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].memsizeFs, - cudaMemcpyDeviceToHost)); +void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, int streamIndex) +{ + if (streamIndex == -1) + checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->sendProcessNeighborY[processNeighbor].f[0], + parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0], + parameter->getD3Qxx() * parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].memsizeFs, + cudaMemcpyDeviceToHost)); + else + checkCudaErrors( + cudaMemcpyAsync(parameter->getParH(lev)->sendProcessNeighborY[processNeighbor].f[0], + parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0], + parameter->getD3Qxx() * parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].memsizeFs, + cudaMemcpyDeviceToHost, parameter->getStream(streamIndex))); } void CudaMemoryManager::cudaFreeProcessNeighborY(int lev, unsigned int processNeighbor) { diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h index 492d9b9beca76f523e5a0b16732b0af76b1d3d56..5db28ec4493a693f030ebf13edff05f523b674eb 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h +++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h @@ -96,7 +96,7 @@ public: // void cudaAllocProcessNeighborY(int lev, unsigned int processNeighbor); void cudaCopyProcessNeighborYFsHD(int lev, unsigned int processNeighbor); - void cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor); + void cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, int streamIndex); void cudaCopyProcessNeighborYIndex(int lev, unsigned int processNeighbor); void cudaFreeProcessNeighborY(int lev, unsigned int processNeighbor); // diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp index 9bbfa66b6f1132a66146bba833ba7c5a5c18d041..a90ae2b5bddc2c55f58ebd7d3bd0dbef2f7ccc74 100644 --- 
a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp +++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp @@ -433,7 +433,7 @@ void Simulation::run() // run Analyzers for kinetic energy and enstrophy for TGV in 3D // these analyzers only work on level 0 //////////////////////////////////////////////////////////////////////////////// - if( this->kineticEnergyAnalyzer || this->enstrophyAnalyzer ) exchangeMultiGPU(para.get(), comm, cudaManager.get(), 0); + if( this->kineticEnergyAnalyzer || this->enstrophyAnalyzer ) exchangeMultiGPU(para.get(), comm, cudaManager.get(), 0, -1); if( this->kineticEnergyAnalyzer ) this->kineticEnergyAnalyzer->run(t); if( this->enstrophyAnalyzer ) this->enstrophyAnalyzer->run(t); @@ -673,7 +673,7 @@ void Simulation::run() { ////////////////////////////////////////////////////////////////////////// //exchange data for valid post process - exchangeMultiGPU(para.get(), comm, cudaManager.get(), lev); + exchangeMultiGPU(para.get(), comm, cudaManager.get(), lev, -1); ////////////////////////////////////////////////////////////////////////// //if (para->getD3Qxx()==19) //{