diff --git a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp index 4906620026d5414f9a096029aba2dd08eb95ed64..dc7ac1f193b9a6e4475f575ea945add642c52b21 100644 --- a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp +++ b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp @@ -146,7 +146,8 @@ void multipleLevel(const std::string& configPath) para->setMaxLevel(1); //para->setMainKernel("CumulantK17CompChim"); - para->useStreams = useStreams; + if (useStreams) + para->setUseStreams(); para->setMainKernel("CumulantK17CompChimSparse"); *logging::out << logging::Logger::INFO_HIGH << "Kernel: " << para->getMainKernel() << "\n"; diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index a01c0903649fbe4af6fabe9f7ff2478e208aa7d1..d4d0ccdbeae290b7a29a4e27fbcdd3af7273a87f 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -26,7 +26,7 @@ void updateGrid27(Parameter* para, ////////////////////////////////////////////////////////////////////////// - if (para->useStreams) + if (para->getUseStreams()) collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndicesBorder, para->getParD(level)->numberOffluidNodesBorder, 1); else @@ -34,12 +34,12 @@ void updateGrid27(Parameter* para, ////////////////////////////////////////////////////////////////////////// - if (para->useStreams) + if (para->getUseStreams()) exchangeMultiGPU(para, comm, cudaManager, level, 1); else exchangeMultiGPU(para, comm, cudaManager, level, -1); - if (para->useStreams) + if (para->getUseStreams()) collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndices, para->getParD(level)->numberOfFluidNodes, 0); @@ -69,7 +69,7 @@ void updateGrid27(Parameter* para, exchangeMultiGPU(para, comm, cudaManager, level, -1); coarseToFine(para, level); - } + } } void collision(Parameter* para, std::vector<std::shared_ptr<PorousMedia>>& pm, int level, unsigned int t, std::vector < SPtr< Kernel>>& kernels) diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp index d5aff2758ed69b48895614eddaae4d4fdd823bd6..6009bf9f57f3f7b7885c3d716533759ea72f63cb 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp +++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp @@ -176,9 +176,10 @@ void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Y //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void exchangePreCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, int streamIndex) +void exchangePreCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, + int streamIndex) { - cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStream(streamIndex); + cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) @@ -219,7 +220,11 @@ void exchangePreCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, Cud //{ // comm->waitallGPU(); //} - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // wait for memcopy host to device to finish before sending data + if (para->getUseStreams()) + cudaStreamSynchronize(stream); + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { @@ -263,7 +268,7 @@ void exchangePreCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, Cud void exchangePostCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, int streamIndex) { - cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStream(streamIndex); + cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) @@ -304,7 +309,12 @@ void exchangePostCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, Cu //{ // comm->waitallGPU(); //} + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // wait for memcopy host to device to finish before sending data + if (para->getUseStreams()) + cudaStreamSynchronize(stream); ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // //start blocking MPI send for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) { diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h index b0dd8d53b647fbe60b93c6282cd999bcfd810529..bf4cddf7462167ce28bb16ad07761ee142fb1e7a 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h +++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h @@ -14,7 +14,8 @@ extern "C" void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator ////////////////////////////////////////////////////////////////////////// //3D domain decomposition extern "C" void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level); -extern "C" void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level, int streamIndex = -1); +extern "C" void exchangePreCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, + int level, int streamIndex = -1); extern "C" void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level); extern "C" void exchangePostCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level); extern "C" void exchangePostCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp index a0615246948185801a06e5be9c1da6d14f693814..53c3221bcdff3e8d7aa18c25944be0c0ccce69c1 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp +++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp @@ -574,7 +574,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborYFsHD(int lev, unsigned int proce checkCudaErrors(cudaMemcpyAsync(parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].f[0], parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].f[0], parameter->getD3Qxx() * parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].memsizeFs, - cudaMemcpyHostToDevice, parameter->getStream(streamIndex))); + cudaMemcpyHostToDevice, parameter->getStreamManager().getStream(streamIndex))); } void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, int streamIndex) { @@ -588,7 +588,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int proce cudaMemcpyAsync(parameter->getParH(lev)->sendProcessNeighborY[processNeighbor].f[0], parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0], parameter->getD3Qxx() * parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].memsizeFs, - cudaMemcpyDeviceToHost, parameter->getStream(streamIndex))); + cudaMemcpyDeviceToHost, parameter->getStreamManager().getStream(streamIndex))); } void CudaMemoryManager::cudaFreeProcessNeighborY(int lev, unsigned int processNeighbor) { diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.cu index 3acacc0397bbc1eadf8544bd1acbcdde568cb2b3..3f493de191473307193230843bc366f516b444e3 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.cu @@ -37,7 +37,7 @@ void CumulantK17CompChimSparse::runOnIndices(const unsigned int *indices, unsign dim3 grid, threads; std::tie(grid, threads) = *calcGridDimensions(para->getParD(level)->numberOfFluidNodes); - cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStream(streamIndex); + cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex); LB_Kernel_CumulantK17CompChimSparse<<<grid, threads, 0, stream>>>( para->getParD(level)->omega, diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp index a90ae2b5bddc2c55f58ebd7d3bd0dbef2f7ccc74..f6e38e90fb260ab836c02eea68706c4b414a8d18 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp +++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp @@ -105,8 +105,8 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std output.clearLogFile(); ////////////////////////////////////////////////////////////////////////// // CUDA streams - if(para->useStreams) - para->launchStreams((uint)2); + if (para->getUseStreams()) + para->getStreamManager().launchStreams(2u); ////////////////////////////////////////////////////////////////////////// // //output << para->getNeedInterface().at(0) << "\n"; @@ -1157,8 +1157,8 @@ void Simulation::definePMarea(std::shared_ptr<PorousMedia> pMedia) void Simulation::free() { // Cuda Streams - if (para->useStreams) - para->terminateStreams(); + if (para->getUseStreams()) + para->getStreamManager().terminateStreams(); //CudaFreeHostMemory for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp new file mode 100644 index 0000000000000000000000000000000000000000..437e36299e6dec0f39141708152af8f1b17d0532 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp @@ -0,0 +1,52 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//======================================================================================= +#include "CudaStreamManager.h" + +CudaStreamManager::CudaStreamManager() {} + +CudaStreamManager::~CudaStreamManager() {} + +void CudaStreamManager::launchStreams(uint numberOfStreams) +{ + cudaStreams.resize(numberOfStreams); + for (cudaStream_t &stream : cudaStreams) + cudaStreamCreate(&stream); +} + +void CudaStreamManager::terminateStreams() +{ + for (cudaStream_t &stream : cudaStreams) + cudaStreamDestroy(stream); +} + +cudaStream_t &CudaStreamManager::getStream(uint streamIndex) +{ + return cudaStreams[streamIndex]; +} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h new file mode 100644 index 0000000000000000000000000000000000000000..3912a383da77619cb742639b047735f0347fced2 --- /dev/null +++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h @@ -0,0 +1,52 @@ +//======================================================================================= +// ____ ____ __ ______ __________ __ __ __ __ +// \ \ | | | | | _ \ |___ ___| | | | | / \ | | +// \ \ | | | | | |_) | | | | | | | / \ | | +// \ \ | | | | | _ / | | | | | | / /\ \ | | +// \ \ | | | | | | \ \ | | | \__/ | / ____ \ | |____ +// \ \ | | |__| |__| \__\ |__| \________/ /__/ \__\ |_______| +// \ \ | | ________________________________________________________________ +// \ \ | | | ______________________________________________________________| +// \ \| | | | __ __ __ __ ______ _______ +// \ | | |_____ | | | | | | | | | _ \ / _____) +// \ | | _____| | | | | | | | | | | \ \ \_______ +// \ | | | | |_____ | \_/ | | | | |_/ / _____ | +// \ _____| |__| |________| \_______/ |__| |______/ (_______/ +// +// This file is part of VirtualFluids. VirtualFluids is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// VirtualFluids is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//======================================================================================= +#ifndef STREAM_MANAGER_H +#define STREAM_MANAGER_H + +#include <vector> +#include "Core/DataTypes.h" + +#include <cuda.h> +#include <cuda_runtime.h> + +class CudaStreamManager +{ +private: + std::vector<cudaStream_t> cudaStreams; + +public: + CudaStreamManager(); + ~CudaStreamManager(); + void launchStreams(uint numberOfStreams); + void terminateStreams(); + cudaStream_t &getStream(uint streamIndex); +}; + +#endif \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp index 7bf40e27b6968cf115194fa5dffe4ff2790e7407..6a75de3b0bfb61869cd8d29009cbe133ba9eaaab 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp @@ -53,24 +53,6 @@ Parameter::Parameter(const vf::basics::ConfigurationFile &configData, int number //initLBMSimulationParameter(); } -void Parameter::launchStreams(uint numberOfStreams) -{ - cudaStreams.resize(numberOfStreams); - for (cudaStream_t &stream : cudaStreams) { - cudaStreamCreate(&stream); - } -} - -void Parameter::terminateStreams() { - for (cudaStream_t &stream : cudaStreams) { - cudaStreamDestroy(stream); - } -} - -cudaStream_t& Parameter::getStream(uint streamIndex) { - return cudaStreams[streamIndex]; -} - void Parameter::readConfigData(const vf::basics::ConfigurationFile &configData) { if (configData.contains("NumberOfDevices")) @@ -2503,4 +2485,14 @@ real Parameter::TrafoZtoMGsWorld(int CoordZ, int level) return temp; } +void Parameter::setUseStreams() { + this->useStreams = true; + this->cudaStreamManager = CudaStreamManager(); +} + +bool Parameter::getUseStreams() { return this->useStreams; } + +CudaStreamManager &Parameter::getStreamManager() +{ return this->cudaStreamManager; } + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h index 5d24abe4f812e9a898a32c32fcef9aa759b87593..782ca95f72b1a534d82a7dc434af43609a6c5fa0 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h @@ -43,8 +43,7 @@ #include "VirtualFluids_GPU_export.h" -#include <cuda.h> -#include <cuda_runtime.h> +#include "Parameter/CudaStreamManager.h" struct curandStateXORWOW; typedef struct curandStateXORWOW curandState; @@ -761,11 +760,7 @@ public: std::vector<std::shared_ptr<LBMSimulationParameter>> parD = std::vector<std::shared_ptr<LBMSimulationParameter>>(1); //////////////////////////////////////////////////////////////////////////// - // cuda streams - bool useStreams = false; - void launchStreams(uint numberOfStreams); - void terminateStreams(); - cudaStream_t& getStream(uint streamIndex); + private: void readConfigData(const vf::basics::ConfigurationFile &configData); @@ -846,12 +841,19 @@ private: std::vector<std::string> possNeighborFilesRecvX, possNeighborFilesRecvY, possNeighborFilesRecvZ; bool isNeigborX, isNeigborY, isNeigborZ; - std::vector<cudaStream_t> cudaStreams; //////////////////////////////////////////////////////////////////////////// // initial condition std::function<void(real, real, real, real &, real &, real &, real &)> initialCondition; + // cuda streams + bool useStreams { false }; + CudaStreamManager cudaStreamManager; + +public: + void setUseStreams(); + bool getUseStreams(); + CudaStreamManager &getStreamManager(); }; #endif