diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp index 6e73c1e88faca72d0c96af53f60d4d87ae5309f3..4b180fb65383ad4d9ad745ea910677001d6d5b31 100644 --- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp +++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp @@ -7,6 +7,7 @@ //#include "Output/UnstructuredGridWriter.hpp" #include "Communication/ExchangeData27.h" #include "Kernel/Kernel.h" +#include "Parameter/CudaStreamManager.h" void updateGrid27(Parameter* para, vf::gpu::Communicator* comm, @@ -41,10 +42,10 @@ void updateGrid27(Parameter* para, //prepare exchange and trigger bulk kernel when finished prepareExchangeMultiGPU(para, level, borderStreamIndex); if (para->getUseStreams()) - para->getStreamManager().triggerStartBulkKernel(borderStreamIndex); + para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex); // launch bulk kernel - para->getStreamManager().waitOnStartBulkKernelEvent(bulkStreamIndex); + para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex); collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndices, para->getParD(level)->numberOfFluidNodes, bulkStreamIndex); diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp index 99a3489a03de1f20cf447664928bde0ef6bd0108..20b100f035164b3d283921366f9dea6021731dd8 100644 --- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp +++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp @@ -1,6 +1,7 @@ #include "Communication/ExchangeData27.h" #include <cuda_runtime.h> #include <helper_cuda.h> +#include "Parameter/CudaStreamManager.h" //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //3D domain decomposition @@ -9,7 +10,7 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void prepareExchangeCollDataXGPU27(Parameter *para, int level, int streamIndex) { - cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex); + cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) GetSendFsPostDev27(para->getParD(level)->d0SP.f[0], para->getParD(level)->sendProcessNeighborX[i].f[0], @@ -27,7 +28,7 @@ void prepareExchangeCollDataXGPU27(Parameter *para, int level, int streamIndex) void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, int streamIndex) { - cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex); + cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) @@ -109,7 +110,7 @@ void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex) { - cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex); + cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) GetSendFsPostDev27(para->getParD(level)->d0SP.f[0], para->getParD(level)->sendProcessNeighborY[i].f[0], @@ -127,7 +128,7 @@ void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex) void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, int streamIndex) { - cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex); + cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) @@ -219,7 +220,7 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe // Z //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void prepareExchangeCollDataZGPU27(Parameter *para, int level, int streamIndex) { - cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex); + cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) GetSendFsPostDev27(para->getParD(level)->d0SP.f[0], para->getParD(level)->sendProcessNeighborZ[i].f[0], @@ -237,7 +238,7 @@ void prepareExchangeCollDataZGPU27(Parameter *para, int level, int streamIndex) void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, int streamIndex) { - cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex); + cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //copy Device to Host for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp index 72a17e863c4fd1588561ac53e2e9b23e2c0309a5..cf1314b1bffde1a38f016c7bc45b172757d22cdb 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp +++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp @@ -5,6 +5,7 @@ #include <math.h> #include <Parameter/Parameter.h> +#include "Parameter/CudaStreamManager.h" #include "Calculation/PorousMedia.h" @@ -520,7 +521,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborXFsHD(int lev, unsigned int proce parameter->getParH(lev)->recvProcessNeighborX[processNeighbor].f[0], parameter->getD3Qxx() * parameter->getParD(lev)->recvProcessNeighborX[processNeighbor].memsizeFs, cudaMemcpyHostToDevice, - parameter->getStreamManager().getStream(streamIndex))); + parameter->getStreamManager()->getStream(streamIndex))); } void CudaMemoryManager::cudaCopyProcessNeighborXFsDH(int lev, unsigned int processNeighbor, int streamIndex) { @@ -534,7 +535,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborXFsDH(int lev, unsigned int proce parameter->getParD(lev)->sendProcessNeighborX[processNeighbor].f[0], parameter->getD3Qxx() * parameter->getParD(lev)->sendProcessNeighborX[processNeighbor].memsizeFs, cudaMemcpyDeviceToHost, - parameter->getStreamManager().getStream(streamIndex))); + parameter->getStreamManager()->getStream(streamIndex))); } void CudaMemoryManager::cudaFreeProcessNeighborX(int lev, unsigned int processNeighbor) { @@ -588,7 +589,8 @@ void CudaMemoryManager::cudaCopyProcessNeighborYFsHD(int lev, unsigned int proce checkCudaErrors(cudaMemcpyAsync(parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].f[0], parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].f[0], parameter->getD3Qxx() * parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].memsizeFs, - cudaMemcpyHostToDevice, parameter->getStreamManager().getStream(streamIndex))); + cudaMemcpyHostToDevice, + parameter->getStreamManager()->getStream(streamIndex))); } void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, int streamIndex) { @@ -602,7 +604,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int proce cudaMemcpyAsync(parameter->getParH(lev)->sendProcessNeighborY[processNeighbor].f[0], parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0], parameter->getD3Qxx() * parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].memsizeFs, - cudaMemcpyDeviceToHost, parameter->getStreamManager().getStream(streamIndex))); + cudaMemcpyDeviceToHost, parameter->getStreamManager()->getStream(streamIndex))); } void CudaMemoryManager::cudaFreeProcessNeighborY(int lev, unsigned int processNeighbor) { @@ -657,7 +659,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborZFsHD(int lev, unsigned int proce parameter->getParH(lev)->recvProcessNeighborZ[processNeighbor].f[0], parameter->getD3Qxx() * parameter->getParD(lev)->recvProcessNeighborZ[processNeighbor].memsizeFs, cudaMemcpyHostToDevice, - parameter->getStreamManager().getStream(streamIndex))); + parameter->getStreamManager()->getStream(streamIndex))); } void CudaMemoryManager::cudaCopyProcessNeighborZFsDH(int lev, unsigned int processNeighbor, int streamIndex) { @@ -671,7 +673,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborZFsDH(int lev, unsigned int proce parameter->getParD(lev)->sendProcessNeighborZ[processNeighbor].f[0], parameter->getD3Qxx() * parameter->getParD(lev)->sendProcessNeighborZ[processNeighbor].memsizeFs, cudaMemcpyDeviceToHost, - parameter->getStreamManager().getStream(streamIndex))); + parameter->getStreamManager()->getStream(streamIndex))); } void CudaMemoryManager::cudaFreeProcessNeighborZ(int lev, unsigned int processNeighbor) { diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.cu index b2ba5fdb1c7db76dccf3d0da1c494618ad6afaaa..76345fcd84d9ad11a2ad1929870e39a6a2a08814 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.cu @@ -1,6 +1,7 @@ #include "CumulantK17CompChimSparse.h" #include "Parameter/Parameter.h" +#include "Parameter/CudaStreamManager.h" #include "CumulantK17CompChimSparse_Device.cuh" #include <cuda.h> @@ -39,7 +40,7 @@ void CumulantK17CompChimSparse::runOnIndices(const unsigned int *indices, unsign std::tie(grid, threads) = *calcGridDimensions(para->getParD(level)->numberOfFluidNodes, para->getParD(level)->numberofthreads); - cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex); + cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex); LB_Kernel_CumulantK17CompChimSparse<<<grid, threads, 0, stream>>>( para->getParD(level)->omega, diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp index 6a4761963c3dd2e8928097d4690a1941c8387d9b..9658d4857f043ed0713f1c6b9d25e689db5257c5 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp +++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp @@ -9,6 +9,7 @@ #include "Communication/Communicator.h" #include "Communication/ExchangeData27.h" #include "Parameter/Parameter.h" +#include "Parameter/CudaStreamManager.h" #include "GPU/GPU_Interface.h" #include "GPU/devCheck.h" #include "basics/utilities/UbFileOutputASCII.h" @@ -106,8 +107,8 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std ////////////////////////////////////////////////////////////////////////// // CUDA streams if (para->getUseStreams()) { - para->getStreamManager().launchStreams(2u); - para->getStreamManager().createCudaEvents(); + para->getStreamManager()->launchStreams(2u); + para->getStreamManager()->createCudaEvents(); } ////////////////////////////////////////////////////////////////////////// // @@ -1171,8 +1172,8 @@ void Simulation::free() { // Cuda Streams if (para->getUseStreams()) { - para->getStreamManager().destroyCudaEvents(); - para->getStreamManager().terminateStreams(); + para->getStreamManager()->destroyCudaEvents(); + para->getStreamManager()->terminateStreams(); } //CudaFreeHostMemory diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp index 440e589847c0ff3a1952e49b212ee6219e5a6086..56bb66f1a07d4ee533ebd2a66ca3eed494d295e7 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp @@ -42,6 +42,8 @@ #include <basics/config/ConfigurationFile.h> +#include "Parameter/CudaStreamManager.h" + Parameter::Parameter(const vf::basics::ConfigurationFile &configData, int numberOfProcesses, int myId) @@ -2487,12 +2489,12 @@ real Parameter::TrafoZtoMGsWorld(int CoordZ, int level) void Parameter::setUseStreams() { this->useStreams = true; - this->cudaStreamManager = CudaStreamManager(); + this->cudaStreamManager = std::make_unique<CudaStreamManager>(); } bool Parameter::getUseStreams() { return this->useStreams; } -CudaStreamManager &Parameter::getStreamManager() { return this->cudaStreamManager; } +std::shared_ptr<CudaStreamManager> &Parameter::getStreamManager() { return this->cudaStreamManager; } void Parameter::findCornerNodesCommMultiGPU() { for (uint level = 0; level < parH.size(); level++) { diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h index 66f36122f08da3910b42ade203cb6ef7fdc628e5..813287d9ccfe4258690e0c07322d885d43b5d9c3 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h +++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h @@ -43,8 +43,6 @@ #include "VirtualFluids_GPU_export.h" -#include "Parameter/CudaStreamManager.h" - struct curandStateXORWOW; typedef struct curandStateXORWOW curandState; namespace vf @@ -53,7 +51,8 @@ namespace basics { class ConfigurationFile; } -} +} +class CudaStreamManager; //! \struct LBMSimulationParameter //! \brief struct holds and manages the LB-parameter of the simulation @@ -857,12 +856,12 @@ private: // cuda streams bool useStreams { false }; - CudaStreamManager cudaStreamManager; + std::shared_ptr<CudaStreamManager> cudaStreamManager; public: void setUseStreams(); bool getUseStreams(); - CudaStreamManager &getStreamManager(); + std::shared_ptr<CudaStreamManager> &getStreamManager(); void findCornerNodesCommMultiGPU(); void findCornerNodesXY(int level);