diff --git a/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionStructs.cuh b/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionStructs.cuh index ad35d16acfd2c9388230f9e198a7ef3b208f02b8..88f65bd68fb21207432cd08cf797cd56ad5f4e78 100644 --- a/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionStructs.cuh +++ b/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionStructs.cuh @@ -1,7 +1,6 @@ #ifndef BoundaryConditionStructs_H #define BoundaryConditionStructs_H #include "Core/DataTypes.h" -#include <cuda_runtime.h> //Q for second order BCs //! \struct to manage sub-grid-distances (q) for second order Boundary Conditions (BCs) @@ -33,7 +32,6 @@ typedef struct QforPrecursorBC{ real* weightsNT, *weightsNB, *weightsST, *weightsSB; real* last, *current, *next; real velocityX, velocityY, velocityZ; - cudaStream_t stream; }QforPrecursorBoundaryConditions; //BCTemp diff --git a/src/gpu/VirtualFluids_GPU/CMakeLists.txt b/src/gpu/VirtualFluids_GPU/CMakeLists.txt index 2adc40edeba92c3cbc51ea2c15709956ca03bdec..40496abc6902d01f642b9e117d170d95c199100f 100644 --- a/src/gpu/VirtualFluids_GPU/CMakeLists.txt +++ b/src/gpu/VirtualFluids_GPU/CMakeLists.txt @@ -22,8 +22,5 @@ if(BUILD_VF_UNIT_TESTS) set_source_files_properties(DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp PROPERTIES LANGUAGE CUDA) set_source_files_properties(Communication/ExchangeData27Test.cpp PROPERTIES LANGUAGE CUDA) set_source_files_properties(BoundaryConditions/BoundaryConditionFactoryTest.cpp PROPERTIES LANGUAGE CUDA) - set_source_files_properties(Parameter/ParameterTest.cpp PROPERTIES LANGUAGE CUDA) - set_source_files_properties(KernelManager/BCKernelManagerTest.cpp PROPERTIES LANGUAGE CUDA) - set_source_files_properties(Parameter/EdgeNodeFinderTest.cpp PROPERTIES LANGUAGE CUDA) target_include_directories(VirtualFluids_GPUTests PRIVATE "${VF_THIRD_DIR}/cuda_samples/") endif() diff --git 
a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp index 18fb60411e1bf092bcae0c213ec157433edd0657..b37699962ab92963837bdb191554940a4889329c 100644 --- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp +++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp @@ -1704,7 +1704,6 @@ void CudaMemoryManager::cudaAllocPrecursorData(int lev) { size_t size = parameter->getParH(lev)->precursorBC.numberOfPrecursorNodes*sizeof(real)*parameter->getParH(lev)->precursorBC.numberOfQuantities; - checkCudaErrors( cudaStreamCreate(&parameter->getParH(lev)->precursorBC.stream) ); checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.last, size)); checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.current, size)); checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.next, size)); @@ -1739,9 +1738,10 @@ void CudaMemoryManager::cudaCopyPrecursorBC(int lev) void CudaMemoryManager::cudaCopyPrecursorData(int lev) { auto prec = &parameter->getParH(lev)->precursorBC; + auto precStream = parameter->getStreamManager()->getStream(parameter->getStreamManager()->getPrecursorStreamIndex()); size_t memSize = prec->numberOfPrecursorNodes*sizeof(real)*prec->numberOfQuantities; - checkCudaErrors( cudaStreamSynchronize(prec->stream) ); - checkCudaErrors( cudaMemcpyAsync(parameter->getParD(lev)->precursorBC.next, prec->next, memSize, cudaMemcpyHostToDevice, prec->stream)) ; + checkCudaErrors( cudaStreamSynchronize(precStream) ); + checkCudaErrors( cudaMemcpyAsync(parameter->getParD(lev)->precursorBC.next, prec->next, memSize, cudaMemcpyHostToDevice, precStream) ); } diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp index 83ca85243ae4e8cc8ff1316e73ed8c5ae3816516..d31ef0cb2a2a28c20e07207c75c9ef9ef771f853 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp +++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp @@ -115,7 +115,7 @@ 
void Simulation::init(GridProvider &gridProvider, BoundaryConditionFactory *bcFa ////////////////////////////////////////////////////////////////////////// // CUDA streams if (para->getUseStreams()) { - para->getStreamManager()->launchStreams(2u); + para->getStreamManager()->launchStreams(3u); para->getStreamManager()->createCudaEvents(); } ////////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp index 3731836f336d91c1bc4cc5f1a8f5ea0a10bee0a6..e44c0885b44b673f3c666bed6ee0b20bce436db5 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp +++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp @@ -51,6 +51,8 @@ int CudaStreamManager::getBorderStreamIndex() { return borderStreamIndex; } int CudaStreamManager::getBulkStreamIndex() { return bulkStreamIndex; } +int CudaStreamManager::getPrecursorStreamIndex() { return precursorStreamIndex; } + void CudaStreamManager::createCudaEvents() { checkCudaErrors(cudaEventCreateWithFlags(&startBulkKernel, cudaEventDisableTiming)); diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h index c2d515ab5fe9c24388632a7ca9e1e4c78b7f1467..d595fdce4155310b59bf5ae4fcbf008817611c4a 100644 --- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h +++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h @@ -40,6 +40,7 @@ class CudaStreamManager private: std::vector<cudaStream_t> cudaStreams; cudaEvent_t startBulkKernel = NULL; + const int precursorStreamIndex = 2; const int borderStreamIndex = 1; const int bulkStreamIndex = 0; @@ -47,9 +48,9 @@ public: void launchStreams(uint numberOfStreams); void terminateStreams(); cudaStream_t &getStream(uint streamIndex); - int getBorderStreamIndex(); int getBulkStreamIndex(); + int getPrecursorStreamIndex(); // Events void createCudaEvents();