From bb201172d55801e17e056f396599723d69d700dc Mon Sep 17 00:00:00 2001
From: Henry <henry.korb@geo.uu.se>
Date: Wed, 12 Oct 2022 11:43:22 +0200
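Subject: [PATCH] Move precursor stream to CudaStreamManager

The precursor boundary condition struct no longer owns a cudaStream_t.
The host-to-device copy of the precursor data now uses stream index 2 of
the CudaStreamManager, and Simulation::init launches three managed
streams instead of two. The <cuda_runtime.h> include in
BoundaryConditionStructs.cuh and the LANGUAGE CUDA property on three
test sources are dropped.
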
---
 .../BoundaryConditions/BoundaryConditionStructs.cuh         | 2 --
 src/gpu/VirtualFluids_GPU/CMakeLists.txt                    | 3 ---
 src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp         | 6 +++---
 src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp                | 2 +-
 src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp   | 2 ++
 src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h     | 3 ++-
 6 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionStructs.cuh b/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionStructs.cuh
index ad35d16ac..88f65bd68 100644
--- a/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionStructs.cuh
+++ b/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionStructs.cuh
@@ -1,7 +1,6 @@
 #ifndef BoundaryConditionStructs_H
 #define BoundaryConditionStructs_H
 #include "Core/DataTypes.h"
-#include <cuda_runtime.h>
 
 //Q for second order BCs
 //! \struct to manage sub-grid-distances (q) for second order Boundary Conditions (BCs)
@@ -33,7 +32,6 @@ typedef struct QforPrecursorBC{
    real* weightsNT, *weightsNB, *weightsST,  *weightsSB;
    real* last, *current, *next;
    real velocityX, velocityY, velocityZ;
-   cudaStream_t stream;
 }QforPrecursorBoundaryConditions;
 
 //BCTemp
diff --git a/src/gpu/VirtualFluids_GPU/CMakeLists.txt b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
index 2adc40ede..40496abc6 100644
--- a/src/gpu/VirtualFluids_GPU/CMakeLists.txt
+++ b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
@@ -22,8 +22,5 @@ if(BUILD_VF_UNIT_TESTS)
 	set_source_files_properties(DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp PROPERTIES LANGUAGE CUDA)
     set_source_files_properties(Communication/ExchangeData27Test.cpp PROPERTIES LANGUAGE CUDA)
     set_source_files_properties(BoundaryConditions/BoundaryConditionFactoryTest.cpp PROPERTIES LANGUAGE CUDA)
-    set_source_files_properties(Parameter/ParameterTest.cpp PROPERTIES LANGUAGE CUDA)
-    set_source_files_properties(KernelManager/BCKernelManagerTest.cpp PROPERTIES LANGUAGE CUDA)
-    set_source_files_properties(Parameter/EdgeNodeFinderTest.cpp PROPERTIES LANGUAGE CUDA)
     target_include_directories(VirtualFluids_GPUTests PRIVATE "${VF_THIRD_DIR}/cuda_samples/")
 endif()
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
index 18fb60411..b37699962 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
@@ -1704,7 +1704,6 @@ void CudaMemoryManager::cudaAllocPrecursorData(int lev)
 {
     size_t size = parameter->getParH(lev)->precursorBC.numberOfPrecursorNodes*sizeof(real)*parameter->getParH(lev)->precursorBC.numberOfQuantities;
 
-    checkCudaErrors( cudaStreamCreate(&parameter->getParH(lev)->precursorBC.stream) );
     checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.last, size));
     checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.current, size));
     checkCudaErrors( cudaMallocHost((void**) &parameter->getParH(lev)->precursorBC.next, size));
@@ -1739,9 +1738,16 @@ void CudaMemoryManager::cudaCopyPrecursorBC(int lev)
 void CudaMemoryManager::cudaCopyPrecursorData(int lev)
 {
     auto prec = &parameter->getParH(lev)->precursorBC;
+    // Simulation::init only launches the managed streams when parameter->getUseStreams() is set.
+    // NOTE(review): fall back to the default stream (0) otherwise, so getStream() is never asked for a
+    // stream that was not launched -- confirm this is the intended behavior for runs without streams.
+    cudaStream_t precStream = 0;
+    if (parameter->getUseStreams()) {
+        precStream = parameter->getStreamManager()->getStream(parameter->getStreamManager()->getPrecursorStreamIndex());
+    }
     size_t memSize = prec->numberOfPrecursorNodes*sizeof(real)*prec->numberOfQuantities;
-    checkCudaErrors( cudaStreamSynchronize(prec->stream) );
-    checkCudaErrors( cudaMemcpyAsync(parameter->getParD(lev)->precursorBC.next, prec->next, memSize, cudaMemcpyHostToDevice, prec->stream)) ;
+    // Wait for work already queued on the stream, then enqueue the asynchronous host-to-device copy of 'next'.
+    checkCudaErrors( cudaStreamSynchronize(precStream) );
+    checkCudaErrors( cudaMemcpyAsync(parameter->getParD(lev)->precursorBC.next, prec->next, memSize, cudaMemcpyHostToDevice, precStream) );
 }
 
 
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index 83ca85243..d31ef0cb2 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -115,7 +115,7 @@ void Simulation::init(GridProvider &gridProvider, BoundaryConditionFactory *bcFa
     //////////////////////////////////////////////////////////////////////////
     // CUDA streams
     if (para->getUseStreams()) {
-        para->getStreamManager()->launchStreams(2u);
+        para->getStreamManager()->launchStreams(3u);
         para->getStreamManager()->createCudaEvents();
     }
     //////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
index 3731836f3..e44c0885b 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
@@ -51,6 +51,8 @@ int CudaStreamManager::getBorderStreamIndex() { return borderStreamIndex; }
 
 int CudaStreamManager::getBulkStreamIndex() { return bulkStreamIndex; }
 
+int CudaStreamManager::getPrecursorStreamIndex() { return precursorStreamIndex; }
+
 void CudaStreamManager::createCudaEvents()
 {
     checkCudaErrors(cudaEventCreateWithFlags(&startBulkKernel, cudaEventDisableTiming));
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
index c2d515ab5..d595fdce4 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
@@ -40,6 +40,7 @@ class CudaStreamManager
 private:
     std::vector<cudaStream_t> cudaStreams;
     cudaEvent_t startBulkKernel = NULL;
+    const int precursorStreamIndex    = 2;
     const int borderStreamIndex       = 1;
     const int bulkStreamIndex         = 0;
 
@@ -47,9 +48,9 @@ public:
     void launchStreams(uint numberOfStreams);
     void terminateStreams();
     cudaStream_t &getStream(uint streamIndex);
-
     int getBorderStreamIndex();
     int getBulkStreamIndex();
+    int getPrecursorStreamIndex();
 
     // Events
     void createCudaEvents();
-- 
GitLab