diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
index 6e73c1e88faca72d0c96af53f60d4d87ae5309f3..4b180fb65383ad4d9ad745ea910677001d6d5b31 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
@@ -7,6 +7,7 @@
 //#include "Output/UnstructuredGridWriter.hpp"
 #include "Communication/ExchangeData27.h"
 #include "Kernel/Kernel.h"
+#include "Parameter/CudaStreamManager.h"
 
 void updateGrid27(Parameter* para, 
                   vf::gpu::Communicator* comm, 
@@ -41,10 +42,10 @@ void updateGrid27(Parameter* para,
         //prepare exchange and trigger bulk kernel when finished
         prepareExchangeMultiGPU(para, level, borderStreamIndex);
         if (para->getUseStreams())
-            para->getStreamManager().triggerStartBulkKernel(borderStreamIndex);
+            para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex);
 
         // launch bulk kernel
-        para->getStreamManager().waitOnStartBulkKernelEvent(bulkStreamIndex);
+        para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex);
         collisionUsingIndex(para, pm, level, t, kernels, para->getParD(level)->fluidNodeIndices,
                             para->getParD(level)->numberOfFluidNodes, bulkStreamIndex);
 
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
index 99a3489a03de1f20cf447664928bde0ef6bd0108..20b100f035164b3d283921366f9dea6021731dd8 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
@@ -1,6 +1,7 @@
 #include "Communication/ExchangeData27.h"
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
+#include "Parameter/CudaStreamManager.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //3D domain decomposition
@@ -9,7 +10,7 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 void prepareExchangeCollDataXGPU27(Parameter *para, int level, int streamIndex) 
 {
-    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex);
+    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
         GetSendFsPostDev27(para->getParD(level)->d0SP.f[0],
                            para->getParD(level)->sendProcessNeighborX[i].f[0],
@@ -27,7 +28,7 @@ void prepareExchangeCollDataXGPU27(Parameter *para, int level, int streamIndex)
 void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level,
                                 int streamIndex)
 {
-    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex);
+    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Device to Host
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
@@ -109,7 +110,7 @@ void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex)
 {
-    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex);   
+    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);   
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
         GetSendFsPostDev27(para->getParD(level)->d0SP.f[0], 
                            para->getParD(level)->sendProcessNeighborY[i].f[0],
@@ -127,7 +128,7 @@ void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex)
 void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level,
                                 int streamIndex)
 {
-    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex);
+    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Device to Host
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
@@ -219,7 +220,7 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
 // Z
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 void prepareExchangeCollDataZGPU27(Parameter *para, int level, int streamIndex) {
-    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex);   
+    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);   
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
         GetSendFsPostDev27(para->getParD(level)->d0SP.f[0],
                            para->getParD(level)->sendProcessNeighborZ[i].f[0],
@@ -237,7 +238,7 @@ void prepareExchangeCollDataZGPU27(Parameter *para, int level, int streamIndex)
 void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level,
                                 int streamIndex)
 {
-    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex);
+    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Device to Host
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
index 72a17e863c4fd1588561ac53e2e9b23e2c0309a5..cf1314b1bffde1a38f016c7bc45b172757d22cdb 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
@@ -5,6 +5,7 @@
 #include <math.h>
 
 #include <Parameter/Parameter.h>
+#include "Parameter/CudaStreamManager.h"
 
 #include "Calculation/PorousMedia.h"
 
@@ -520,7 +521,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborXFsHD(int lev, unsigned int proce
                          parameter->getParH(lev)->recvProcessNeighborX[processNeighbor].f[0],
                          parameter->getD3Qxx() * parameter->getParD(lev)->recvProcessNeighborX[processNeighbor].memsizeFs,
                          cudaMemcpyHostToDevice,
-                         parameter->getStreamManager().getStream(streamIndex)));
+                         parameter->getStreamManager()->getStream(streamIndex)));
 }
 void CudaMemoryManager::cudaCopyProcessNeighborXFsDH(int lev, unsigned int processNeighbor, int streamIndex)
 {
@@ -534,7 +535,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborXFsDH(int lev, unsigned int proce
     								     parameter->getParD(lev)->sendProcessNeighborX[processNeighbor].f[0], 
     								     parameter->getD3Qxx() * parameter->getParD(lev)->sendProcessNeighborX[processNeighbor].memsizeFs, 
     								     cudaMemcpyDeviceToHost,
-                                         parameter->getStreamManager().getStream(streamIndex)));
+                                         parameter->getStreamManager()->getStream(streamIndex)));
 }
 void CudaMemoryManager::cudaFreeProcessNeighborX(int lev, unsigned int processNeighbor)
 {
@@ -588,7 +589,8 @@ void CudaMemoryManager::cudaCopyProcessNeighborYFsHD(int lev, unsigned int proce
         checkCudaErrors(cudaMemcpyAsync(parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].f[0],
                         parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].f[0],
                         parameter->getD3Qxx() * parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].memsizeFs,
-                        cudaMemcpyHostToDevice, parameter->getStreamManager().getStream(streamIndex)));
+                        cudaMemcpyHostToDevice, 
+                        parameter->getStreamManager()->getStream(streamIndex)));
 }
 void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, int streamIndex)
 {
@@ -602,7 +604,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int proce
             cudaMemcpyAsync(parameter->getParH(lev)->sendProcessNeighborY[processNeighbor].f[0],
                             parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0],
                             parameter->getD3Qxx() * parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].memsizeFs,
-                                        cudaMemcpyDeviceToHost, parameter->getStreamManager().getStream(streamIndex)));
+                            cudaMemcpyDeviceToHost, parameter->getStreamManager()->getStream(streamIndex)));
 }
 void CudaMemoryManager::cudaFreeProcessNeighborY(int lev, unsigned int processNeighbor)
 {
@@ -657,7 +659,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborZFsHD(int lev, unsigned int proce
 	    				                 parameter->getParH(lev)->recvProcessNeighborZ[processNeighbor].f[0], 
 	    				                 parameter->getD3Qxx() * parameter->getParD(lev)->recvProcessNeighborZ[processNeighbor].memsizeFs, 
 	    				                 cudaMemcpyHostToDevice, 
-                                         parameter->getStreamManager().getStream(streamIndex)));
+                                         parameter->getStreamManager()->getStream(streamIndex)));
 }
 void CudaMemoryManager::cudaCopyProcessNeighborZFsDH(int lev, unsigned int processNeighbor, int streamIndex)
 {   
@@ -671,7 +673,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborZFsDH(int lev, unsigned int proce
 	        						     parameter->getParD(lev)->sendProcessNeighborZ[processNeighbor].f[0], 
 	        						     parameter->getD3Qxx() * parameter->getParD(lev)->sendProcessNeighborZ[processNeighbor].memsizeFs, 
 	        						     cudaMemcpyDeviceToHost, 
-                                         parameter->getStreamManager().getStream(streamIndex)));
+                                         parameter->getStreamManager()->getStream(streamIndex)));
 }
 void CudaMemoryManager::cudaFreeProcessNeighborZ(int lev, unsigned int processNeighbor)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.cu
index b2ba5fdb1c7db76dccf3d0da1c494618ad6afaaa..76345fcd84d9ad11a2ad1929870e39a6a2a08814 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimSparse/CumulantK17CompChimSparse.cu
@@ -1,6 +1,7 @@
 #include "CumulantK17CompChimSparse.h"
 
 #include "Parameter/Parameter.h"
+#include "Parameter/CudaStreamManager.h"
 #include "CumulantK17CompChimSparse_Device.cuh"
 
 #include <cuda.h>
@@ -39,7 +40,7 @@ void CumulantK17CompChimSparse::runOnIndices(const unsigned int *indices, unsign
     std::tie(grid, threads) =
         *calcGridDimensions(para->getParD(level)->numberOfFluidNodes, para->getParD(level)->numberofthreads);
 
-    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager().getStream(streamIndex);
+    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
 
     LB_Kernel_CumulantK17CompChimSparse<<<grid, threads, 0, stream>>>(
         para->getParD(level)->omega, 
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index 6a4761963c3dd2e8928097d4690a1941c8387d9b..9658d4857f043ed0713f1c6b9d25e689db5257c5 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -9,6 +9,7 @@
 #include "Communication/Communicator.h"
 #include "Communication/ExchangeData27.h"
 #include "Parameter/Parameter.h"
+#include "Parameter/CudaStreamManager.h"
 #include "GPU/GPU_Interface.h"
 #include "GPU/devCheck.h"
 #include "basics/utilities/UbFileOutputASCII.h"
@@ -106,8 +107,8 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std
    //////////////////////////////////////////////////////////////////////////
    // CUDA streams
    if (para->getUseStreams()) {
-       para->getStreamManager().launchStreams(2u);
-       para->getStreamManager().createCudaEvents();
+       para->getStreamManager()->launchStreams(2u);
+       para->getStreamManager()->createCudaEvents();
    }
    //////////////////////////////////////////////////////////////////////////
    // 
@@ -1171,8 +1172,8 @@ void Simulation::free()
 {
 	// Cuda Streams
     if (para->getUseStreams()) {
-        para->getStreamManager().destroyCudaEvents();
-        para->getStreamManager().terminateStreams();
+        para->getStreamManager()->destroyCudaEvents();
+        para->getStreamManager()->terminateStreams();
 	}
 
 	//CudaFreeHostMemory
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
index 440e589847c0ff3a1952e49b212ee6219e5a6086..56bb66f1a07d4ee533ebd2a66ca3eed494d295e7 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
@@ -42,6 +42,8 @@
 
 #include <basics/config/ConfigurationFile.h>
 
+#include "Parameter/CudaStreamManager.h"
+
 
 
 Parameter::Parameter(const vf::basics::ConfigurationFile &configData, int numberOfProcesses, int myId)
@@ -2487,12 +2489,12 @@ real Parameter::TrafoZtoMGsWorld(int CoordZ, int level)
 
 void Parameter::setUseStreams() {
 	this->useStreams = true;
-    this->cudaStreamManager = CudaStreamManager();
+    this->cudaStreamManager = std::make_unique<CudaStreamManager>();
 }
 
 bool Parameter::getUseStreams() { return this->useStreams; }
 
-CudaStreamManager &Parameter::getStreamManager() { return this->cudaStreamManager; }
+std::shared_ptr<CudaStreamManager> &Parameter::getStreamManager() { return this->cudaStreamManager; }
 
 void Parameter::findCornerNodesCommMultiGPU() { 
 	for (uint level = 0; level < parH.size(); level++) {
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
index 66f36122f08da3910b42ade203cb6ef7fdc628e5..813287d9ccfe4258690e0c07322d885d43b5d9c3 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
@@ -43,8 +43,6 @@
 
 #include "VirtualFluids_GPU_export.h"
 
-#include "Parameter/CudaStreamManager.h"
-
 struct curandStateXORWOW;
 typedef struct curandStateXORWOW curandState;
 namespace vf
@@ -53,7 +51,8 @@ namespace basics
 {
 class ConfigurationFile;
 }
-}
+} 
+class CudaStreamManager;
 
 //! \struct LBMSimulationParameter
 //! \brief struct holds and manages the LB-parameter of the simulation
@@ -857,12 +856,12 @@ private:
 
     // cuda streams
     bool useStreams { false };
-    CudaStreamManager cudaStreamManager;
+    std::shared_ptr<CudaStreamManager> cudaStreamManager;
 
 public:
     void setUseStreams();
     bool getUseStreams();
-    CudaStreamManager &getStreamManager();
+    std::shared_ptr<CudaStreamManager> &getStreamManager();
 
     void findCornerNodesCommMultiGPU();
     void findCornerNodesXY(int level);