diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
index d28f7f4e4175d6a9f75ddc8edac8410a08560068..5726f1e6e52597154de29958246b0d715a7f5d08 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
@@ -77,8 +77,8 @@ void updateGrid27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManage
         } else {
             //fineToCoarse(para, level);
 
-            fineToCoarseWithStream(para, level, para->getParD(level)->intFCBorder.ICellFCC,
-                                   para->getParD(level)->intFCBorder.kFC, -1);
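+            // NOTE(review): the border-cell pass below is disabled; confirm intFCBulk covers every fine-to-coarse cell on this path, otherwise restore it.
+            //fineToCoarseWithStream(para, level, para->getParD(level)->intFCBorder.ICellFCC, para->getParD(level)->intFCBorder.kFC, -1);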
             fineToCoarseWithStream(para, level, para->getParD(level)->intFCBulk.ICellFCC,
                                    para->getParD(level)->intFCBulk.kFC, -1);
 
@@ -988,7 +988,7 @@ void fineToCoarse(Parameter* para, int level)
 							para->getParD(level)->K_FC,           para->getParD(level)->omega,           para->getParD(level+1)->omega, 
 							para->getParD(level)->vis,            para->getParD(level)->nx,              para->getParD(level)->ny, 
 							para->getParD(level+1)->nx,           para->getParD(level+1)->ny,            para->getParD(level)->numberofthreads,
-							para->getParD(level)->offFC);
+							para->getParD(level)->offFC,          CU_STREAM_LEGACY);
     getLastCudaError("ScaleFC27_RhoSq_comp execution failed");
 
 	//ScaleFC_AA2016_comp_27( para->getParD(level)->d0SP.f[0],      para->getParD(level+1)->d0SP.f[0], 
@@ -1136,17 +1136,17 @@ void fineToCoarse(Parameter* para, int level)
 void fineToCoarseWithStream(Parameter *para, int level, uint *iCellFCC, uint k_FC, int streamIndex)
 {
     cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
+
+    ScaleFC_RhoSq_comp_27(	para->getParD(level)->d0SP.f[0],      para->getParD(level+1)->d0SP.f[0], 
+							para->getParD(level)->neighborX_SP,   para->getParD(level)->neighborY_SP,    para->getParD(level)->neighborZ_SP, 
+							para->getParD(level+1)->neighborX_SP, para->getParD(level+1)->neighborY_SP,  para->getParD(level+1)->neighborZ_SP, 
+							para->getParD(level)->size_Mat_SP,    para->getParD(level+1)->size_Mat_SP,   para->getParD(level)->evenOrOdd,
+							iCellFCC,                             para->getParD(level)->intFC.ICellFCF, 
+							k_FC,                                 para->getParD(level)->omega,           para->getParD(level + 1)->omega, 
+							para->getParD(level)->vis,            para->getParD(level)->nx,              para->getParD(level)->ny, 
+							para->getParD(level+1)->nx,           para->getParD(level+1)->ny,            para->getParD(level)->numberofthreads,
+							para->getParD(level)->offFC,          stream);
     getLastCudaError("ScaleFC27_RhoSq_comp execution failed");
-    ScaleFC_RhoSq_comp_27_Stream( para->getParD(level)->d0SP.f[0],      para->getParD(level+1)->d0SP.f[0], 
-							      para->getParD(level)->neighborX_SP,   para->getParD(level)->neighborY_SP,    para->getParD(level)->neighborZ_SP, 
-							      para->getParD(level+1)->neighborX_SP, para->getParD(level+1)->neighborY_SP,  para->getParD(level+1)->neighborZ_SP, 
-							      para->getParD(level)->size_Mat_SP,    para->getParD(level+1)->size_Mat_SP,   para->getParD(level)->evenOrOdd,
-                                  iCellFCC,                             para->getParD(level)->intFC.ICellFCF, 
-							      k_FC,                                 para->getParD(level)->omega,           para->getParD(level + 1)->omega, 
-							      para->getParD(level)->vis,            para->getParD(level)->nx,              para->getParD(level)->ny, 
-							      para->getParD(level+1)->nx,           para->getParD(level+1)->ny,            para->getParD(level)->numberofthreads,
-							      para->getParD(level)->offFC,          stream);
-    getLastCudaError("ScaleFC27_RhoSq_comp_Stream execution failed");
 
     //////////////////////////////////////////////////////////////////////////
     // A D V E C T I O N    D I F F U S I O N
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
index e23644514bc87ae74fa825be61df0a00332a8621..5b7a32481bd076c70eb62f7a7fdfe8afa32936a7 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
@@ -2091,33 +2091,9 @@ extern "C" void ScaleFC_RhoSq_comp_27(  real* DC,
 										unsigned int nyC, 
 										unsigned int nxF, 
 										unsigned int nyF,
-										unsigned int numberOfThreads,
-										OffFC offFC);
-
-extern "C" void ScaleFC_RhoSq_comp_27_Stream(	real* DC, 
-												real* DF, 
-												unsigned int* neighborCX,
-												unsigned int* neighborCY,
-												unsigned int* neighborCZ,
-												unsigned int* neighborFX,
-												unsigned int* neighborFY,
-												unsigned int* neighborFZ,
-												unsigned int size_MatC, 
-												unsigned int size_MatF, 
-												bool evenOrOdd,
-												unsigned int* posC, 
-												unsigned int* posFSWB, 
-												unsigned int kFC, 
-												real omCoarse, 
-												real omFine, 
-												real nu, 
-												unsigned int nxC, 
-												unsigned int nyC, 
-												unsigned int nxF, 
-												unsigned int nyF, 
-											    unsigned int numberOfThreads,
-												OffFC offFC,                                              
-												CUstream_st *stream);
+										unsigned int numberOfThreads, 
+	                                    OffFC offFC,
+                                        CUstream_st *stream);
 
 extern "C" void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, 
 											  real* DF, 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
index 3885c24dc4476443d50af61cf293ac3cc8d2e12e..6e072c6dc490c7671aa81bcfe14df7d27683cd16 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
@@ -6201,7 +6201,8 @@ extern "C" void ScaleFC_RhoSq_comp_27(real* DC,
 									  unsigned int nxF, 
 									  unsigned int nyF,
 									  unsigned int numberOfThreads,
-									  OffFC offFC)
+									  OffFC offFC,
+                                      CUstream_st *stream)
 {
    int Grid = (kFC / numberOfThreads)+1;
    int Grid1, Grid2;
@@ -6218,7 +6219,8 @@ extern "C" void ScaleFC_RhoSq_comp_27(real* DC,
    dim3 gridINT_FC(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      scaleFC_RhoSq_comp_27<<< gridINT_FC, threads >>>(DC, 
+      scaleFC_RhoSq_comp_27<<<gridINT_FC, threads, 0, stream>>>(
+													   DC, 
 													   DF, 
 													   neighborCX,
 													   neighborCY,
@@ -6242,70 +6244,7 @@ extern "C" void ScaleFC_RhoSq_comp_27(real* DC,
 													   offFC);
       getLastCudaError("scaleFC_RhoSq_27 execution failed"); 
 }
-extern "C" void ScaleFC_RhoSq_comp_27_Stream(real * DC, 
-											 real * DF, 
-											 unsigned int * neighborCX, 
-											 unsigned int * neighborCY, 
-											 unsigned int * neighborCZ, 
-											 unsigned int * neighborFX, 
-											 unsigned int * neighborFY, 
-											 unsigned int * neighborFZ, 
-											 unsigned int size_MatC, 
-											 unsigned int size_MatF, 
-											 bool evenOrOdd, 
-											 unsigned int * posC, 
-											 unsigned int * posFSWB, 
-											 unsigned int kFC, 
-											 real omCoarse, 
-											 real omFine, 
-											 real nu, 
-											 unsigned int nxC, 
-											 unsigned int nyC, 
-											 unsigned int nxF, 
-											 unsigned int nyF, 
-											 unsigned int numberOfThreads,
-											 OffFC offFC,
-											 CUstream_st* stream)
-{
-   int Grid = (kFC / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   } 
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridINT_FC(Grid1, Grid2);
-   dim3 threads(numberOfThreads, 1, 1 );
 
-   scaleFC_RhoSq_comp_27<<< gridINT_FC, threads, 0, stream >>>(DC, 
-										                       DF, 
-										                       neighborCX,
-										                       neighborCY,
-										                       neighborCZ,
-										                       neighborFX,
-										                       neighborFY,
-										                       neighborFZ,
-										                       size_MatC, 
-										                       size_MatF, 
-										                       evenOrOdd,
-										                       posC, 
-										                       posFSWB, 
-										                       kFC, 
-										                       omCoarse, 
-										                       omFine, 
-										                       nu, 
-										                       nxC, 
-										                       nyC, 
-										                       nxF, 
-										                       nyF,
-										                       offFC);
-   getLastCudaError("scaleFC_RhoSq_27 execution failed"); 
-}
 //////////////////////////////////////////////////////////////////////////
 extern "C" void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, 
 											  real* DF,