diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
index 324e0dbb4f37788f83c42614b7aeb0923e86a0f8..d62e9607d1325aad734458e33f032faaa0251697 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
@@ -86,9 +86,10 @@ void updateGrid27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManage
             if (para->useReducedCommunicationAfterFtoC) {
                 prepareExchangeMultiGPU(para, level, -1); // TODO
                 exchangeMultiGPU(para, comm, cudaManager, level, -1); // TODO
+            } else {
+                prepareExchangeMultiGPU(para, level, -1);
+                exchangeMultiGPU(para, comm, cudaManager, level, -1);
             }
-            prepareExchangeMultiGPU(para, level, -1);
-            exchangeMultiGPU(para, comm, cudaManager, level, -1);
 
             coarseToFine(para, level);
         } else {
@@ -216,6 +217,14 @@ void prepareExchangeMultiGPU(Parameter *para, int level, int streamIndex)
         prepareExchangeCollDataXGPU27(para, level, streamIndex);
         prepareExchangeCollDataYGPU27(para, level, streamIndex);
         prepareExchangeCollDataZGPU27(para, level, streamIndex);
+    }   
+}
+
+void prepareExchangeMultiGPUAfterFtoC(Parameter *para, int level, int streamIndex) { // prepares the X/Y/Z send data like prepareExchangeMultiGPU, but requests the reduced variant for Y (FtoC presumably = fine-to-coarse; confirm)
+    if (para->getNumprocs() > 1) { // the prepare calls are skipped unless more than one process is reported
+        prepareExchangeCollDataXGPU27(para, level, streamIndex); // X is prepared without a reduced-communication flag
+        prepareExchangeCollDataYGPU27(para, level, streamIndex, true); // true = useReducedCommunicationAfterFtoC: node counts are read from sendProcessNeighborsAfterFtoCY
+        prepareExchangeCollDataZGPU27(para, level, streamIndex); // Z is prepared without a reduced-communication flag
     }
 }
 
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
index 7a58feb622ad7ed51604b96f7d005d0957fe741e..20da69d3245f746e18d526ae3e47b30ada30bf53 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
@@ -27,6 +27,7 @@ extern "C" void collisionPorousMedia(Parameter* para, std::vector<std::shared_pt
 extern "C" void collisionAdvectionDiffusion(Parameter* para, int level);
 
 extern "C" void prepareExchangeMultiGPU(Parameter *para, int level, int streamIndex);
+extern "C" void prepareExchangeMultiGPUAfterFtoC(Parameter *para, int level, int streamIndex);
 
 extern "C" void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
                                  int level, int streamIndex);
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
index 20b100f035164b3d283921366f9dea6021731dd8..29e5cb4890b9cb24cfbf2c15d98f65eb00cab1c5 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
@@ -108,14 +108,20 @@ void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Y
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex)
+void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex, bool useReducedCommunicationAfterFtoC)
 {
     cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);   
+    std::vector<ProcessNeighbor27> *sendProcessNeighbor;
+    if (useReducedCommunicationAfterFtoC)
+        sendProcessNeighbor = &para->getParD(level)->sendProcessNeighborsAfterFtoCY;
+    else
+        sendProcessNeighbor = &para->getParD(level)->sendProcessNeighborY;
+
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
         GetSendFsPostDev27(para->getParD(level)->d0SP.f[0], 
                            para->getParD(level)->sendProcessNeighborY[i].f[0],
                            para->getParD(level)->sendProcessNeighborY[i].index,
-                           para->getParD(level)->sendProcessNeighborY[i].numberOfNodes,
+                           (*sendProcessNeighbor)[i].numberOfNodes,
                            para->getParD(level)->neighborX_SP, 
                            para->getParD(level)->neighborY_SP,
                            para->getParD(level)->neighborZ_SP, 
@@ -133,6 +139,7 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
     //copy Device to Host
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
         cudaManager->cudaCopyProcessNeighborYFsDH(level, i, streamIndex);
+    // TODO: beforehand, adopt the pointers to para->getParD(level)->sendProcessNeighborY[i].f[0] for sendProcessNeighborsAfterFtoCY
 
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //start non blocking MPI receive
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
index 5b494c258e428aafcddfc60903c9962fbbc30469..6b537242ceddc6e33ac4f3fe65e466411a1b46f1 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
@@ -17,7 +17,7 @@ extern "C" void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator
 extern "C" void prepareExchangeCollDataXGPU27(Parameter *para, int level, int streamIndex);
 extern "C" void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
                                        int level, int streamIndex);
-extern "C" void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex);
+extern "C" void prepareExchangeCollDataYGPU27(Parameter *para, int level, int streamIndex, bool useReducedCommunicationAfterFtoC = false);
 extern "C" void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
                                        int level, int streamIndex);
 extern "C" void prepareExchangeCollDataZGPU27(Parameter *para, int level, int streamIndex);
diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h
index 4f3a56b875567f036c0907ecf6ce3df249d38374..a33b3b792cd451307825fd0b2c8716e942440582 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/LB.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h
@@ -235,11 +235,6 @@ typedef struct PN27{
 	int numberOfFs;
 }ProcessNeighbor27;
 
-typedef struct PN27AfterFtoC {
-    uint memsizeFs;
-    int numberOfNodes;
-} ProcessNeighbor27AfterFtoC;
-
 typedef struct PN_F3 {
 	real* g[6];
 	uint memsizeGs;
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
index 4c702b6bdf52a751c8f7e3880e1542de6fef725f..88531ea387d898986e72216164b6d4be8b205770 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
@@ -1436,7 +1436,7 @@ void Parameter::setIsNeighborZ(bool isNeigbor)
 void Parameter::setSendProcessNeighborsAfterFtoCX(int numberOfNodes, int level, int arrayIndex) {
     this->getParH(level)->sendProcessNeighborsAfterFtoCX[arrayIndex].numberOfNodes = numberOfNodes;
     this->getParD(level)->sendProcessNeighborsAfterFtoCX[arrayIndex].numberOfNodes = numberOfNodes;
-    this->getParH(level)->sendProcessNeighborsAfterFtoCX[arrayIndex].memsizeFs     = sizeof(real) *numberOfNodes;
+    this->getParH(level)->sendProcessNeighborsAfterFtoCX[arrayIndex].memsizeFs     = sizeof(real) * numberOfNodes; // size in bytes of numberOfNodes values of type real; the same value is stored on the getParD entry below
     this->getParD(level)->sendProcessNeighborsAfterFtoCX[arrayIndex].memsizeFs     = sizeof(real) * numberOfNodes;
 }
 void Parameter::setSendProcessNeighborsAfterFtoCY(int numberOfNodes, int level, int arrayIndex)
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
index 6bd504aae241289a6862d9ab800a775df634532b..d05da93f3cdb401365c7953211158123f731f5a9 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
@@ -294,12 +294,12 @@ struct LBMSimulationParameter
     std::vector<ProcessNeighbor27> recvProcessNeighborY;
     std::vector<ProcessNeighbor27> recvProcessNeighborZ;
 
-    std::vector<ProcessNeighbor27AfterFtoC> sendProcessNeighborsAfterFtoCX;
-    std::vector<ProcessNeighbor27AfterFtoC> sendProcessNeighborsAfterFtoCY;
-    std::vector<ProcessNeighbor27AfterFtoC> sendProcessNeighborsAfterFtoCZ;
-    std::vector<ProcessNeighbor27AfterFtoC> recvProcessNeighborsAfterFtoCX;
-    std::vector<ProcessNeighbor27AfterFtoC> recvProcessNeighborsAfterFtoCY;
-    std::vector<ProcessNeighbor27AfterFtoC> recvProcessNeighborsAfterFtoCZ;
+    std::vector<ProcessNeighbor27> sendProcessNeighborsAfterFtoCX;
+    std::vector<ProcessNeighbor27> sendProcessNeighborsAfterFtoCY;
+    std::vector<ProcessNeighbor27> sendProcessNeighborsAfterFtoCZ;
+    std::vector<ProcessNeighbor27> recvProcessNeighborsAfterFtoCX;
+    std::vector<ProcessNeighbor27> recvProcessNeighborsAfterFtoCY;
+    std::vector<ProcessNeighbor27> recvProcessNeighborsAfterFtoCZ;
     ///////////////////////////////////////////////////////
     // 3D domain decomposition convection diffusion
     std::vector<ProcessNeighbor27> sendProcessNeighborADX;