From 520f1db177ba5e299ed56e53144b430b73685b78 Mon Sep 17 00:00:00 2001
From: Anna Wellmann <a.wellmann@tu-braunschweig.de>
Date: Sun, 28 Nov 2021 09:43:42 +0100
Subject: [PATCH] Do not use edge node routine on CPU in version without streams

---
 .../Calculation/UpdateGrid27.cpp              | 54 ++++++++++++++++---
 .../Calculation/UpdateGrid27.h                |  2 +
 .../Communication/ExchangeData27.cpp          |  6 +--
 3 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
index 5ba634900..a05b1c888 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
@@ -85,8 +85,7 @@ void UpdateGrid27::refinementAndExchange_noStreams_onlyExchangeInterface(int lev
 {
     fineToCoarse(para.get(), level);
 
-    prepareExchangeMultiGPUAfterFtoC(para.get(), level, -1);
-    exchangeMultiGPUAfterFtoC(para.get(), comm, cudaManager.get(), level, -1);
+    exchangeMultiGPU_noStreams_withPrepare(para.get(), comm, cudaManager.get(), level, true);
 
     coarseToFine(para.get(), level);
 }
@@ -95,8 +94,7 @@ void UpdateGrid27::refinementAndExchange_noStreams_completeExchange(int level)
 {
     fineToCoarse(para.get(), level);
 
-    prepareExchangeMultiGPU(para.get(), level, -1);
-    exchangeMultiGPU(para.get(), comm, cudaManager.get(), level, -1);
+    exchangeMultiGPU_noStreams_withPrepare(para.get(), comm, cudaManager.get(), level, false);
 
     coarseToFine(para.get(), level);
 }
@@ -111,15 +109,13 @@ void UpdateGrid27::collisionAndExchange_noStreams_indexKernel(int level, unsigne
 {
     collisionUsingIndex(para.get(), pm, level, t, kernels, para->getParD(level)->fluidNodeIndices,
                             para->getParD(level)->numberOfFluidNodes, -1);
-    prepareExchangeMultiGPU(para.get(), level, -1);
-    exchangeMultiGPU(para.get(), comm, cudaManager.get(), level, -1);
+    exchangeMultiGPU_noStreams_withPrepare(para.get(), comm, cudaManager.get(), level, false);
 }
 
 void UpdateGrid27::collisionAndExchange_noStreams_oldKernel(int level, unsigned int t)
 {
     collision(para.get(), pm, level, t, kernels);
-    prepareExchangeMultiGPU(para.get(), level, -1);
-    exchangeMultiGPU(para.get(), comm, cudaManager.get(), level, -1);
+    exchangeMultiGPU_noStreams_withPrepare(para.get(), comm, cudaManager.get(), level, false);
 }
 
 void UpdateGrid27::collisionAndExchange_streams(int level, unsigned int t)
@@ -298,6 +294,48 @@ void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryMa
     // 1D domain decomposition
     // exchangePostCollDataGPU27(para, comm, level);
 }
+void exchangeMultiGPU_noStreams_withPrepare(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, bool useReducedComm)
+{
+    // Multi-GPU exchange for the variant without streams: for each axis the prepare, exchange and scatter routines are called back to back.
+    // 3D domain decomposition. useReducedComm == true selects the *AfterFtoC routines, false selects the *AllNodes routines.
+    if (useReducedComm) {
+        // X: prepare, exchange, scatter (every call gets -1 as last argument; presumably the stream index, i.e. "no stream" - TODO confirm)
+        prepareExchangeCollDataXGPU27AfterFtoC(para, level, -1);
+        exchangeCollDataXGPU27AfterFtoC(para, comm, cudaManager, level, -1);
+        scatterNodesFromRecvBufferXGPU27AfterFtoC(para, level, -1);
+        // Y: same three calls; runs only after the X scatter call has returned
+        prepareExchangeCollDataYGPU27AfterFtoC(para, level, -1);
+        exchangeCollDataYGPU27AfterFtoC(para, comm, cudaManager, level, -1);
+        scatterNodesFromRecvBufferYGPU27AfterFtoC(para, level, -1);
+        // Z: same three calls; runs only after the X and Y scatter calls have returned
+        prepareExchangeCollDataZGPU27AfterFtoC(para, level, -1);
+        exchangeCollDataZGPU27AfterFtoC(para, comm, cudaManager, level, -1);
+        scatterNodesFromRecvBufferZGPU27AfterFtoC(para, level, -1);  
+    } else {
+        // X: prepare, exchange, scatter using the *AllNodes routines (again with -1 as last argument)
+        prepareExchangeCollDataXGPU27AllNodes(para, level, -1);
+        exchangeCollDataXGPU27AllNodes(para, comm, cudaManager, level, -1);
+        scatterNodesFromRecvBufferXGPU27AllNodes(para, level, -1);
+        // Y: same three calls; runs only after the X scatter call has returned
+        prepareExchangeCollDataYGPU27AllNodes(para, level, -1);
+        exchangeCollDataYGPU27AllNodes(para, comm, cudaManager, level, -1);
+        scatterNodesFromRecvBufferYGPU27AllNodes(para, level, -1);
+        // Z: same three calls; runs only after the X and Y scatter calls have returned
+        prepareExchangeCollDataZGPU27AllNodes(para, level, -1);
+        exchangeCollDataZGPU27AllNodes(para, comm, cudaManager, level, -1);
+        scatterNodesFromRecvBufferZGPU27AllNodes(para, level, -1);   
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+    // 3D domain decomposition convection diffusion (executed only when para->getDiffOn() returns true; independent of useReducedComm)
+    if (para->getDiffOn()) {
+        if (para->getUseStreams()) // guards only the warning below; the three AD exchange calls run regardless
+            std::cout << "Warning: Cuda streams not yet implemented for convection diffusion" << std::endl;
+        exchangePostCollDataADXGPU27(para, comm, cudaManager, level);
+        exchangePostCollDataADYGPU27(para, comm, cudaManager, level);
+        exchangePostCollDataADZGPU27(para, comm, cudaManager, level);
+    }
+}
 void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level,
                                int streamIndex)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
index 32b4bf82b..d55e4ee72 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
@@ -63,6 +63,8 @@ extern "C" void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator *comm, C
                                  int level, int streamIndex);
 extern "C" void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager,
                                  int level, int streamIndex);
+extern "C" void exchangeMultiGPU_noStreams_withPrepare(Parameter *para, vf::gpu::Communicator *comm,
+                                                       CudaMemoryManager *cudaManager, int level, bool useReducedComm);
 
 extern "C" void postCollisionBC(Parameter* para, int level, unsigned int t);
 
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
index 587d2bd71..30d9e7a3d 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
@@ -220,7 +220,7 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
     if (para->getUseStreams()) cudaStreamSynchronize(stream);
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // edge nodes: copy received node values from x
-    if (para->getNumberOfProcessNeighborsX(level, "recv") > 0) {
+    if (para->getUseStreams()  && para->getNumberOfProcessNeighborsX(level, "recv") > 0) {
         uint indexInSubdomainX = 0;
         uint indexInSubdomainY = 0;
         uint numNodesInBufferX = 0;
@@ -323,7 +323,7 @@ void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
     if (para->getUseStreams()) cudaStreamSynchronize(stream);
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // edge nodes: copy received node values from x
-    if (para->getNumberOfProcessNeighborsX(level, "recv") > 0) {
+    if (para->getUseStreams() && para->getNumberOfProcessNeighborsX(level, "recv") > 0) {
         uint indexInSubdomainX = 0;
         uint indexInSubdomainZ = 0;
         uint numNodesInBufferX = 0;
@@ -343,7 +343,7 @@ void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
     }
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // edge nodes: copy received node values from y
-    if (para->getNumberOfProcessNeighborsY(level, "recv") > 0) {
+    if (para->getUseStreams() && para->getNumberOfProcessNeighborsY(level, "recv") > 0) {
         uint indexInSubdomainY = 0;
         uint indexInSubdomainZ = 0;
         uint numNodesInBufferY = 0;
-- 
GitLab