From 68d16794cb2c82e94556860cb057af1c1a74a057 Mon Sep 17 00:00:00 2001
From: Anna Wellmann <a.wellmann@tu-bs.de>
Date: Mon, 25 Apr 2022 10:15:49 +0200
Subject: [PATCH] Use copyEdgeNodes for all directions

---
 .../Communication/ExchangeData27.cpp | 88 ++++++-------------
 1 file changed, 29 insertions(+), 59 deletions(-)

diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
index d792c4b1e..eb8f7aa81 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
@@ -69,6 +69,30 @@ void startNonBlockingMpiReceive(unsigned int numberOfSendProcessNeighbors, vf::g
     }
 }
+
+void copyEdgeNodes(std::vector<LBMSimulationParameter::EdgeNodePositions> &edgeNodes, std::vector<ProcessNeighbor27> &recvProcessNeighborHostAllNodes, std::vector<ProcessNeighbor27> &sendProcessNeighborHostAllNodes,
+                   std::vector<ProcessNeighbor27> &sendProcessNeighborHost)
+{
+    uint indexInSubdomainRecv = 0;
+    uint indexInSubdomainSend = 0;
+    uint numNodesInBufferRecv = 0;
+    uint numNodesInBufferSend = 0;
+#pragma omp parallel for
+    for (uint i = 0; i < edgeNodes.size(); i++) {
+        indexInSubdomainRecv = edgeNodes[i].indexOfProcessNeighborRecv;
+        indexInSubdomainSend = edgeNodes[i].indexOfProcessNeighborSend;
+        numNodesInBufferRecv = recvProcessNeighborHostAllNodes[indexInSubdomainRecv].numberOfNodes;
+        numNodesInBufferSend = sendProcessNeighborHostAllNodes[indexInSubdomainSend].numberOfNodes;
+        if(edgeNodes[i].indexInSendBuffer >= sendProcessNeighborHost[indexInSubdomainSend].numberOfNodes){
+            // for reduced communication after fine to coarse: only copy send nodes which are not part of the reduced comm
+            continue;
+        }
+        for (uint direction = 0; direction <= dirEND; direction++) {
+            (sendProcessNeighborHostAllNodes[indexInSubdomainSend].f[0] + (direction * numNodesInBufferSend))[edgeNodes[i].indexInSendBuffer] =
+                (recvProcessNeighborHostAllNodes[indexInSubdomainRecv].f[0] + (direction * numNodesInBufferRecv))[edgeNodes[i].indexInRecvBuffer];
+        }
+    }
+}
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // X
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -220,23 +244,9 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
     if (para->getUseStreams()) cudaStreamSynchronize(stream);
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // edge nodes: copy received node values from x
-    if (para->getUseStreams() && para->getNumberOfProcessNeighborsX(level, "recv") > 0) {
-        uint indexInSubdomainX = 0;
-        uint indexInSubdomainY = 0;
-        uint numNodesInBufferX = 0;
-        uint numNodesInBufferY = 0;
-#pragma omp parallel for
-        for (uint i = 0; i < para->getParH(level)->edgeNodesXtoY.size(); i++) {
-            indexInSubdomainX = para->getParH(level)->edgeNodesXtoY[i].indexOfProcessNeighborRecv;
-            indexInSubdomainY = para->getParH(level)->edgeNodesXtoY[i].indexOfProcessNeighborSend;
-            numNodesInBufferX = para->getParH(level)->recvProcessNeighborX[indexInSubdomainX].numberOfNodes;
-            numNodesInBufferY = para->getParH(level)->sendProcessNeighborY[indexInSubdomainY].numberOfNodes;
-
-            for (uint direction = 0; direction <= dirEND; direction++) {
-                (para->getParH(level)->sendProcessNeighborY[indexInSubdomainY].f[0]+(direction * numNodesInBufferY))[para->getParH(level)->edgeNodesXtoY[i].indexInSendBuffer] =
-                    (para->getParH(level)->recvProcessNeighborX[indexInSubdomainX].f[0]+(direction * numNodesInBufferX))[para->getParH(level)->edgeNodesXtoY[i].indexInRecvBuffer];
-            }
-        }
+    if (para->getUseStreams() && para->getNumberOfProcessNeighborsX(level, "recv") > 0) {
+        copyEdgeNodes(para->getParH(level)->edgeNodesXtoY, para->getParH(level)->recvProcessNeighborX, para->getParH(level)->sendProcessNeighborY,
+                      *sendProcessNeighborHost);
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     startBlockingMpiSend((unsigned int)(*sendProcessNeighborHost).size(), comm, sendProcessNeighborHost);
@@ -303,32 +313,6 @@ void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, int s
                                     (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")));
 }
 
-void copyEdgeNodes(std::vector<LBMSimulationParameter::EdgeNodePositions> &edgeNodes, std::vector<ProcessNeighbor27> &recvProcessNeighborHostAllNodes, std::vector<ProcessNeighbor27> &sendProcessNeighborHostAllNodes,
-                   std::vector<ProcessNeighbor27> &sendProcessNeighborHost)
-{
-    uint indexInSubdomainRecv = 0;
-    uint indexInSubdomainSend = 0;
-    uint numNodesInBufferRecv = 0;
-    uint numNodesInBufferSend = 0;
-#pragma omp parallel for
-    for (uint i = 0; i < edgeNodes.size(); i++) {
-        indexInSubdomainRecv = edgeNodes[i].indexOfProcessNeighborRecv;
-        indexInSubdomainSend = edgeNodes[i].indexOfProcessNeighborSend;
-        numNodesInBufferRecv = recvProcessNeighborHostAllNodes[indexInSubdomainRecv].numberOfNodes;
-        numNodesInBufferSend = sendProcessNeighborHostAllNodes[indexInSubdomainSend].numberOfNodes;
-
-        if(edgeNodes[i].indexInSendBuffer >= sendProcessNeighborHost[indexInSubdomainSend].numberOfNodes){
-            // for reduced communication after fine to coarse: only copy send nodes which are not part of the reduced comm
-            continue;
-        }
-
-        for (uint direction = 0; direction <= dirEND; direction++) {
-            (sendProcessNeighborHostAllNodes[indexInSubdomainSend].f[0] + (direction * numNodesInBufferSend))[edgeNodes[i].indexInSendBuffer] = 1000;
-                // (recvProcessNeighborHostAllNodes[indexInSubdomainRecv].f[0] + (direction * numNodesInBufferRecv))[edgeNodes[i].indexInRecvBuffer];
-        }
-    }
-}
-
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManager *cudaManager, int level, int streamIndex,
@@ -356,22 +340,8 @@ void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator *comm, CudaMe
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // edge nodes: copy received node values from y
     if (para->getUseStreams() && para->getNumberOfProcessNeighborsY(level, "recv") > 0) {
-        uint indexInSubdomainY = 0;
-        uint indexInSubdomainZ = 0;
-        uint numNodesInBufferY = 0;
-        uint numNodesInBufferZ = 0;
-#pragma omp parallel for
-        for (uint i = 0; i < para->getParH(level)->edgeNodesYtoZ.size(); i++) {
-            indexInSubdomainY = para->getParH(level)->edgeNodesYtoZ[i].indexOfProcessNeighborRecv;
-            indexInSubdomainZ = para->getParH(level)->edgeNodesYtoZ[i].indexOfProcessNeighborSend;
-            numNodesInBufferY = para->getParH(level)->recvProcessNeighborY[indexInSubdomainY].numberOfNodes;
-            numNodesInBufferZ = para->getParH(level)->sendProcessNeighborZ[indexInSubdomainZ].numberOfNodes;
-
-            for (uint direction = 0; direction <= dirEND; direction++) {
-                (para->getParH(level)->sendProcessNeighborZ[indexInSubdomainZ].f[0] + (direction * numNodesInBufferZ))[para->getParH(level)->edgeNodesYtoZ[i].indexInSendBuffer] =
-                    (para->getParH(level)->recvProcessNeighborY[indexInSubdomainY].f[0] + (direction * numNodesInBufferY))[para->getParH(level)->edgeNodesYtoZ[i].indexInRecvBuffer];
-            }
-        }
+        copyEdgeNodes(para->getParH(level)->edgeNodesYtoZ, para->getParH(level)->recvProcessNeighborY, para->getParH(level)->sendProcessNeighborZ,
+                      *sendProcessNeighborHost);
     }
     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     startBlockingMpiSend((unsigned int)(*sendProcessNeighborHost).size(), comm, sendProcessNeighborHost);
-- 
GitLab
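
The stand-alone sketch below is not part of the patch above; it only illustrates the copy pattern that copyEdgeNodes consolidates for the X->Y and Y->Z exchanges: for every lattice direction, the value an edge node just received from one process neighbor is written into the send buffer of the next exchange. The types ProcessNeighborSketch and EdgeNodeSketch are simplified stand-ins for ProcessNeighbor27 and LBMSimulationParameter::EdgeNodePositions, and the OpenMP pragma as well as the reduced fine-to-coarse check are omitted, so this is an illustration of the indexing, not the project's API.

// Self-contained sketch with simplified stand-in types (not the real VirtualFluids classes).
#include <cstdio>
#include <vector>

using uint = unsigned int;
constexpr uint numberOfDirections = 27; // stands in for dirEND + 1 of the D3Q27 stencil

struct ProcessNeighborSketch {
    uint numberOfNodes;
    std::vector<double> f; // flat buffer: numberOfDirections * numberOfNodes values
};

struct EdgeNodeSketch {
    uint indexOfProcessNeighborRecv; // which recv neighbor the edge node belongs to
    uint indexInRecvBuffer;          // position of the node in that recv buffer
    uint indexOfProcessNeighborSend; // which send neighbor it has to be forwarded to
    uint indexInSendBuffer;          // position of the node in that send buffer
};

// Copy the received distributions of every edge node into the matching send buffer,
// once per lattice direction (the core of what copyEdgeNodes does in the patch).
void copyEdgeNodesSketch(const std::vector<EdgeNodeSketch> &edgeNodes,
                         const std::vector<ProcessNeighborSketch> &recvNeighbors,
                         std::vector<ProcessNeighborSketch> &sendNeighbors)
{
    for (const EdgeNodeSketch &edgeNode : edgeNodes) {
        const ProcessNeighborSketch &recv = recvNeighbors[edgeNode.indexOfProcessNeighborRecv];
        ProcessNeighborSketch &send = sendNeighbors[edgeNode.indexOfProcessNeighborSend];
        for (uint direction = 0; direction < numberOfDirections; direction++) {
            send.f[direction * send.numberOfNodes + edgeNode.indexInSendBuffer] =
                recv.f[direction * recv.numberOfNodes + edgeNode.indexInRecvBuffer];
        }
    }
}

int main()
{
    // one recv neighbor in x (4 nodes, all values 1.0), one send neighbor in y (3 nodes, zeroed)
    std::vector<ProcessNeighborSketch> recvX{ { 4, std::vector<double>(numberOfDirections * 4, 1.0) } };
    std::vector<ProcessNeighborSketch> sendY{ { 3, std::vector<double>(numberOfDirections * 3, 0.0) } };
    // one edge node: node 2 of the x recv buffer feeds node 1 of the y send buffer
    std::vector<EdgeNodeSketch> edgeNodesXtoY{ { 0, 2, 0, 1 } };

    copyEdgeNodesSketch(edgeNodesXtoY, recvX, sendY);
    std::printf("sendY, direction 0, node 1: %f\n", sendY[0].f[1]); // prints 1.000000
    return 0;
}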