From 952aafde6a3dff5a28acda6a7e2bbe2f039bfcfe Mon Sep 17 00:00:00 2001
From: Anna Wellmann <a.wellmann@tu-braunschweig.de>
Date: Tue, 14 Sep 2021 15:15:21 +0200
Subject: [PATCH] Split fine to coarse kernel into border and bulk

---
 apps/gpu/LBM/MusselOyster/MusselOyster.cpp    |  6 +--
 src/gpu/GridGenerator/grid/Grid.h             |  1 +
 .../grid/GridBuilder/GridBuilder.h            |  1 +
 .../grid/GridBuilder/LevelGridBuilder.cpp     |  5 ++
 .../grid/GridBuilder/LevelGridBuilder.h       |  1 +
 src/gpu/GridGenerator/grid/GridImp.cu         | 29 +++++++++++
 src/gpu/GridGenerator/grid/GridImp.h          |  1 +
 .../Calculation/UpdateGrid27.cpp              | 48 +++++++++----------
 .../Calculation/UpdateGrid27.h                |  4 +-
 .../GridReaderGenerator/GridGenerator.cpp     | 33 +++++++++++++
 src/gpu/VirtualFluids_GPU/LBM/LB.h            |  5 ++
 .../VirtualFluids_GPU/Parameter/Parameter.h   |  3 ++
 12 files changed, 105 insertions(+), 32 deletions(-)

diff --git a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
index 79d6637b3..f57b20775 100644
--- a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
+++ b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
@@ -110,7 +110,7 @@ void multipleLevel(const std::string& configPath)
 
     bool useGridGenerator = true;
     bool useMultiGPU = false;
-    bool useStreams  = false;
+    bool useStreams  = true;
     bool useLevels = true;
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -118,7 +118,7 @@ void multipleLevel(const std::string& configPath)
     std::string bivalveType = "MUSSEL"; // "MUSSEL" "OYSTER"
     std::string gridPath(gridPathParent + bivalveType); // only for GridGenerator, for GridReader the gridPath needs to be set in the config file
 
-    real dxGrid = (real)2.0;
+    real dxGrid = (real)1.0;
     real vxLB = (real)0.051; // LB units
     real Re = (real)300.0;
     real viscosityLB = (vxLB * dxGrid) / Re;
@@ -139,7 +139,7 @@ void multipleLevel(const std::string& configPath)
 
     
     para->setTOut(1000);
-    para->setTEnd(1000);
+    para->setTEnd(10000);
 
     para->setCalcDragLift(false);
     para->setUseWale(false);
diff --git a/src/gpu/GridGenerator/grid/Grid.h b/src/gpu/GridGenerator/grid/Grid.h
index 460b7a171..387a9a7b0 100644
--- a/src/gpu/GridGenerator/grid/Grid.h
+++ b/src/gpu/GridGenerator/grid/Grid.h
@@ -51,6 +51,7 @@ public:
     HOSTDEVICE virtual void setFieldEntry(uint matrixIndex, char type) = 0;
 
     CUDA_HOST virtual void getGridInterfaceIndices(uint* iCellCfc, uint* iCellCff, uint* iCellFcc, uint* iCellFcf) const = 0;
+    CUDA_HOST virtual void getGridInterfaceIndicesFCCBorderBulk(uint *iCellFccBorder, uint &intFCBorderKfc, uint *&iCellFccBulk, uint &intFCBulkKfc, int level) const = 0;
 
     CUDA_HOST virtual int *getNeighborsX() const = 0;
     CUDA_HOST virtual int *getNeighborsY() const = 0;
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
index 9ff99fad4..ebe8ac8f9 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
@@ -56,6 +56,7 @@ public:
     virtual uint getNumberOfNodesCF(int level) = 0;
     virtual uint getNumberOfNodesFC(int level) = 0;
     virtual void getGridInterfaceIndices(uint* iCellCfc, uint* iCellCff, uint* iCellFcc, uint* iCellFcf, int level) const = 0;
+    virtual void getGridInterfaceIndicesFCCBorderBulk(uint *iCellFccBorder, uint &intFCBorderKfc, uint *&iCellFccBulk, uint &intFCBulkKfc, int level) const = 0;
 
     virtual void getOffsetFC(real* xOffCf, real* yOffCf, real* zOffCf, int level) = 0;
     virtual void getOffsetCF(real* xOffFc, real* yOffFc, real* zOffFc, int level) = 0;
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
index ba2555654..fa0166a04 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
+++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
@@ -206,6 +206,11 @@ void LevelGridBuilder::getGridInterfaceIndices(uint* iCellCfc, uint* iCellCff, u
     this->grids[level]->getGridInterfaceIndices(iCellCfc, iCellCff, iCellFcc, iCellFcf);
 }
 
+void LevelGridBuilder::getGridInterfaceIndicesFCCBorderBulk(uint *iCellFccBorder, uint &intFCBorderKfc, uint *&iCellFccBulk, uint &intFCBulkKfc, int level) const
+{
+    this->grids[level]->getGridInterfaceIndicesFCCBorderBulk(iCellFccBorder, intFCBorderKfc, iCellFccBulk, intFCBulkKfc, level);
+}
+
 void LevelGridBuilder::getOffsetFC(real * xOffFC, real * yOffFC, real * zOffFC, int level)
 {
     for (uint i = 0; i < getNumberOfNodesFC(level); i++)
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
index 4cbb927a2..542df12d1 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
@@ -137,6 +137,7 @@ public:
     GRIDGENERATOR_EXPORT uint getNumberOfNodesFC(int level) override;
 
     GRIDGENERATOR_EXPORT void getGridInterfaceIndices(uint* iCellCfc, uint* iCellCff, uint* iCellFcc, uint* iCellFcf, int level) const override;
+    GRIDGENERATOR_EXPORT void getGridInterfaceIndicesFCCBorderBulk(uint *iCellFccBorder, uint &intFCBorderKfc, uint *&iCellFccBulk, uint &intFCBulkKfc, int level) const override;
 
     GRIDGENERATOR_EXPORT void getOffsetFC(real* xOffCf, real* yOffCf, real* zOffCf, int level) override;
     GRIDGENERATOR_EXPORT void getOffsetCF(real* xOffFc, real* yOffFc, real* zOffFc, int level) override;
diff --git a/src/gpu/GridGenerator/grid/GridImp.cu b/src/gpu/GridGenerator/grid/GridImp.cu
index 9a6c88e58..5b9e0da36 100644
--- a/src/gpu/GridGenerator/grid/GridImp.cu
+++ b/src/gpu/GridGenerator/grid/GridImp.cu
@@ -1943,6 +1943,35 @@ void GridImp::getGridInterface(uint* gridInterfaceList, const uint* oldGridInter
         gridInterfaceList[i] = oldGridInterfaceList[i] + 1; // + 1 for numbering shift between GridGenerator and VF_GPU
 }
 
+void GridImp::getGridInterfaceIndicesFCCBorderBulk(uint *iCellFccBorder, uint &intFCBorderKfc, uint *&iCellFccBulk,
+                                                   uint &intFCBulkKfc, int level) const
+{
+    // reorder the array of FCC indices and return pointers and sizes of the new subarrays
+
+    uint *iCellFccAll = iCellFccBorder;
+    uint intFCKfcAll = this->gridInterface->fc.numberOfEntries;
+    std::vector<uint> iCellFccBorderVector;
+    std::vector<uint> iCellFccBulkVector;
+
+    for (uint i = 0; i < intFCKfcAll; i++)
+        if (std::find(this->fluidNodeIndicesBorder.begin(), this->fluidNodeIndicesBorder.end(), iCellFccAll[i]) !=
+            this->fluidNodeIndicesBorder.end())
+            iCellFccBorderVector.push_back(iCellFccAll[i]);
+        else
+            iCellFccBulkVector.push_back(iCellFccAll[i]);
+
+    intFCBorderKfc = (uint)iCellFccBorderVector.size();
+    intFCBulkKfc   = (uint)iCellFccBulkVector.size();
+    iCellFccBulk   = iCellFccBorder + intFCBorderKfc; 
+
+    for (uint i = 0; i < (uint)iCellFccBorderVector.size(); i++)
+        iCellFccBorder[i] = iCellFccBorderVector[i];
+    for (uint i = 0; i < (uint)iCellFccBulkVector.size(); i++)
+        iCellFccBulk[i] = iCellFccBulkVector[i];
+    std::cout << iCellFccBulk[0] << std::endl;
+    std::cout << iCellFccBulk[iCellFccBulkVector.size()-1] << std::endl;
+}
+
 #define GEOFLUID 19
 #define GEOSOLID 16
 
diff --git a/src/gpu/GridGenerator/grid/GridImp.h b/src/gpu/GridGenerator/grid/GridImp.h
index 1e74e9521..25d78f895 100644
--- a/src/gpu/GridGenerator/grid/GridImp.h
+++ b/src/gpu/GridGenerator/grid/GridImp.h
@@ -219,6 +219,7 @@ public:
     HOSTDEVICE uint getNumberOfNodesFC() const override;
     CUDA_HOST void getGridInterfaceIndices(uint* iCellCfc, uint* iCellCff, uint* iCellFcc, uint* iCellFcf) const override;
     CUDA_HOST static void getGridInterface(uint* gridInterfaceList, const uint* oldGridInterfaceList, uint size);
+    CUDA_HOST void getGridInterfaceIndicesFCCBorderBulk(uint *iCellFccBorder, uint &intFCBorderKfc, uint *&iCellFccBulk, uint &intFCBulkKfc, int level) const override;
 
     int* getNeighborsX() const override;
     int* getNeighborsY() const override;
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
index 18aab89a7..d28f7f4e4 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
@@ -75,21 +75,17 @@ void updateGrid27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManage
     {
         if (para->getUseStreams() && para->getNumprocs() > 1) {
         } else {
-            if (para->getKernelNeedsFluidNodeIndicesToRun()) {
-                fineToCoarseUsingIndex(para, level, -1);
+            //fineToCoarse(para, level);
 
-                prepareExchangeMultiGPU(para, level, -1);
-                exchangeMultiGPU(para, comm, cudaManager, level, -1);
+            fineToCoarseWithStream(para, level, para->getParD(level)->intFCBorder.ICellFCC,
+                                   para->getParD(level)->intFCBorder.kFC, -1);
+            fineToCoarseWithStream(para, level, para->getParD(level)->intFCBulk.ICellFCC,
+                                   para->getParD(level)->intFCBulk.kFC, -1);
 
-                coarseToFine(para, level);
-            } else {
-                fineToCoarse(para, level);
+            prepareExchangeMultiGPU(para, level, -1);
+            exchangeMultiGPU(para, comm, cudaManager, level, -1);
 
-                prepareExchangeMultiGPU(para, level, -1);
-                exchangeMultiGPU(para, comm, cudaManager, level, -1);
-
-                coarseToFine(para, level);
-            }
+            coarseToFine(para, level);
         }
     }
 }
@@ -110,8 +106,7 @@ void collision(Parameter* para, std::vector<std::shared_ptr<PorousMedia>>& pm, i
 }
 
 void collisionUsingIndex(Parameter *para, std::vector<std::shared_ptr<PorousMedia>> &pm, int level, unsigned int t,
-                          std::vector<SPtr<Kernel>> &kernels, 
-               uint *fluidNodeIndices, uint numberOfFluidNodes, int stream)
+                         std::vector<SPtr<Kernel>> &kernels, uint *fluidNodeIndices, uint numberOfFluidNodes, int stream)
 {
     if (fluidNodeIndices != nullptr && numberOfFluidNodes != 0)
         kernels.at(level)->runOnIndices(fluidNodeIndices, numberOfFluidNodes, stream);
@@ -1138,18 +1133,19 @@ void fineToCoarse(Parameter* para, int level)
 
 }
 
-void fineToCoarseUsingIndex(Parameter *para, int level, int streamIndex)
+void fineToCoarseWithStream(Parameter *para, int level, uint *iCellFCC, uint k_FC, int streamIndex)
 {
     cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
-    ScaleFC_RhoSq_comp_27_Stream(
-        para->getParD(level)->d0SP.f[0], para->getParD(level + 1)->d0SP.f[0], para->getParD(level)->neighborX_SP,
-        para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP, para->getParD(level + 1)->neighborX_SP,
-        para->getParD(level + 1)->neighborY_SP, para->getParD(level + 1)->neighborZ_SP,
-        para->getParD(level)->size_Mat_SP, para->getParD(level + 1)->size_Mat_SP, para->getParD(level)->evenOrOdd,
-        para->getParD(level)->intFC.ICellFCC, para->getParD(level)->intFC.ICellFCF, para->getParD(level)->K_FC,
-        para->getParD(level)->omega, para->getParD(level + 1)->omega, para->getParD(level)->vis,
-        para->getParD(level)->nx, para->getParD(level)->ny, para->getParD(level + 1)->nx, para->getParD(level + 1)->ny,
-        para->getParD(level)->numberofthreads, para->getParD(level)->offFC, stream);
+    getLastCudaError("ScaleFC27_RhoSq_comp execution failed");
+    ScaleFC_RhoSq_comp_27_Stream( para->getParD(level)->d0SP.f[0],      para->getParD(level+1)->d0SP.f[0], 
+							      para->getParD(level)->neighborX_SP,   para->getParD(level)->neighborY_SP,    para->getParD(level)->neighborZ_SP, 
+							      para->getParD(level+1)->neighborX_SP, para->getParD(level+1)->neighborY_SP,  para->getParD(level+1)->neighborZ_SP, 
+							      para->getParD(level)->size_Mat_SP,    para->getParD(level+1)->size_Mat_SP,   para->getParD(level)->evenOrOdd,
+                                  iCellFCC,                             para->getParD(level)->intFC.ICellFCF, 
+							      k_FC,                                 para->getParD(level)->omega,           para->getParD(level + 1)->omega, 
+							      para->getParD(level)->vis,            para->getParD(level)->nx,              para->getParD(level)->ny, 
+							      para->getParD(level+1)->nx,           para->getParD(level+1)->ny,            para->getParD(level)->numberofthreads,
+							      para->getParD(level)->offFC,          stream);
     getLastCudaError("ScaleFC27_RhoSq_comp_Stream execution failed");
 
     //////////////////////////////////////////////////////////////////////////
@@ -1159,7 +1155,7 @@ void fineToCoarseUsingIndex(Parameter *para, int level, int streamIndex)
     if (para->getDiffOn()) {
         if (para->getDiffMod() == 7) {
             // TODO
-            printf("fineToCoarseUsingIndex Advection Diffusion not implemented");
+            printf("fineToCoarseWithStream Advection Diffusion not implemented");
             //ScaleFCThSMG7(para->getParD(level)->d0SP.f[0], para->getParD(level + 1)->d0SP.f[0],
             //              para->getParD(level)->d7.f[0], para->getParD(level + 1)->d7.f[0],
             //              para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP,
@@ -1173,7 +1169,7 @@ void fineToCoarseUsingIndex(Parameter *para, int level, int streamIndex)
             //getLastCudaError("ScaleFCTh7 execution failed");
         } else if (para->getDiffMod() == 27) {
             // TODO
-            printf("fineToCoarseUsingIndex Advection Diffusion not implemented");
+            printf("fineToCoarseWithStream Advection Diffusion not implemented");
             //ScaleFCThS27(para->getParD(level)->d0SP.f[0], para->getParD(level + 1)->d0SP.f[0],
             //             para->getParD(level)->d27.f[0], para->getParD(level + 1)->d27.f[0],
             //             para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP,
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
index d80a3bf6c..d4eda6f6c 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
@@ -40,10 +40,8 @@ extern "C" void calcMacroscopicQuantities(Parameter* para, int level);
 extern "C" void preCollisionBC(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t);
 
 extern "C" void fineToCoarse(Parameter* para, int level);
-extern "C" void fineToCoarseUsingIndex(Parameter *para, int level, int stream = -1);
+extern "C" void fineToCoarseWithStream(Parameter *para, int level, uint *iCellFCC, uint k_FC, int streamIndex);
 
 extern "C" void coarseToFine(Parameter* para, int level);
-extern "C" void coarseToFineUsingIndex(Parameter *para, int level, uint *fluidNodeIndices = nullptr,
-                                       uint numberOfFluidNodes = 0, int stream = -1);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
index b96646142..0b71e98ae 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
@@ -920,6 +920,39 @@ void GridGenerator::allocArrays_OffsetScale()
         builder->getOffsetCF(para->getParH(level)->offCF.xOffCF, para->getParH(level)->offCF.yOffCF, para->getParH(level)->offCF.zOffCF, level);
         builder->getOffsetFC(para->getParH(level)->offFC.xOffFC, para->getParH(level)->offFC.yOffFC, para->getParH(level)->offFC.zOffFC, level);
         builder->getGridInterfaceIndices(para->getParH(level)->intCF.ICellCFC, para->getParH(level)->intCF.ICellCFF, para->getParH(level)->intFC.ICellFCC, para->getParH(level)->intFC.ICellFCF, level);
+        
+        if (para->getUseStreams()) {
+            // split fine-to-coarse-coarse indices into border and bulk
+            para->getParH(level)->intFCBorder.ICellFCC = para->getParH(level)->intFC.ICellFCC; 
+            para->getParD(level)->intFCBorder.ICellFCC = para->getParD(level)->intFC.ICellFCC;
+            builder->getGridInterfaceIndicesFCCBorderBulk(para->getParH(level)->intFCBorder.ICellFCC, para->getParH(level)->intFCBorder.kFC, para->getParH(level)->intFCBulk.ICellFCC, para->getParH(level)->intFCBulk.kFC, level);
+            para->getParD(level)->intFCBorder.kFC = para->getParH(level)->intFCBorder.kFC;
+            para->getParD(level)->intFCBulk.kFC = para->getParH(level)->intFCBulk.kFC;
+            para->getParD(level)->intFCBulk.ICellFCC = para->getParD(level)->intFCBorder.ICellFCC + para->getParD(level)->intFCBulk.kFC;
+        }
+        std::cout << "sizeOld  " << para->getParH(level)->K_FC << std::endl;
+        std::cout << "sizeNew  " << para->getParH(level)->intFCBorder.kFC + para->getParH(level)->intFCBulk.kFC
+                  << " = border " << para->getParH(level)->intFCBorder.kFC << " + bulk "
+                  << para->getParH(level)->intFCBulk.kFC << std::endl;
+        std::cout << "first old  " << para->getParH(level)->intFC.ICellFCC[0] << std::endl;
+        std::cout << "first new  " << para->getParH(level)->intFCBorder.ICellFCC[0]
+                  << std::endl;
+        //std::cout << "last border old  " << para->getParH(level)->intFC.ICellFCC[para->getParH(level)->intFCBorder.kFC - 1]
+        //          << std::endl; //if (para->getParH(level)->intFCBorder.kFC > 0)
+        //std::cout << "last border new  " << para->getParH(level)->intFCBorder.ICellFCC[para->getParH(level)->intFCBorder.kFC - 1]
+        //          << std::endl;
+        std::cout << "old pointer " << para->getParH(level)->intFC.ICellFCC << std::endl;
+        std::cout << "border pointer (= old pointer) " << para->getParH(level)->intFCBorder.ICellFCC << std::endl;
+        std::cout << "bulk pointer new " << para->getParH(level)->intFCBulk.ICellFCC << std::endl;
+        std::cout << "first bulk old  "
+                  << para->getParH(level)->intFC.ICellFCC[para->getParH(level)->intFCBorder.kFC] << std::endl;
+        std::cout << "first bulk new  "
+                  << para->getParH(level)->intFCBulk.ICellFCC[0] << std::endl;
+        std::cout << "last bulk old  "
+                  << para->getParH(level)->intFC.ICellFCC[para->getParH(level)->K_FC - 1] << std::endl;
+        std::cout << "last bulk new  "
+                  << para->getParH(level)->intFCBulk.ICellFCC[para->getParH(level)->intFCBulk.kFC - 1] << std::endl;
+
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         //copy
 		cudaMemoryManager->cudaCopyInterfaceCF(level);
diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h
index a33b3b792..a116485ac 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/LB.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h
@@ -140,6 +140,11 @@ typedef struct ICellFC{
    uint kFC;
 } InterpolationCellFC;
 
+typedef struct ICellFCBB {
+    uint *ICellFCC;
+    uint kFC;
+} InterpolationCellFCBorderBulk;
+
 //Offset of the interface cells at the wall
 typedef struct OffCF{
    real* xOffCF;
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
index 38013f5a6..b9bc179e3 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
@@ -181,6 +181,9 @@ struct LBMSimulationParameter
     unsigned int mem_size_kCF;
     unsigned int mem_size_kFC;
 
+    InterpolationCellFCBorderBulk intFCBorder;
+    InterpolationCellFCBorderBulk intFCBulk;
+
     // offset//////////////////
     OffsetCF offCF;
     OffsetFC offFC;
-- 
GitLab