From aa27b3cd3006ee505667202ba357b889db549b19 Mon Sep 17 00:00:00 2001
From: Anna Wellmann <a.wellmann@tu-braunschweig.de>
Date: Tue, 14 Sep 2021 10:16:54 +0200
Subject: [PATCH] Refactor new fine-to-coarse kernel

---
 .../Calculation/UpdateGrid27.cpp              |  8 +--
 .../Calculation/UpdateGrid27.h                |  3 +-
 src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h |  4 +-
 src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh |  8 ---
 src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu    | 55 +++++++++++++------
 src/gpu/VirtualFluids_GPU/GPU/ScaleFC27.cu    | 25 ---------
 6 files changed, 43 insertions(+), 60 deletions(-)

diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
index 4da3c6745..18aab89a7 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
@@ -76,8 +76,7 @@ void updateGrid27(Parameter *para, vf::gpu::Communicator *comm, CudaMemoryManage
         if (para->getUseStreams() && para->getNumprocs() > 1) {
         } else {
             if (para->getKernelNeedsFluidNodeIndicesToRun()) {
-                fineToCoarseUsingIndex(para, level, para->getParD(level)->fluidNodeIndices,
-                                       para->getParD(level)->numberOfFluidNodes, -1);
+                fineToCoarseUsingIndex(para, level, -1);
 
                 prepareExchangeMultiGPU(para, level, -1);
                 exchangeMultiGPU(para, comm, cudaManager, level, -1);
@@ -1139,8 +1138,7 @@ void fineToCoarse(Parameter* para, int level)
 
 }
 
-void fineToCoarseUsingIndex(Parameter *para, int level, uint *fluidNodeIndices, uint numberOfFluidNodes,
-                            int streamIndex)
+void fineToCoarseUsingIndex(Parameter *para, int level, int streamIndex)
 {
     cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
     ScaleFC_RhoSq_comp_27_Stream(
@@ -1151,7 +1149,7 @@ void fineToCoarseUsingIndex(Parameter *para, int level, uint *fluidNodeIndices,
         para->getParD(level)->intFC.ICellFCC, para->getParD(level)->intFC.ICellFCF, para->getParD(level)->K_FC,
         para->getParD(level)->omega, para->getParD(level + 1)->omega, para->getParD(level)->vis,
         para->getParD(level)->nx, para->getParD(level)->ny, para->getParD(level + 1)->nx, para->getParD(level + 1)->ny,
-        para->getParD(level)->numberofthreads, para->getParD(level)->offFC, fluidNodeIndices, numberOfFluidNodes, stream);
+        para->getParD(level)->numberofthreads, para->getParD(level)->offFC, stream);
     getLastCudaError("ScaleFC27_RhoSq_comp_Stream execution failed");
 
     //////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
index ef3cfab2d..d80a3bf6c 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
@@ -40,8 +40,7 @@ extern "C" void calcMacroscopicQuantities(Parameter* para, int level);
 extern "C" void preCollisionBC(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t);
 
 extern "C" void fineToCoarse(Parameter* para, int level);
-extern "C" void fineToCoarseUsingIndex(Parameter *para, int level, uint *fluidNodeIndices = nullptr,
-                                       uint numberOfFluidNodes = 0, int stream = -1);
+extern "C" void fineToCoarseUsingIndex(Parameter *para, int level, int stream = -1); // stream: stream index; -1 selects CU_STREAM_LEGACY, any other value is looked up in the stream manager
 
 extern "C" void coarseToFine(Parameter* para, int level);
 extern "C" void coarseToFineUsingIndex(Parameter *para, int level, uint *fluidNodeIndices = nullptr,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
index 1220b5ef4..e23644514 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
@@ -2116,9 +2116,7 @@ extern "C" void ScaleFC_RhoSq_comp_27_Stream(	real* DC,
 												unsigned int nxF, 
 												unsigned int nyF, 
 											    unsigned int numberOfThreads,
-												OffFC offFC, 
-												unsigned int *fluidNodeIndices, 
-											    unsigned int numberOfFluidNodes,                                                
+												OffFC offFC,                                              
 												CUstream_st *stream);
 
 extern "C" void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
index fc35b1c66..288db43e7 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
@@ -2049,14 +2049,6 @@ extern "C" __global__ void scaleFC_RhoSq_comp_27( real* DC,
 												  unsigned int nyF,
 												  OffFC offFC);
 
-extern "C" __global__ void
-scaleFC_RhoSq_comp_27_Stream(real *DC, real *DF, unsigned int *neighborCX, unsigned int *neighborCY,
-                             unsigned int *neighborCZ, unsigned int *neighborFX, unsigned int *neighborFY,
-                             unsigned int *neighborFZ, unsigned int size_MatC, unsigned int size_MatF, bool evenOrOdd,
-                             unsigned int *posC, unsigned int *posFSWB, unsigned int kFC, real omCoarse, real omFine,
-                             real nu, unsigned int nxC, unsigned int nyC, unsigned int nxF, unsigned int nyF,
-                             OffFC offFC, const unsigned int *fluidNodeIndices, unsigned int numberOfFluidNodes);
-
 extern "C" __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC, 
 														real* DF, 
 														unsigned int* neighborCX,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
index feab617a2..3885c24dc 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
@@ -6265,25 +6265,46 @@ extern "C" void ScaleFC_RhoSq_comp_27_Stream(real * DC,
 											 unsigned int nyF, 
 											 unsigned int numberOfThreads,
 											 OffFC offFC,
-											 unsigned int *fluidNodeIndices,
-											 unsigned int numberOfFluidNodes, 
 											 CUstream_st* stream)
 {
-    int Grid = (kFC / numberOfThreads) + 1;
-    int Grid1, Grid2;
-    if (Grid > 512) {
-        Grid1 = 512;
-        Grid2 = (Grid / Grid1) + 1;
-    } else {
-        Grid1 = 1;
-        Grid2 = Grid;
-    }
-    dim3 gridINT_FC(Grid1, Grid2);
-    dim3 threads(numberOfThreads, 1, 1);
-
-    scaleFC_RhoSq_comp_27_Stream<<<gridINT_FC, threads, 0 , stream>>>(DC, DF, neighborCX, neighborCY, neighborCZ, neighborFX, neighborFY, neighborFZ, size_MatC, size_MatF, evenOrOdd,
-        posC, posFSWB, kFC, omCoarse, omFine, nu, nxC, nyC, nxF, nyF, offFC, fluidNodeIndices, numberOfFluidNodes);
-    getLastCudaError("scaleFC_RhoSq_comp_27_Stream execution failed");
+   int Grid = (kFC / numberOfThreads)+1;   // block count: integer quotient plus one, so always >= 1
+   int Grid1, Grid2;
+   if (Grid>512)                           // spread a large block count over two grid dimensions
+   {
+      Grid1 = 512;
+      Grid2 = (Grid/Grid1)+1;
+   }
+   else
+   {
+      Grid1 = 1;
+      Grid2 = Grid;
+   }
+   dim3 gridINT_FC(Grid1, Grid2);          // Grid1 x Grid2 blocks
+   dim3 threads(numberOfThreads, 1, 1 );   // one-dimensional thread blocks
+   // Enqueue scaleFC_RhoSq_comp_27 on the caller-provided stream; no dynamic shared memory is requested.
+   scaleFC_RhoSq_comp_27<<< gridINT_FC, threads, 0, stream >>>(DC,
+                                                               DF,
+                                                               neighborCX,
+                                                               neighborCY,
+                                                               neighborCZ,
+                                                               neighborFX,
+                                                               neighborFY,
+                                                               neighborFZ,
+                                                               size_MatC,
+                                                               size_MatF,
+                                                               evenOrOdd,
+                                                               posC,
+                                                               posFSWB,
+                                                               kFC,
+                                                               omCoarse,
+                                                               omFine,
+                                                               nu,
+                                                               nxC,
+                                                               nyC,
+                                                               nxF,
+                                                               nyF,
+                                                               offFC);
+   getLastCudaError("scaleFC_RhoSq_comp_27 execution failed");   // message names the kernel launched above
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ScaleFC27.cu b/src/gpu/VirtualFluids_GPU/GPU/ScaleFC27.cu
index a56c75e58..f61a6f980 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ScaleFC27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ScaleFC27.cu
@@ -11094,31 +11094,6 @@ extern "C" __global__ void scaleFC_RhoSq_comp_27(real* DC,
                                      nyC, nxF, nyF, offFC, k);
 }
 
-extern "C" __global__ void scaleFC_RhoSq_comp_27_Stream(real *DC, real *DF, unsigned int *neighborCX, unsigned int *neighborCY,
-                                                        unsigned int *neighborCZ, unsigned int *neighborFX,
-                                                        unsigned int *neighborFY, unsigned int *neighborFZ,
-                                                        unsigned int size_MatC, unsigned int size_MatF, bool evenOrOdd,
-                                                        unsigned int *posC, unsigned int *posFSWB, unsigned int kFC,
-                                                        real omCoarse, real omFine, real nu, unsigned int nxC, unsigned int nyC, unsigned int nxF, unsigned int nyF,
-                                                        OffFC offFC, const unsigned int *fluidNodeIndices, unsigned int numberOfFluidNodes)
-{
-    ////////////////////////////////////////////////////////////////////////////////
-    const unsigned ix = threadIdx.x; // Globaler x-Index
-    const unsigned iy = blockIdx.x;  // Globaler y-Index
-    const unsigned iz = blockIdx.y;  // Globaler z-Index
-
-    const unsigned nx = blockDim.x;
-    const unsigned ny = gridDim.x;
-
-    const unsigned k_thread = nx * (ny * iz + iy) + ix;
-
-    if (k_thread < numberOfFluidNodes) {
-        const unsigned k = fluidNodeIndices[k_thread]; 
-        scaleFC_RhoSq_comp_27_Calculation(DC, DF, neighborCX, neighborCY, neighborCZ, neighborFX, neighborFY,
-                                          neighborFZ, size_MatC, size_MatF, evenOrOdd, posC, posFSWB, kFC, omCoarse,
-                                          omFine, nu, nxC, nyC, nxF, nyF, offFC, k);
-    }
-}
 
 
 
-- 
GitLab