From 2ead72f4937aaaee33e4d7373c78f39179204bda Mon Sep 17 00:00:00 2001
From: "TESLA03\\Master" <a.wellmann@tu-bs.de>
Date: Thu, 24 Jun 2021 15:32:41 +0200
Subject: [PATCH] Fix cudaMalloc error in CalcMac27.cu

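Use the new version of LBCalcMacCompSP27 again (reverts the changes in bd2123ce58).
The index bounds check is moved out of isValidFluidNode so that kernels can test
k against size_Mat before they index any per-node array.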
---
 src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu    | 219 ++----------------
 .../FluidFlow/Compressible/RunLBMKernel.cuh   |   6 +-
 .../Kernel/Utilities/DistributionHelper.cu    |   6 +-
 .../Kernel/Utilities/DistributionHelper.cuh   |   2 +-
 4 files changed, 29 insertions(+), 204 deletions(-)

diff --git a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
index 547b20745..db2d523eb 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
@@ -42,14 +42,14 @@ extern "C" __global__ void LBCalcMac27( real* vxD,
 
    const unsigned int k = nx*(ny*z + y) + x; // Zugriff auf arrays im device
 
+   if(!(k < size_Mat) || !vf::gpu::isValidFluidNode(geoD[k])) // '||' short-circuits: geoD[k] is read only when k is in range; out-of-range or non-fluid nodes both return
+      return;
+
    rhoD[k] = c0o1;
    vxD[k]  = c0o1;
    vyD[k]  = c0o1;
    vzD[k]  = c0o1;
 
-   if(!vf::gpu::isValidFluidNode(k, size_Mat, geoD[k]))
-      return;
-
    vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY, neighborZ);
    const auto& distribution = distr_wrapper.distribution;
 
@@ -264,206 +264,29 @@ extern "C" __global__ void LBCalcMacCompSP27(real *vxD, real *vyD, real *vzD, re
                                              unsigned int *neighborZ, unsigned int size_Mat, real *distributions,
                                              bool isEvenTimestep)
 {
-    //const unsigned k = vf::gpu::getNodeIndex();
-
-    //pressD[k] = c0o1;
-    //rhoD[k]   = c0o1;
-    //vxD[k]    = c0o1;
-    //vyD[k]    = c0o1;
-    //vzD[k]    = c0o1;
-
-    //if (!vf::gpu::isValidFluidNode(k, size_Mat, geoD[k]))
-    //    return;
-
-    //vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY,
-    //                                           neighborZ);
-    //const auto &distribution = distr_wrapper.distribution;
-
-    //rhoD[k]   = vf::lbm::getDensity(distribution.f);
-    //vxD[k]    = vf::lbm::getCompressibleVelocityX1(distribution.f, rhoD[k]);
-    //vyD[k]    = vf::lbm::getCompressibleVelocityX2(distribution.f, rhoD[k]);
-    //vzD[k]    = vf::lbm::getCompressibleVelocityX3(distribution.f, rhoD[k]);
-    //pressD[k] = vf::lbm::getPressure(distribution.f, rhoD[k], vxD[k], vyD[k], vzD[k]); 
-
-
-   // old stuff
-   Distributions27 D;
-    if (isEvenTimestep == true)
-   {
-      D.f[dirE   ] = &distributions[dirE   *size_Mat];
-      D.f[dirW   ] = &distributions[dirW   *size_Mat];
-      D.f[dirN   ] = &distributions[dirN   *size_Mat];
-      D.f[dirS   ] = &distributions[dirS   *size_Mat];
-      D.f[dirT   ] = &distributions[dirT   *size_Mat];
-      D.f[dirB   ] = &distributions[dirB   *size_Mat];
-      D.f[dirNE  ] = &distributions[dirNE  *size_Mat];
-      D.f[dirSW  ] = &distributions[dirSW  *size_Mat];
-      D.f[dirSE  ] = &distributions[dirSE  *size_Mat];
-      D.f[dirNW  ] = &distributions[dirNW  *size_Mat];
-      D.f[dirTE  ] = &distributions[dirTE  *size_Mat];
-      D.f[dirBW  ] = &distributions[dirBW  *size_Mat];
-      D.f[dirBE  ] = &distributions[dirBE  *size_Mat];
-      D.f[dirTW  ] = &distributions[dirTW  *size_Mat];
-      D.f[dirTN  ] = &distributions[dirTN  *size_Mat];
-      D.f[dirBS  ] = &distributions[dirBS  *size_Mat];
-      D.f[dirBN  ] = &distributions[dirBN  *size_Mat];
-      D.f[dirTS  ] = &distributions[dirTS  *size_Mat];
-      D.f[dirZERO] = &distributions[dirZERO*size_Mat];
-      D.f[dirTNE ] = &distributions[dirTNE *size_Mat];
-      D.f[dirTSW ] = &distributions[dirTSW *size_Mat];
-      D.f[dirTSE ] = &distributions[dirTSE *size_Mat];
-      D.f[dirTNW ] = &distributions[dirTNW *size_Mat];
-      D.f[dirBNE ] = &distributions[dirBNE *size_Mat];
-      D.f[dirBSW ] = &distributions[dirBSW *size_Mat];
-      D.f[dirBSE ] = &distributions[dirBSE *size_Mat];
-      D.f[dirBNW ] = &distributions[dirBNW *size_Mat];
-   } 
-   else
-   {
-      D.f[dirW   ] = &distributions[dirE   *size_Mat];
-      D.f[dirE   ] = &distributions[dirW   *size_Mat];
-      D.f[dirS   ] = &distributions[dirN   *size_Mat];
-      D.f[dirN   ] = &distributions[dirS   *size_Mat];
-      D.f[dirB   ] = &distributions[dirT   *size_Mat];
-      D.f[dirT   ] = &distributions[dirB   *size_Mat];
-      D.f[dirSW  ] = &distributions[dirNE  *size_Mat];
-      D.f[dirNE  ] = &distributions[dirSW  *size_Mat];
-      D.f[dirNW  ] = &distributions[dirSE  *size_Mat];
-      D.f[dirSE  ] = &distributions[dirNW  *size_Mat];
-      D.f[dirBW  ] = &distributions[dirTE  *size_Mat];
-      D.f[dirTE  ] = &distributions[dirBW  *size_Mat];
-      D.f[dirTW  ] = &distributions[dirBE  *size_Mat];
-      D.f[dirBE  ] = &distributions[dirTW  *size_Mat];
-      D.f[dirBS  ] = &distributions[dirTN  *size_Mat];
-      D.f[dirTN  ] = &distributions[dirBS  *size_Mat];
-      D.f[dirTS  ] = &distributions[dirBN  *size_Mat];
-      D.f[dirBN  ] = &distributions[dirTS  *size_Mat];
-      D.f[dirZERO] = &distributions[dirZERO*size_Mat];
-      D.f[dirTNE ] = &distributions[dirBSW *size_Mat];
-      D.f[dirTSW ] = &distributions[dirBNE *size_Mat];
-      D.f[dirTSE ] = &distributions[dirBNW *size_Mat];
-      D.f[dirTNW ] = &distributions[dirBSE *size_Mat];
-      D.f[dirBNE ] = &distributions[dirTSW *size_Mat];
-      D.f[dirBSW ] = &distributions[dirTNE *size_Mat];
-      D.f[dirBSE ] = &distributions[dirTNW *size_Mat];
-      D.f[dirBNW ] = &distributions[dirTSE *size_Mat];
-   }
-   ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
-   //////////////////////////////////////////////////////////////////////////
-   if(k<size_Mat)
-   {
-      //////////////////////////////////////////////////////////////////////////
-      //index
-      unsigned int kzero= k;
-      unsigned int ke   = k;
-      unsigned int kw   = neighborX[k];
-      unsigned int kn   = k;
-      unsigned int ks   = neighborY[k];
-      unsigned int kt   = k;
-      unsigned int kb   = neighborZ[k];
-      unsigned int ksw  = neighborY[kw];
-      unsigned int kne  = k;
-      unsigned int kse  = ks;
-      unsigned int knw  = kw;
-      unsigned int kbw  = neighborZ[kw];
-      unsigned int kte  = k;
-      unsigned int kbe  = kb;
-      unsigned int ktw  = kw;
-      unsigned int kbs  = neighborZ[ks];
-      unsigned int ktn  = k;
-      unsigned int kbn  = kb;
-      unsigned int kts  = ks;
-      unsigned int ktse = ks;
-      unsigned int kbnw = kbw;
-      unsigned int ktnw = kw;
-      unsigned int kbse = kbs;
-      unsigned int ktsw = ksw;
-      unsigned int kbne = kb;
-      unsigned int ktne = k;
-      unsigned int kbsw = neighborZ[ksw];
-      //////////////////////////////////////////////////////////////////////////
-      pressD[k] = c0o1;
-	  rhoD[k]   = c0o1;
-	  vxD[k]    = c0o1;
-	  vyD[k]    = c0o1;
-	  vzD[k]    = c0o1;
-
-      if(geoD[k] == GEO_FLUID)
-      {
-         rhoD[k]    =   (D.f[dirE   ])[ke  ]+ (D.f[dirW   ])[kw  ]+ 
-                        (D.f[dirN   ])[kn  ]+ (D.f[dirS   ])[ks  ]+
-                        (D.f[dirT   ])[kt  ]+ (D.f[dirB   ])[kb  ]+
-                        (D.f[dirNE  ])[kne ]+ (D.f[dirSW  ])[ksw ]+
-                        (D.f[dirSE  ])[kse ]+ (D.f[dirNW  ])[knw ]+
-                        (D.f[dirTE  ])[kte ]+ (D.f[dirBW  ])[kbw ]+
-                        (D.f[dirBE  ])[kbe ]+ (D.f[dirTW  ])[ktw ]+
-                        (D.f[dirTN  ])[ktn ]+ (D.f[dirBS  ])[kbs ]+
-                        (D.f[dirBN  ])[kbn ]+ (D.f[dirTS  ])[kts ]+
-                        (D.f[dirZERO])[kzero]+ 
-                        (D.f[dirTNE ])[ktne]+ (D.f[dirTSW ])[ktsw]+ 
-                        (D.f[dirTSE ])[ktse]+ (D.f[dirTNW ])[ktnw]+ 
-                        (D.f[dirBNE ])[kbne]+ (D.f[dirBSW ])[kbsw]+ 
-                        (D.f[dirBSE ])[kbse]+ (D.f[dirBNW ])[kbnw];
+    const unsigned k = vf::gpu::getNodeIndex();
 
-         vxD[k]     =  ((D.f[dirE   ])[ke  ]- (D.f[dirW   ])[kw  ]+ 
-                        (D.f[dirNE  ])[kne ]- (D.f[dirSW  ])[ksw ]+
-                        (D.f[dirSE  ])[kse ]- (D.f[dirNW  ])[knw ]+
-                        (D.f[dirTE  ])[kte ]- (D.f[dirBW  ])[kbw ]+
-                        (D.f[dirBE  ])[kbe ]- (D.f[dirTW  ])[ktw ]+
-                        (D.f[dirTNE ])[ktne]- (D.f[dirTSW ])[ktsw]+ 
-                        (D.f[dirTSE ])[ktse]- (D.f[dirTNW ])[ktnw]+ 
-                        (D.f[dirBNE ])[kbne]- (D.f[dirBSW ])[kbsw]+ 
-                        (D.f[dirBSE ])[kbse]- (D.f[dirBNW ])[kbnw])/(c1o1+rhoD[k]);
-
-         vyD[k]     =  ((D.f[dirN   ])[kn  ]- (D.f[dirS   ])[ks  ]+
-                        (D.f[dirNE  ])[kne ]- (D.f[dirSW  ])[ksw ]-
-                        (D.f[dirSE  ])[kse ]+ (D.f[dirNW  ])[knw ]+
-                        (D.f[dirTN  ])[ktn ]- (D.f[dirBS  ])[kbs ]+
-                        (D.f[dirBN  ])[kbn ]- (D.f[dirTS  ])[kts ]+
-                        (D.f[dirTNE ])[ktne]- (D.f[dirTSW ])[ktsw]- 
-                        (D.f[dirTSE ])[ktse]+ (D.f[dirTNW ])[ktnw]+ 
-                        (D.f[dirBNE ])[kbne]- (D.f[dirBSW ])[kbsw]- 
-                        (D.f[dirBSE ])[kbse]+ (D.f[dirBNW ])[kbnw])/(c1o1+rhoD[k]);
+    if (k >= size_Mat)
+        return;
 
-         vzD[k]     =  ((D.f[dirT   ])[kt  ]- (D.f[dirB   ])[kb  ]+
-                        (D.f[dirTE  ])[kte ]- (D.f[dirBW  ])[kbw ]-
-                        (D.f[dirBE  ])[kbe ]+ (D.f[dirTW  ])[ktw ]+
-                        (D.f[dirTN  ])[ktn ]- (D.f[dirBS  ])[kbs ]-
-                        (D.f[dirBN  ])[kbn ]+ (D.f[dirTS  ])[kts ]+
-                        (D.f[dirTNE ])[ktne]+ (D.f[dirTSW ])[ktsw]+ 
-                        (D.f[dirTSE ])[ktse]+ (D.f[dirTNW ])[ktnw]- 
-                        (D.f[dirBNE ])[kbne]- (D.f[dirBSW ])[kbsw]- 
-                        (D.f[dirBSE ])[kbse]- (D.f[dirBNW ])[kbnw])/(c1o1+rhoD[k]);
+    if (!vf::gpu::isValidFluidNode(geoD[k]))
+        return;
 
-         pressD[k]  =  ((D.f[dirE   ])[ke  ]+ (D.f[dirW   ])[kw  ]+ 
-                        (D.f[dirN   ])[kn  ]+ (D.f[dirS   ])[ks  ]+
-                        (D.f[dirT   ])[kt  ]+ (D.f[dirB   ])[kb  ]+
-                        2.f*(
-                        (D.f[dirNE  ])[kne ]+ (D.f[dirSW  ])[ksw ]+
-                        (D.f[dirSE  ])[kse ]+ (D.f[dirNW  ])[knw ]+
-                        (D.f[dirTE  ])[kte ]+ (D.f[dirBW  ])[kbw ]+
-                        (D.f[dirBE  ])[kbe ]+ (D.f[dirTW  ])[ktw ]+
-                        (D.f[dirTN  ])[ktn ]+ (D.f[dirBS  ])[kbs ]+
-                        (D.f[dirBN  ])[kbn ]+ (D.f[dirTS  ])[kts ])+
-                        3.f*(
-                        (D.f[dirTNE ])[ktne]+ (D.f[dirTSW ])[ktsw]+ 
-                        (D.f[dirTSE ])[ktse]+ (D.f[dirTNW ])[ktnw]+ 
-                        (D.f[dirBNE ])[kbne]+ (D.f[dirBSW ])[kbsw]+ 
-                        (D.f[dirBSE ])[kbse]+ (D.f[dirBNW ])[kbnw])-
-                        rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+rhoD[k])) * c1o2+rhoD[k]; // times zero for incompressible case   
-         //achtung op hart gesetzt Annahme op = 1 ;                                                    ^^^^(1.0/op-0.5)=0.5
+    pressD[k] = c0o1;
+    rhoD[k]   = c0o1;
+    vxD[k]    = c0o1;
+    vyD[k]    = c0o1;
+    vzD[k]    = c0o1;
 
-      }
-   }
+    vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY,
+                                               neighborZ);
+    const auto &distribution = distr_wrapper.distribution;
 
+    rhoD[k]   = vf::lbm::getDensity(distribution.f);
+    vxD[k]    = vf::lbm::getCompressibleVelocityX1(distribution.f, rhoD[k]);
+    vyD[k]    = vf::lbm::getCompressibleVelocityX2(distribution.f, rhoD[k]);
+    vzD[k]    = vf::lbm::getCompressibleVelocityX3(distribution.f, rhoD[k]);
+    pressD[k] = vf::lbm::getPressure(distribution.f, rhoD[k], vxD[k], vyD[k], vzD[k]); 
 }
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh
index b4097851b..cca9c3f40 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh
@@ -32,9 +32,11 @@ template<typename KernelFunctor>
 __global__ void runKernel(KernelFunctor kernel, GPUKernelParameter kernelParameter)
 {
     const uint k = getNodeIndex();
-    const uint nodeType = kernelParameter.typeOfGridNode[k];
 
-    if (!isValidFluidNode(k, kernelParameter.size_Mat, nodeType))
+    if (k >= kernelParameter.size_Mat)
+        return;
+
+    if (!isValidFluidNode(kernelParameter.typeOfGridNode[k]))
         return;
 
     DistributionWrapper distributionWrapper {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu
index bbb01d954..ddabee4f5 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu
@@ -158,10 +158,10 @@ __device__ unsigned int getNodeIndex()
     return nx * (ny * z + y) + x;
 }
 
-__device__ bool isValidFluidNode(uint k, int size_Mat, uint nodeType)
+
+__device__ bool isValidFluidNode(uint nodeType) // true for GEO_FLUID, GEO_PM_0, GEO_PM_1 and GEO_PM_2; performs no index bounds check, so callers must test k < size_Mat before reading the node type
 {
-    return (k < size_Mat) &&
-           (nodeType == GEO_FLUID || nodeType == GEO_PM_0 || nodeType == GEO_PM_1 || nodeType == GEO_PM_2);
+    return (nodeType == GEO_FLUID || nodeType == GEO_PM_0 || nodeType == GEO_PM_1 || nodeType == GEO_PM_2);
 }
 
 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh
index 935030701..6b38cac75 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh
@@ -90,7 +90,7 @@ struct DistributionWrapper
 
 __device__ unsigned int getNodeIndex();
 
-__device__ bool isValidFluidNode(uint k, int size_Mat, uint nodeType);
+__device__ bool isValidFluidNode(uint nodeType);
 
 }
 }
-- 
GitLab