From 380e1b4ae153b54754662c4dbe8f57820e2c0203 Mon Sep 17 00:00:00 2001
From: Henry <henry.korb@geo.uu.se>
Date: Thu, 1 Sep 2022 10:55:36 +0200
Subject: [PATCH] bit of cleanup in PressBC

---
 src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu | 707 +++++++++-----------
 1 file changed, 300 insertions(+), 407 deletions(-)

diff --git a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
index 0b291f8b9..29e82196b 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
@@ -3,6 +3,7 @@
 #include "lbm/constants/D3Q27.h"
 #include "lbm/constants/NumericConstants.h"
 #include "lbm/MacroscopicQuantities.h"
+#include "Kernel/Utilities/DistributionHelper.cuh"
 
 #include "KernelUtilities.h"
 
@@ -2795,11 +2796,13 @@ __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
 
 
 
-
-
+__host__ __device__ real computeOutflowDistribution(const real* const &f, const real* const &f1, const int dir, const real cs)
+{
+   return f1[dir] * cs + (c1o1 - cs) * f[dir];
+}
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QPressNoRhoDevice27(  real* rhoBC,
+__global__ void QPressNoRhoDevice27( real* rhoBC,
 												 real* distributions, 
 												 int* k_Q, 
 												 int* k_N, 
@@ -2813,175 +2816,171 @@ __global__ void QPressNoRhoDevice27(  real* rhoBC,
                                      int direction)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
 
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
 
-   const unsigned k = nx*(ny*z + y) + x;
+   const unsigned k = vf::gpu::getNodeIndex();
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<numberOfBCnodes)
-   {
-      ////////////////////////////////////////////////////////////////////////////////
-      //index
-      unsigned int KQK  = k_Q[k];
-      //unsigned int kzero= KQK;
-      unsigned int ke   = KQK;
-      unsigned int kw   = neighborX[KQK];
-      unsigned int kn   = KQK;
-      unsigned int ks   = neighborY[KQK];
-      unsigned int kt   = KQK;
-      unsigned int kb   = neighborZ[KQK];
-      unsigned int ksw  = neighborY[kw];
-      unsigned int kne  = KQK;
-      unsigned int kse  = ks;
-      unsigned int knw  = kw;
-      unsigned int kbw  = neighborZ[kw];
-      unsigned int kte  = KQK;
-      unsigned int kbe  = kb;
-      unsigned int ktw  = kw;
-      unsigned int kbs  = neighborZ[ks];
-      unsigned int ktn  = KQK;
-      unsigned int kbn  = kb;
-      unsigned int kts  = ks;
-      unsigned int ktse = ks;
-      unsigned int kbnw = kbw;
-      unsigned int ktnw = kw;
-      unsigned int kbse = kbs;
-      unsigned int ktsw = ksw;
-      unsigned int kbne = kb;
-      unsigned int ktne = KQK;
-      unsigned int kbsw = neighborZ[ksw];
-      ////////////////////////////////////////////////////////////////////////////////
-      //index1
-      unsigned int K1QK  = k_N[k];
-      //unsigned int k1zero= K1QK;
-      unsigned int k1e   = K1QK;
-      unsigned int k1w   = neighborX[K1QK];
-      unsigned int k1n   = K1QK;
-      unsigned int k1s   = neighborY[K1QK];
-      unsigned int k1t   = K1QK;
-      unsigned int k1b   = neighborZ[K1QK];
-      unsigned int k1sw  = neighborY[k1w];
-      unsigned int k1ne  = K1QK;
-      unsigned int k1se  = k1s;
-      unsigned int k1nw  = k1w;
-      unsigned int k1bw  = neighborZ[k1w];
-      unsigned int k1te  = K1QK;
-      unsigned int k1be  = k1b;
-      unsigned int k1tw  = k1w;
-      unsigned int k1bs  = neighborZ[k1s];
-      unsigned int k1tn  = K1QK;
-      unsigned int k1bn  = k1b;
-      unsigned int k1ts  = k1s;
-      unsigned int k1tse = k1s;
-      unsigned int k1bnw = k1bw;
-      unsigned int k1tnw = k1w;
-      unsigned int k1bse = k1bs;
-      unsigned int k1tsw = k1sw;
-      unsigned int k1bne = k1b;
-      unsigned int k1tne = K1QK;
-      unsigned int k1bsw = neighborZ[k1sw];
-      ////////////////////////////////////////////////////////////////////////////////
-      Distributions27 dist;
-      getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);      
-      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      real f1_E    = (dist.f[DIR_P00])[k1e   ];
-      real f1_W    = (dist.f[DIR_M00])[k1w   ];
-      real f1_N    = (dist.f[DIR_0P0])[k1n   ];
-      real f1_S    = (dist.f[DIR_0M0])[k1s   ];
-      real f1_T    = (dist.f[DIR_00P])[k1t   ];
-      real f1_B    = (dist.f[DIR_00M])[k1b   ];
-      real f1_NE   = (dist.f[DIR_PP0])[k1ne  ];
-      real f1_SW   = (dist.f[DIR_MM0])[k1sw  ];
-      real f1_SE   = (dist.f[DIR_PM0])[k1se  ];
-      real f1_NW   = (dist.f[DIR_MP0])[k1nw  ];
-      real f1_TE   = (dist.f[DIR_P0P])[k1te  ];
-      real f1_BW   = (dist.f[DIR_M0M])[k1bw  ];
-      real f1_BE   = (dist.f[DIR_P0M])[k1be  ];
-      real f1_TW   = (dist.f[DIR_M0P])[k1tw  ];
-      real f1_TN   = (dist.f[DIR_0PP])[k1tn  ];
-      real f1_BS   = (dist.f[DIR_0MM])[k1bs  ];
-      real f1_BN   = (dist.f[DIR_0PM])[k1bn  ];
-      real f1_TS   = (dist.f[DIR_0MP])[k1ts  ];
-      //real f1_ZERO = (dist.f[DIR_000])[k1zero];
-      real f1_TNE  = (dist.f[DIR_PPP])[k1tne ];
-      real f1_TSW  = (dist.f[DIR_MMP])[k1tsw ];
-      real f1_TSE  = (dist.f[DIR_PMP])[k1tse ];
-      real f1_TNW  = (dist.f[DIR_MPP])[k1tnw ];
-      real f1_BNE  = (dist.f[DIR_PPM])[k1bne ];
-      real f1_BSW  = (dist.f[DIR_MMM])[k1bsw ];
-      real f1_BSE  = (dist.f[DIR_PMM])[k1bse ];
-      real f1_BNW  = (dist.f[DIR_MPM])[k1bnw ];
-      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      real f_E    = (dist.f[DIR_P00])[ke   ];
-      real f_W    = (dist.f[DIR_M00])[kw   ];
-      real f_N    = (dist.f[DIR_0P0])[kn   ];
-      real f_S    = (dist.f[DIR_0M0])[ks   ];
-      real f_T    = (dist.f[DIR_00P])[kt   ];
-      real f_B    = (dist.f[DIR_00M])[kb   ];
-      real f_NE   = (dist.f[DIR_PP0])[kne  ];
-      real f_SW   = (dist.f[DIR_MM0])[ksw  ];
-      real f_SE   = (dist.f[DIR_PM0])[kse  ];
-      real f_NW   = (dist.f[DIR_MP0])[knw  ];
-      real f_TE   = (dist.f[DIR_P0P])[kte  ];
-      real f_BW   = (dist.f[DIR_M0M])[kbw  ];
-      real f_BE   = (dist.f[DIR_P0M])[kbe  ];
-      real f_TW   = (dist.f[DIR_M0P])[ktw  ];
-      real f_TN   = (dist.f[DIR_0PP])[ktn  ];
-      real f_BS   = (dist.f[DIR_0MM])[kbs  ];
-      real f_BN   = (dist.f[DIR_0PM])[kbn  ];
-      real f_TS   = (dist.f[DIR_0MP])[kts  ];
-      //real f_ZERO = (dist.f[DIR_000])[kzero];
-      real f_TNE  = (dist.f[DIR_PPP])[ktne ];
-      real f_TSW  = (dist.f[DIR_MMP])[ktsw ];
-      real f_TSE  = (dist.f[DIR_PMP])[ktse ];
-      real f_TNW  = (dist.f[DIR_MPP])[ktnw ];
-      real f_BNE  = (dist.f[DIR_PPM])[kbne ];
-      real f_BSW  = (dist.f[DIR_MMM])[kbsw ];
-      real f_BSE  = (dist.f[DIR_PMM])[kbse ];
-      real f_BNW  = (dist.f[DIR_MPM])[kbnw ];
-      //////////////////////////////////////////////////////////////////////////
+   if(k>=numberOfBCnodes) return;
 
-      //real vx1, vx2, vx3, drho;
-      //real vx1, vx2, vx3, drho, drho1;
-      //////////////////////////////////////////////////////////////////////////
-	  //Dichte
-    //   drho1  =  f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW +
-    //             f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + 
-    //             f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[DIR_000])[k1zero]); 
-    //   drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
-    //             f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-    //             f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
-      
-      //////////////////////////////////////////////////////////////////////////
-	  //Ux
+   ////////////////////////////////////////////////////////////////////////////////
+   //index
+   unsigned int KQK  = k_Q[k];
+   // unsigned int kzero= KQK;
+   unsigned int ke   = KQK;
+   unsigned int kw   = neighborX[KQK];
+   unsigned int kn   = KQK;
+   unsigned int ks   = neighborY[KQK];
+   unsigned int kt   = KQK;
+   unsigned int kb   = neighborZ[KQK];
+   unsigned int ksw  = neighborY[kw];
+   unsigned int kne  = KQK;
+   unsigned int kse  = ks;
+   unsigned int knw  = kw;
+   unsigned int kbw  = neighborZ[kw];
+   unsigned int kte  = KQK;
+   unsigned int kbe  = kb;
+   unsigned int ktw  = kw;
+   unsigned int kbs  = neighborZ[ks];
+   unsigned int ktn  = KQK;
+   unsigned int kbn  = kb;
+   unsigned int kts  = ks;
+   unsigned int ktse = ks;
+   unsigned int kbnw = kbw;
+   unsigned int ktnw = kw;
+   unsigned int kbse = kbs;
+   unsigned int ktsw = ksw;
+   unsigned int kbne = kb;
+   unsigned int ktne = KQK;
+   unsigned int kbsw = neighborZ[ksw];
+   ////////////////////////////////////////////////////////////////////////////////
+   //index1
+   unsigned int K1QK  = k_N[k];
+   //unsigned int k1zero= K1QK;
+   unsigned int k1e   = K1QK;
+   unsigned int k1w   = neighborX[K1QK];
+   unsigned int k1n   = K1QK;
+   unsigned int k1s   = neighborY[K1QK];
+   unsigned int k1t   = K1QK;
+   unsigned int k1b   = neighborZ[K1QK];
+   unsigned int k1sw  = neighborY[k1w];
+   unsigned int k1ne  = K1QK;
+   unsigned int k1se  = k1s;
+   unsigned int k1nw  = k1w;
+   unsigned int k1bw  = neighborZ[k1w];
+   unsigned int k1te  = K1QK;
+   unsigned int k1be  = k1b;
+   unsigned int k1tw  = k1w;
+   unsigned int k1bs  = neighborZ[k1s];
+   unsigned int k1tn  = K1QK;
+   unsigned int k1bn  = k1b;
+   unsigned int k1ts  = k1s;
+   unsigned int k1tse = k1s;
+   unsigned int k1bnw = k1bw;
+   unsigned int k1tnw = k1w;
+   unsigned int k1bse = k1bs;
+   unsigned int k1tsw = k1sw;
+   unsigned int k1bne = k1b;
+   unsigned int k1tne = K1QK;
+   unsigned int k1bsw = neighborZ[k1sw];
+   ////////////////////////////////////////////////////////////////////////////////
+   Distributions27 dist;
+   getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);      
+   real f[27], f1[27]; 
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+   f1[DIR_P00] = (dist.f[DIR_P00])[k1e   ];
+   f1[DIR_M00] = (dist.f[DIR_M00])[k1w   ];
+   f1[DIR_0P0] = (dist.f[DIR_0P0])[k1n   ];
+   f1[DIR_0M0] = (dist.f[DIR_0M0])[k1s   ];
+   f1[DIR_00P] = (dist.f[DIR_00P])[k1t   ];
+   f1[DIR_00M] = (dist.f[DIR_00M])[k1b   ];
+   f1[DIR_PP0] = (dist.f[DIR_PP0])[k1ne  ];
+   f1[DIR_MM0] = (dist.f[DIR_MM0])[k1sw  ];
+   f1[DIR_PM0] = (dist.f[DIR_PM0])[k1se  ];
+   f1[DIR_MP0] = (dist.f[DIR_MP0])[k1nw  ];
+   f1[DIR_P0P] = (dist.f[DIR_P0P])[k1te  ];
+   f1[DIR_M0M] = (dist.f[DIR_M0M])[k1bw  ];
+   f1[DIR_P0M] = (dist.f[DIR_P0M])[k1be  ];
+   f1[DIR_M0P] = (dist.f[DIR_M0P])[k1tw  ];
+   f1[DIR_0PP] = (dist.f[DIR_0PP])[k1tn  ];
+   f1[DIR_0MM] = (dist.f[DIR_0MM])[k1bs  ];
+   f1[DIR_0PM] = (dist.f[DIR_0PM])[k1bn  ];
+   f1[DIR_0MP] = (dist.f[DIR_0MP])[k1ts  ];
+   // f1[DIR_000] = (dist.f[DIR_000])[k1zero];
+   f1[DIR_PPP] = (dist.f[DIR_PPP])[k1tne ];
+   f1[DIR_MMP] = (dist.f[DIR_MMP])[k1tsw ];
+   f1[DIR_PMP] = (dist.f[DIR_PMP])[k1tse ];
+   f1[DIR_MPP] = (dist.f[DIR_MPP])[k1tnw ];
+   f1[DIR_PPM] = (dist.f[DIR_PPM])[k1bne ];
+   f1[DIR_MMM] = (dist.f[DIR_MMM])[k1bsw ];
+   f1[DIR_PMM] = (dist.f[DIR_PMM])[k1bse ];
+   f1[DIR_MPM] = (dist.f[DIR_MPM])[k1bnw ];
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+   f[DIR_P00] = (dist.f[DIR_P00])[ke   ];
+   f[DIR_M00] = (dist.f[DIR_M00])[kw   ];
+   f[DIR_0P0] = (dist.f[DIR_0P0])[kn   ];
+   f[DIR_0M0] = (dist.f[DIR_0M0])[ks   ];
+   f[DIR_00P] = (dist.f[DIR_00P])[kt   ];
+   f[DIR_00M] = (dist.f[DIR_00M])[kb   ];
+   f[DIR_PP0] = (dist.f[DIR_PP0])[kne  ];
+   f[DIR_MM0] = (dist.f[DIR_MM0])[ksw  ];
+   f[DIR_PM0] = (dist.f[DIR_PM0])[kse  ];
+   f[DIR_MP0] = (dist.f[DIR_MP0])[knw  ];
+   f[DIR_P0P] = (dist.f[DIR_P0P])[kte  ];
+   f[DIR_M0M] = (dist.f[DIR_M0M])[kbw  ];
+   f[DIR_P0M] = (dist.f[DIR_P0M])[kbe  ];
+   f[DIR_M0P] = (dist.f[DIR_M0P])[ktw  ];
+   f[DIR_0PP] = (dist.f[DIR_0PP])[ktn  ];
+   f[DIR_0MM] = (dist.f[DIR_0MM])[kbs  ];
+   f[DIR_0PM] = (dist.f[DIR_0PM])[kbn  ];
+   f[DIR_0MP] = (dist.f[DIR_0MP])[kts  ];
+   // f[DIR_000] = (dist.f[DIR_000])[kzero];
+   f[DIR_PPP] = (dist.f[DIR_PPP])[ktne ];
+   f[DIR_MMP] = (dist.f[DIR_MMP])[ktsw ];
+   f[DIR_PMP] = (dist.f[DIR_PMP])[ktse ];
+   f[DIR_MPP] = (dist.f[DIR_MPP])[ktnw ];
+   f[DIR_PPM] = (dist.f[DIR_PPM])[kbne ];
+   f[DIR_MMM] = (dist.f[DIR_MMM])[kbsw ];
+   f[DIR_PMM] = (dist.f[DIR_PMM])[kbse ];
+   f[DIR_MPM] = (dist.f[DIR_MPM])[kbnw ];
+   //////////////////////////////////////////////////////////////////////////
 
-	  //vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+   //real vx1, vx2, vx3, drho;
+   //real vx1, vx2, vx3, drho, drho1;
+   //////////////////////////////////////////////////////////////////////////
+   ////Dichte
+   //   drho1  =  f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW +
+   //             f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + 
+   //             f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[DIR_000])[k1zero]); 
+   //   drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
+   //             f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
+   //             f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
+
+   //////////////////////////////////////////////////////////////////////////
+   ////Ux
+
+   //vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
    //               ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
    //               (f_E - f_W)) /(one + drho); 
 
 
-   //   vx2    =   ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+   //vx2    =   ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
    //               ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
    //               (f_N - f_S)) /(one + drho); 
 
-   //   vx3    =   (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
+   //vx3    =   (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
    //               (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
    //               (f_T - f_B)) /(one + drho); 
 
 
-      //real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
+   //real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-   //   //////////////////////////////////////////////////////////////////////////
-	  ////real omega = om1;
+   //////////////////////////////////////////////////////////////////////////
+	////real omega = om1;
    //   real cusq  = c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
    //   //////////////////////////////////////////////////////////////////////////
-	  ////T�st MK
-	  ////if(vx1 < zero) vx1 = zero;
+   ////T�st MK
+   ////if(vx1 < zero) vx1 = zero;
    //   //////////////////////////////////////////////////////////////////////////
    //   real fZERO = c8over27*  (drho1-(one + drho1)*(cusq))                                                           ;
    //   real fE    = c2over27*  (drho1+(one + drho1)*(three*( vx1        )+c9over2*( vx1        )*( vx1        )-cusq));
@@ -2994,77 +2993,75 @@ __global__ void QPressNoRhoDevice27(  real* rhoBC,
    //   real fSW   = c1over54*  (drho1+(one + drho1)*(three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cusq));
    //   real fSE   = c1over54*  (drho1+(one + drho1)*(three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cusq));
    //   real fNW   = c1over54*  (drho1+(one + drho1)*(three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cusq));
-   //   real fTE	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  //with velocity
-	  //if(true){//vx1 >= zero){
-		 // real csMvx = one / sqrtf(three) - vx1;
-		 // //real csMvy = one / sqrtf(three) - vx2;
-		 // ///////////////////////////////////////////
-		 // // X
-		 // f_W   = f1_W   * csMvx + (one - csMvx) * f_W   ;//- c2over27  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_NW  = f1_NW  * csMvx + (one - csMvx) * f_NW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_SW  = f1_SW  * csMvx + (one - csMvx) * f_SW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_TW  = f1_TW  * csMvx + (one - csMvx) * f_TW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_BW  = f1_BW  * csMvx + (one - csMvx) * f_BW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_TNW = f1_TNW * csMvx + (one - csMvx) * f_TNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_TSW = f1_TSW * csMvx + (one - csMvx) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_BNW = f1_BNW * csMvx + (one - csMvx) * f_BNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_BSW = f1_BSW * csMvx + (one - csMvx) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // ///////////////////////////////////////////
-		 // // Y
-		 // //f_S   = f1_S   * csMvy + (one - csMvy) * f_S   ;//- c2over27  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_SE  = f1_SE  * csMvy + (one - csMvy) * f_SE  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_SW  = f1_SW  * csMvy + (one - csMvy) * f_SW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_TS  = f1_TS  * csMvy + (one - csMvy) * f_TS  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_BS  = f1_BS  * csMvy + (one - csMvy) * f_BS  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_S   = f1_S   * csMvy + (one - csMvy) * f_S;
-		 // //f_SE  = f1_SE  * csMvy + (one - csMvy) * f_SE;
-		 // //f_SW  = f1_SW  * csMvy + (one - csMvy) * f_SW;
-		 // //f_TS  = f1_TS  * csMvy + (one - csMvy) * f_TS;
-		 // //f_BS  = f1_BS  * csMvy + (one - csMvy) * f_BS;
-		 // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE;
-		 // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW;
-		 // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE;
-		 // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW;
-		 // //////////////////////////////////////////////////////////////////////////
-	  //}
-	  //else
-	  //{
-		 // ///////////////////////////////////////////
-		 // // X
-		 // vx1   = vx1 * 0.9;
-		 // f_W   = f_E   - six * c2over27  * ( vx1        );
-		 // f_NW  = f_SE  - six * c1over54  * ( vx1-vx2    );
-		 // f_SW  = f_NE  - six * c1over54  * ( vx1+vx2    );
-		 // f_TW  = f_BE  - six * c1over54  * ( vx1    -vx3);
-		 // f_BW  = f_TE  - six * c1over54  * ( vx1    +vx3);
-		 // f_TNW = f_BSE - six * c1over216 * ( vx1-vx2-vx3);
-		 // f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3);
-		 // f_BNW = f_TSE - six * c1over216 * ( vx1-vx2+vx3);
-		 // f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3);
-		 // ///////////////////////////////////////////
-		 // // Y
-		 // //vx2   = vx2 * 0.9;
-		 // //f_S   = f_N   - six * c2over27  * (     vx2    );
-		 // //f_SE  = f_NW  - six * c1over54  * (-vx1+vx2    );
-		 // //f_SW  = f_NE  - six * c1over54  * ( vx1+vx2    );
-		 // //f_TS  = f_BN  - six * c1over54  * (     vx2-vx3);
-		 // //f_BS  = f_TN  - six * c1over54  * (     vx2+vx3);
-		 // //f_TSE = f_BNW - six * c1over216 * (-vx1+vx2-vx3);
-		 // //f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3);
-		 // //f_BSE = f_TNW - six * c1over216 * (-vx1+vx2+vx3);
-		 // //f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3);
-		 // ///////////////////////////////////////////
-	  //}
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-	  //   = c1over54*  (drho1+(one + drho1)*(three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cusq));
+   //   real fTE	  /////////////////////////////////////////////////////////////
+   //with velocity
+   //if(true){//vx1 >= zero){
+      // real csMvx = one / sqrtf(three) - vx1;
+      // //real csMvy = one / sqrtf(three) - vx2;
+      // ///////////////////////////////////////////
+      // // X
+      // f_W   = f1_W   * csMvx + (one - csMvx) * f_W   ;//- c2over27  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // f_NW  = f1_NW  * csMvx + (one - csMvx) * f_NW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // f_SW  = f1_SW  * csMvx + (one - csMvx) * f_SW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // f_TW  = f1_TW  * csMvx + (one - csMvx) * f_TW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // f_BW  = f1_BW  * csMvx + (one - csMvx) * f_BW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // f_TNW = f1_TNW * csMvx + (one - csMvx) * f_TNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // f_TSW = f1_TSW * csMvx + (one - csMvx) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // f_BNW = f1_BNW * csMvx + (one - csMvx) * f_BNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // f_BSW = f1_BSW * csMvx + (one - csMvx) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // ///////////////////////////////////////////
+      // // Y
+      // //f_S   = f1_S   * csMvy + (one - csMvy) * f_S   ;//- c2over27  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_SE  = f1_SE  * csMvy + (one - csMvy) * f_SE  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_SW  = f1_SW  * csMvy + (one - csMvy) * f_SW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_TS  = f1_TS  * csMvy + (one - csMvy) * f_TS  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_BS  = f1_BS  * csMvy + (one - csMvy) * f_BS  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_S   = f1_S   * csMvy + (one - csMvy) * f_S;
+      // //f_SE  = f1_SE  * csMvy + (one - csMvy) * f_SE;
+      // //f_SW  = f1_SW  * csMvy + (one - csMvy) * f_SW;
+      // //f_TS  = f1_TS  * csMvy + (one - csMvy) * f_TS;
+      // //f_BS  = f1_BS  * csMvy + (one - csMvy) * f_BS;
+      // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE;
+      // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW;
+      // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE;
+      // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW;
+      // //////////////////////////////////////////////////////////////////////////
+   //}
+   //else
+   //{
+      // ///////////////////////////////////////////
+      // // X
+      // vx1   = vx1 * 0.9;
+      // f_W   = f_E   - six * c2over27  * ( vx1        );
+      // f_NW  = f_SE  - six * c1over54  * ( vx1-vx2    );
+      // f_SW  = f_NE  - six * c1over54  * ( vx1+vx2    );
+      // f_TW  = f_BE  - six * c1over54  * ( vx1    -vx3);
+      // f_BW  = f_TE  - six * c1over54  * ( vx1    +vx3);
+      // f_TNW = f_BSE - six * c1over216 * ( vx1-vx2-vx3);
+      // f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3);
+      // f_BNW = f_TSE - six * c1over216 * ( vx1-vx2+vx3);
+      // f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3);
+      // ///////////////////////////////////////////
+      // // Y
+      // //vx2   = vx2 * 0.9;
+      // //f_S   = f_N   - six * c2over27  * (     vx2    );
+      // //f_SE  = f_NW  - six * c1over54  * (-vx1+vx2    );
+      // //f_SW  = f_NE  - six * c1over54  * ( vx1+vx2    );
+      // //f_TS  = f_BN  - six * c1over54  * (     vx2-vx3);
+      // //f_BS  = f_TN  - six * c1over54  * (     vx2+vx3);
+      // //f_TSE = f_BNW - six * c1over216 * (-vx1+vx2-vx3);
+      // //f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3);
+      // //f_BSE = f_TNW - six * c1over216 * (-vx1+vx2+vx3);
+      // //f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3);
+      // ///////////////////////////////////////////
+   //}
+   /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+   //   = c1over54*  (drho1+(one + drho1)*(three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cusq));
    //   real fTN   = c1over54*  (drho1+(one + drho1)*(three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cusq));
    //   real fBS   = c1over54*  (drho1+(one + drho1)*(three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cusq));
    //   real fBN   = c1over54*  (drho1+(one + drho1)*(three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cusq));
@@ -3078,186 +3075,88 @@ __global__ void QPressNoRhoDevice27(  real* rhoBC,
    //   real fBSE  = c1over216* (drho1+(one + drho1)*(three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq));
    //   real fTNW  = c1over216* (drho1+(one + drho1)*(three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq));
 
-	   real cs = c1o1 / sqrtf(c3o1);
-	   /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	   //no velocity
-	   //////////////////////////////////////////
-      f_E    = f1_E   * cs + (c1o1 - cs) * f_E   ;
-      f_W    = f1_W   * cs + (c1o1 - cs) * f_W   ;
-      f_N    = f1_N   * cs + (c1o1 - cs) * f_N   ;
-      f_S    = f1_S   * cs + (c1o1 - cs) * f_S   ;
-      f_T    = f1_T   * cs + (c1o1 - cs) * f_T   ;
-      f_B    = f1_B   * cs + (c1o1 - cs) * f_B   ;
-      f_NE   = f1_NE  * cs + (c1o1 - cs) * f_NE  ;
-      f_SW   = f1_SW  * cs + (c1o1 - cs) * f_SW  ;
-      f_SE   = f1_SE  * cs + (c1o1 - cs) * f_SE  ;
-      f_NW   = f1_NW  * cs + (c1o1 - cs) * f_NW  ;
-      f_TE   = f1_TE  * cs + (c1o1 - cs) * f_TE  ;
-      f_BW   = f1_BW  * cs + (c1o1 - cs) * f_BW  ;
-      f_BE   = f1_BE  * cs + (c1o1 - cs) * f_BE  ;
-      f_TW   = f1_TW  * cs + (c1o1 - cs) * f_TW  ;
-      f_TN   = f1_TN  * cs + (c1o1 - cs) * f_TN  ;
-      f_BS   = f1_BS  * cs + (c1o1 - cs) * f_BS  ;
-      f_BN   = f1_BN  * cs + (c1o1 - cs) * f_BN  ;
-      f_TS   = f1_TS  * cs + (c1o1 - cs) * f_TS  ;
-      f_TNE  = f1_TNE * cs + (c1o1 - cs) * f_TNE ;
-      f_TSW  = f1_TSW * cs + (c1o1 - cs) * f_TSW ;
-      f_TSE  = f1_TSE * cs + (c1o1 - cs) * f_TSE ;
-      f_TNW  = f1_TNW * cs + (c1o1 - cs) * f_TNW ;
-      f_BNE  = f1_BNE * cs + (c1o1 - cs) * f_BNE ;
-      f_BSW  = f1_BSW * cs + (c1o1 - cs) * f_BSW ;
-      f_BSE  = f1_BSE * cs + (c1o1 - cs) * f_BSE ;
-      f_BNW  = f1_BNW * cs + (c1o1 - cs) * f_BNW ;
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+   real cs = c1o1 / sqrtf(c3o1);
 
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  //with velocity
-	  //if(true){//vx1 >= zero){
-		 // real csMvx = one / sqrtf(three) - vx1;
-		 // //real csMvy = one / sqrtf(three) - vx2;
-		 // ///////////////////////////////////////////
-		 // // X
-		 // f_W   = f1_W   * csMvx + (one - csMvx) * f_W   ;//- c2over27  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_NW  = f1_NW  * csMvx + (one - csMvx) * f_NW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_SW  = f1_SW  * csMvx + (one - csMvx) * f_SW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_TW  = f1_TW  * csMvx + (one - csMvx) * f_TW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_BW  = f1_BW  * csMvx + (one - csMvx) * f_BW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_TNW = f1_TNW * csMvx + (one - csMvx) * f_TNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_TSW = f1_TSW * csMvx + (one - csMvx) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_BNW = f1_BNW * csMvx + (one - csMvx) * f_BNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_BSW = f1_BSW * csMvx + (one - csMvx) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // ///////////////////////////////////////////
-		 // // Y
-		 // //f_S   = f1_S   * csMvy + (one - csMvy) * f_S   ;//- c2over27  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_SE  = f1_SE  * csMvy + (one - csMvy) * f_SE  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_SW  = f1_SW  * csMvy + (one - csMvy) * f_SW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_TS  = f1_TS  * csMvy + (one - csMvy) * f_TS  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_BS  = f1_BS  * csMvy + (one - csMvy) * f_BS  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_S   = f1_S   * csMvy + (one - csMvy) * f_S;
-		 // //f_SE  = f1_SE  * csMvy + (one - csMvy) * f_SE;
-		 // //f_SW  = f1_SW  * csMvy + (one - csMvy) * f_SW;
-		 // //f_TS  = f1_TS  * csMvy + (one - csMvy) * f_TS;
-		 // //f_BS  = f1_BS  * csMvy + (one - csMvy) * f_BS;
-		 // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE;
-		 // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW;
-		 // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE;
-		 // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW;
-		 // //////////////////////////////////////////////////////////////////////////
-	  //}
-	  //else
-	  //{
-		 // ///////////////////////////////////////////
-		 // // X
-		 // vx1   = vx1 * 0.9;
-		 // f_W   = f_E   - six * c2over27  * ( vx1        );
-		 // f_NW  = f_SE  - six * c1over54  * ( vx1-vx2    );
-		 // f_SW  = f_NE  - six * c1over54  * ( vx1+vx2    );
-		 // f_TW  = f_BE  - six * c1over54  * ( vx1    -vx3);
-		 // f_BW  = f_TE  - six * c1over54  * ( vx1    +vx3);
-		 // f_TNW = f_BSE - six * c1over216 * ( vx1-vx2-vx3);
-		 // f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3);
-		 // f_BNW = f_TSE - six * c1over216 * ( vx1-vx2+vx3);
-		 // f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3);
-		 // ///////////////////////////////////////////
-		 // // Y
-		 // //vx2   = vx2 * 0.9;
-		 // //f_S   = f_N   - six * c2over27  * (     vx2    );
-		 // //f_SE  = f_NW  - six * c1over54  * (-vx1+vx2    );
-		 // //f_SW  = f_NE  - six * c1over54  * ( vx1+vx2    );
-		 // //f_TS  = f_BN  - six * c1over54  * (     vx2-vx3);
-		 // //f_BS  = f_TN  - six * c1over54  * (     vx2+vx3);
-		 // //f_TSE = f_BNW - six * c1over216 * (-vx1+vx2-vx3);
-		 // //f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3);
-		 // //f_BSE = f_TNW - six * c1over216 * (-vx1+vx2+vx3);
-		 // //f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3);
-		 // ///////////////////////////////////////////
-	  //}
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+   //////////////////////////////////////////////////////////////////////////
+   getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
+   switch(direction)
+   {
+      case MZZ:
+         (dist.f[DIR_P00])[ke   ] = computeOutflowDistribution(f, f1, DIR_P00, cs);
+         (dist.f[DIR_PM0])[kse  ] = computeOutflowDistribution(f, f1, DIR_PM0, cs);
+         (dist.f[DIR_PP0])[kne  ] = computeOutflowDistribution(f, f1, DIR_PP0, cs);
+         (dist.f[DIR_P0M])[kbe  ] = computeOutflowDistribution(f, f1, DIR_P0M, cs);
+         (dist.f[DIR_P0P])[kte  ] = computeOutflowDistribution(f, f1, DIR_P0P, cs);
+         (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs);
+         (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs);
+         (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs);
+         (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs);
+         break;
 
-	  //////////////////////////////////////////////////////////////////////////
-      getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
-      switch(direction)
-      {
-         case MZZ:
-            (dist.f[DIR_P00])[ke   ] = f_E   ;
-            (dist.f[DIR_PM0])[kse  ] = f_SE  ;
-            (dist.f[DIR_PP0])[kne  ] = f_NE  ;
-            (dist.f[DIR_P0M])[kbe  ] = f_BE  ;
-            (dist.f[DIR_P0P])[kte  ] = f_TE  ;
-            (dist.f[DIR_PMP])[ktse ] = f_TSE ;
-            (dist.f[DIR_PPP])[ktne ] = f_TNE ;
-            (dist.f[DIR_PMM])[kbse ] = f_BSE ;
-            (dist.f[DIR_PPM])[kbne ] = f_BNE ;
-            break;
-
-         case PZZ:
-            (dist.f[DIR_M00])[kw   ] = f_W   ;
-            (dist.f[DIR_MM0])[ksw  ] = f_SW  ;
-            (dist.f[DIR_MP0])[knw  ] = f_NW  ;
-            (dist.f[DIR_M0M])[kbw  ] = f_BW  ;
-            (dist.f[DIR_M0P])[ktw  ] = f_TW  ;
-            (dist.f[DIR_MMP])[ktsw ] = f_TSW ;
-            (dist.f[DIR_MPP])[ktnw ] = f_TNW ;
-            (dist.f[DIR_MMM])[kbsw ] = f_BSW ;
-            (dist.f[DIR_MPM])[kbnw ] = f_BNW ;
-            break;
-
-         case ZMZ:
-            (dist.f[DIR_0P0])[kn   ] = f_N   ;
-            (dist.f[DIR_PP0])[kne  ] = f_NE  ;
-            (dist.f[DIR_MP0])[knw  ] = f_NW  ;
-            (dist.f[DIR_0PP])[ktn  ] = f_TN  ;
-            (dist.f[DIR_0PM])[kbn  ] = f_BN  ;
-            (dist.f[DIR_PPP])[ktne ] = f_TNE ;
-            (dist.f[DIR_MPP])[ktnw ] = f_TNW ;
-            (dist.f[DIR_PPM])[kbne ] = f_BNE ;
-            (dist.f[DIR_MPM])[kbnw ] = f_BNW ;
-            break;  
-
-         case ZPZ:   
-            (dist.f[DIR_0M0])[ks   ] = f_S   ;
-            (dist.f[DIR_PM0])[kse  ] = f_SE  ;
-            (dist.f[DIR_MM0])[ksw  ] = f_SW  ;
-            (dist.f[DIR_0MP])[kts  ] = f_TS  ;
-            (dist.f[DIR_0MM])[kbs  ] = f_BS  ;
-            (dist.f[DIR_PMP])[ktse ] = f_TSE ;
-            (dist.f[DIR_MMP])[ktsw ] = f_TSW ;
-            (dist.f[DIR_PMM])[kbse ] = f_BSE ;
-            (dist.f[DIR_MMM])[kbsw ] = f_BSW ;
-            break;
-
-         case ZZM:
-            (dist.f[DIR_00P])[kt   ] = f_T   ;
-	         (dist.f[DIR_P0P])[kte  ] = f_TE  ;
-	         (dist.f[DIR_M0P])[ktw  ] = f_TW  ;
-	         (dist.f[DIR_0PP])[ktn  ] = f_TN  ;
-	         (dist.f[DIR_0MP])[kts  ] = f_TS  ;
-	         (dist.f[DIR_PPP])[ktne ] = f_TNE ;
-	         (dist.f[DIR_MPP])[ktnw ] = f_TNW ;
-	         (dist.f[DIR_PMP])[ktse ] = f_TSE ;
-	         (dist.f[DIR_MMP])[ktsw ] = f_TSW ; 
-            break;
-
-         case ZZP:
-	         (dist.f[DIR_00M])[kb   ] = f_B   ;
-	         (dist.f[DIR_P0M])[kbe  ] = f_BE  ;
-	         (dist.f[DIR_M0M])[kbw  ] = f_BW  ;
-	         (dist.f[DIR_0PM])[kbn  ] = f_BN  ;
-	         (dist.f[DIR_0MM])[kbs  ] = f_BS  ;
-	         (dist.f[DIR_PPM])[kbne ] = f_BNE ;
-	         (dist.f[DIR_MPM])[kbnw ] = f_BNW ;
-	         (dist.f[DIR_PMM])[kbse ] = f_BSE ;
-	         (dist.f[DIR_MMM])[kbsw ] = f_BSW ;     
-            break;
-         default:
-            break;
-      }
+      case PZZ:
+         (dist.f[DIR_M00])[kw   ] = computeOutflowDistribution(f, f1, DIR_M00, cs);
+         (dist.f[DIR_MM0])[ksw  ] = computeOutflowDistribution(f, f1, DIR_MM0, cs);
+         (dist.f[DIR_MP0])[knw  ] = computeOutflowDistribution(f, f1, DIR_MP0, cs);
+         (dist.f[DIR_M0M])[kbw  ] = computeOutflowDistribution(f, f1, DIR_M0M, cs);
+         (dist.f[DIR_M0P])[ktw  ] = computeOutflowDistribution(f, f1, DIR_M0P, cs);
+         (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs);
+         (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs);
+         (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs);
+         (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs);
+         break;
+
+      case ZMZ:
+         (dist.f[DIR_0P0])[kn   ] = computeOutflowDistribution(f, f1, DIR_0P0, cs);
+         (dist.f[DIR_PP0])[kne  ] = computeOutflowDistribution(f, f1, DIR_PP0, cs);
+         (dist.f[DIR_MP0])[knw  ] = computeOutflowDistribution(f, f1, DIR_MP0, cs);
+         (dist.f[DIR_0PP])[ktn  ] = computeOutflowDistribution(f, f1, DIR_0PP, cs);
+         (dist.f[DIR_0PM])[kbn  ] = computeOutflowDistribution(f, f1, DIR_0PM, cs);
+         (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs);
+         (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs);
+         (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs);
+         (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs);
+         break;  
+
+      case ZPZ:   
+         (dist.f[DIR_0M0])[ks   ] = computeOutflowDistribution(f, f1, DIR_0M0, cs);
+         (dist.f[DIR_PM0])[kse  ] = computeOutflowDistribution(f, f1, DIR_PM0, cs);
+         (dist.f[DIR_MM0])[ksw  ] = computeOutflowDistribution(f, f1, DIR_MM0, cs);
+         (dist.f[DIR_0MP])[kts  ] = computeOutflowDistribution(f, f1, DIR_0MP, cs);
+         (dist.f[DIR_0MM])[kbs  ] = computeOutflowDistribution(f, f1, DIR_0MM, cs);
+         (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs);
+         (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs);
+         (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs);
+         (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs);
+         break;
+
+      case ZZM:
+         (dist.f[DIR_00P])[kt   ] = computeOutflowDistribution(f, f1, DIR_00P, cs);
+         (dist.f[DIR_P0P])[kte  ] = computeOutflowDistribution(f, f1, DIR_P0P, cs);
+         (dist.f[DIR_M0P])[ktw  ] = computeOutflowDistribution(f, f1, DIR_M0P, cs);
+         (dist.f[DIR_0PP])[ktn  ] = computeOutflowDistribution(f, f1, DIR_0PP, cs);
+         (dist.f[DIR_0MP])[kts  ] = computeOutflowDistribution(f, f1, DIR_0MP, cs);
+         (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs);
+         (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs);
+         (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs);
+         (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs); 
+         break;
+
+      case ZZP:
+         (dist.f[DIR_00M])[kb   ] = computeOutflowDistribution(f, f1, DIR_00M, cs);
+         (dist.f[DIR_P0M])[kbe  ] = computeOutflowDistribution(f, f1, DIR_P0M, cs);
+         (dist.f[DIR_M0M])[kbw  ] = computeOutflowDistribution(f, f1, DIR_M0M, cs);
+         (dist.f[DIR_0PM])[kbn  ] = computeOutflowDistribution(f, f1, DIR_0PM, cs);
+         (dist.f[DIR_0MM])[kbs  ] = computeOutflowDistribution(f, f1, DIR_0MM, cs);
+         (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs);
+         (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs);
+         (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs);
+         (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs);     
+         break;
+      default:
+         break;
    }
 }
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 __host__ __device__ real computeOutflowDistribution(const real* const &f, const real* const &f1, const int dir, const real rhoCorrection, const real cs, const real weight)
@@ -3280,14 +3179,8 @@ __global__ void QPressZeroRhoOutflowDevice27(  real* rhoBC,
                                      real densityCorrectionFactor)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
+   const unsigned k = vf::gpu::getNodeIndex();
+   
    //////////////////////////////////////////////////////////////////////////
 
    if(k>=numberOfBCnodes) return;
-- 
GitLab