diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 18be4504c9bad222377360a0376b2190736ced3c..d063ed5bda541a13006d57692d8d93612e4948f5 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -11,5 +11,10 @@
         "streetsidesoftware.code-spell-checker"
     ],
     "runArgs": ["--gpus","all"],
-    "image": "git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu20_04:1.3"
+    "image": "git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu20_04:1.3",
+    "containerEnv": {"HOME": "/workspaces/"},
+    "mounts": [
+        "source=/mnt/d/output,target=/${containerWorkspaceFolder}/output,type=bind", 
+        "source=${localEnv:HOME}/STLs,target=${containerWorkspaceFolder}/stl,type=bind"    
+    ]
 }
diff --git a/.gitignore b/.gitignore
index d16238c4ac7d45440117af9bc7ac0479a90dae2d..f05ddbd29708e88ed8a0e886626a12d26f3cff7f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,8 @@ __pycache__/
 .vscode-server/
 .sync/
 .idea/
+.ccache/
+.cache/
 
 # simulation results
 output/
@@ -29,3 +31,6 @@ stl/
 
 # MacOS
 .DS_Store
+
+# Settings
+.gitconfig
\ No newline at end of file
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/configDrivenCavityMultiGPU.txt b/apps/gpu/LBM/DrivenCavityMultiGPU/configDrivenCavityMultiGPU.txt
index 97f5b5d8079e6b937863cca09e6076e0a753af23..f37aac7034140d6d288852f42583c8cce7b840e9 100644
--- a/apps/gpu/LBM/DrivenCavityMultiGPU/configDrivenCavityMultiGPU.txt
+++ b/apps/gpu/LBM/DrivenCavityMultiGPU/configDrivenCavityMultiGPU.txt
@@ -31,6 +31,6 @@ GridPath=/workspaces/VirtualFluids_dev/output/DrivenCavity_Results/grid/  # Arag
 ##################################################
 #simulation parameter
 ##################################################
-TimeEnd=1000
-TimeOut=100
+TimeEnd=100000
+TimeOut=10000
 #TimeStartOut=0
\ No newline at end of file
diff --git a/gpu.cmake b/gpu.cmake
index 5b70ed9da9cb5f0ac56d09a0f91f0a6b6d13b89a..56f731d92a930f4af7eb62e164ed26d905c393db 100644
--- a/gpu.cmake
+++ b/gpu.cmake
@@ -36,9 +36,9 @@ IF (BUILD_VF_GPU)
     #add_subdirectory(apps/gpu/LBM/gridGeneratorTest)
     #add_subdirectory(apps/gpu/LBM/TGV_3D)
     #add_subdirectory(apps/gpu/LBM/TGV_3D_MultiGPU)
-	#add_subdirectory(apps/gpu/LBM/SphereScaling)
-    #add_subdirectory(apps/gpu/LBM/DrivenCavityMultiGPU)
-	#add_subdirectory(apps/gpu/LBM/MusselOyster)
+	add_subdirectory(apps/gpu/LBM/SphereScaling)
+    add_subdirectory(apps/gpu/LBM/DrivenCavityMultiGPU)
+	add_subdirectory(apps/gpu/LBM/MusselOyster)
     #add_subdirectory(apps/gpu/LBM/Poiseuille)
     #add_subdirectory(apps/gpu/LBM/ActuatorLine)
     #add_subdirectory(apps/gpu/LBM/BoundaryLayer)
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/DragLift.cpp b/src/gpu/VirtualFluids_GPU/Calculation/DragLift.cpp
index d90ad6474da28194526feb2b4203f5e42eb313c9..648fe6cc9c8fffc7574bc60c89e194937a7974e1 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/DragLift.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/DragLift.cpp
@@ -55,7 +55,7 @@ void calcDragLift(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev
 	//double LBtoSI = 1.204 * (pow(delta_x, 4))/(pow(delta_t,2));//rho_SI * delta_x^4 / delta_t^2 = 1.204 kg/m^3 * (0.0045m)^4 / (0.00000757s)^2 ... LB to kg*m/s^2
 	//double LBtoSI = 1000 * (pow(delta_x, 4))/(pow(delta_t,2));//rho_SI * delta_x^4 / delta_t^2 = 1000 kg/m^3 * (0.1m)^4 / (0.00187s)^2 ... LB to kg*m/s^2
 
-	for (int it = 0; it < para->getParH(lev)->geometryBC.numberOfBCnodes; it++)
+	for (unsigned int it = 0; it < para->getParH(lev)->geometryBC.numberOfBCnodes; it++)
 	{
 		dragX += (double) (para->getParH(lev)->DragPreX[it] - para->getParH(lev)->DragPostX[it]); //Kraft da Impuls pro Zeitschritt merke: andere nennen es FD
 		dragY += (double) (para->getParH(lev)->DragPreY[it] - para->getParH(lev)->DragPostY[it]); //Kraft da Impuls pro Zeitschritt merke: andere nennen es FD
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.cpp b/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.cpp
index e1f686cc5b22849fd1853fc3f8edf970ab9ff82b..be90565d71a11054a05d663291f5353f9d61f624 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.cpp
@@ -353,7 +353,7 @@ void printRE(Parameter* para, CudaMemoryManager* cudaMemoryManager, int timestep
 	//////////////////////////////////////////////////////////////////////////
 	//fill file with data
 	bool doNothing = false;
-	for (int i = 0; i < para->getParH(lev)->pressureBC.numberOfBCnodes; i++)
+	for (unsigned int i = 0; i < para->getParH(lev)->pressureBC.numberOfBCnodes; i++)
 	{
 		doNothing = false;
 		for (std::size_t j = 0; j < 27; j++)
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
index 69fc2f70f6d8ca89f36b9a1fb731a1302df0681d..559abf12377f8e0b587753f1d225da8944d843fd 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
@@ -117,8 +117,6 @@ void GridProvider::setVelocitySizePerLevel(int level, int sizePerLevel) const
 {
     para->getParH(level)->velocityBC.numberOfBCnodes = sizePerLevel;
     para->getParD(level)->velocityBC.numberOfBCnodes = sizePerLevel;
-    para->getParH(level)->numberOfVeloBCnodes = sizePerLevel;
-    para->getParD(level)->numberOfVeloBCnodes = sizePerLevel;
     para->getParH(level)->numberOfVeloBCnodesRead = sizePerLevel * para->getD3Qxx();
     para->getParD(level)->numberOfVeloBCnodesRead = sizePerLevel * para->getD3Qxx();
 }
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
index 6cd1040807249bf24bb472da81e26ba5bf23064b..26b6603c2f065661fe1d1f0047d01701d748a114 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
@@ -764,8 +764,6 @@ void GridReader::setSizeNoSlip(std::shared_ptr<BoundaryQs> boundaryQ, unsigned i
 {
 	para->getParH(level)->noSlipBC.numberOfBCnodes = boundaryQ->getSize(level);
 	para->getParD(level)->noSlipBC.numberOfBCnodes = para->getParH(level)->noSlipBC.numberOfBCnodes;
-	para->getParH(level)->numberOfNoSlipBCnodes = para->getParH(level)->noSlipBC.numberOfBCnodes;
-	para->getParD(level)->numberOfNoSlipBCnodes = para->getParH(level)->noSlipBC.numberOfBCnodes;
     cudaMemoryManager->cudaAllocWallBC(level);
 }
 
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
index 8a23d8c2633ba87f6911693eeb5130f08fc73cc4..a9a315d36f3bb3c93f8342182add6ffcf9810daa 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
@@ -139,8 +139,6 @@ void GridGenerator::allocArrays_BoundaryValues()
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         para->getParH(level)->slipBC.numberOfBCnodes = numberOfSlipValues;
         para->getParD(level)->slipBC.numberOfBCnodes = numberOfSlipValues;
-        para->getParH(level)->numberOfSlipBCnodes   = numberOfSlipValues;
-        para->getParD(level)->numberOfSlipBCnodes   = numberOfSlipValues;
         para->getParH(level)->numberOfSlipBCnodesRead = numberOfSlipValues * para->getD3Qxx();
         para->getParD(level)->numberOfSlipBCnodesRead = numberOfSlipValues * para->getD3Qxx();
         if (numberOfSlipValues > 1)
@@ -158,8 +156,6 @@ void GridGenerator::allocArrays_BoundaryValues()
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         para->getParH(level)->stressBC.numberOfBCnodes = numberOfStressValues;
         para->getParD(level)->stressBC.numberOfBCnodes = numberOfStressValues;
-        para->getParH(level)->numberOfStressBCnodes   = numberOfStressValues;
-        para->getParD(level)->numberOfStressBCnodes   = numberOfStressValues;
         para->getParH(level)->numberOfStressBCnodesRead = numberOfStressValues * para->getD3Qxx();
         para->getParD(level)->numberOfStressBCnodesRead = numberOfStressValues * para->getD3Qxx();
 
@@ -190,8 +186,6 @@ void GridGenerator::allocArrays_BoundaryValues()
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         para->getParH(level)->velocityBC.numberOfBCnodes = numberOfVelocityValues;
         para->getParD(level)->velocityBC.numberOfBCnodes = numberOfVelocityValues;
-        para->getParH(level)->numberOfVeloBCnodes = numberOfVelocityValues;
-        para->getParD(level)->numberOfVeloBCnodes = numberOfVelocityValues;
         para->getParH(level)->numberOfVeloBCnodesRead = numberOfVelocityValues * para->getD3Qxx();
         para->getParD(level)->numberOfVeloBCnodesRead = numberOfVelocityValues * para->getD3Qxx();
 
diff --git a/src/gpu/VirtualFluids_GPU/FindQ/DefineBCs.cpp b/src/gpu/VirtualFluids_GPU/FindQ/DefineBCs.cpp
index 7333a7fdd3427aef3ab7999b3698fa900c8fd209..a67f1d987cb9636ee447c5f5acd1410c44cb6a62 100644
--- a/src/gpu/VirtualFluids_GPU/FindQ/DefineBCs.cpp
+++ b/src/gpu/VirtualFluids_GPU/FindQ/DefineBCs.cpp
@@ -30,20 +30,14 @@ void findQ27(Parameter* para, CudaMemoryManager* cudaMemoryManager)
    for (int lev = para->getFine(); lev >= para->getCoarse(); lev--)
    {
       findKforQ(para, lev);
-
-      para->getParH(lev)->numberOfNoSlipBCnodes       = para->getParH(lev)->noSlipBC.numberOfBCnodes;
-	  para->getParD(lev)->numberOfNoSlipBCnodes       = para->getParH(lev)->noSlipBC.numberOfBCnodes;
-	  para->getParD(lev)->noSlipBC.numberOfBCnodes = para->getParH(lev)->noSlipBC.numberOfBCnodes;
-      printf("numberOfBCnodes= %d\n", para->getParH(lev)->numberOfNoSlipBCnodes);
+	   para->getParD(lev)->noSlipBC.numberOfBCnodes = para->getParH(lev)->noSlipBC.numberOfBCnodes;
+      printf("numberOfBCnodes= %d\n", para->getParH(lev)->noSlipBC.numberOfBCnodes);
 
 	  cudaMemoryManager->cudaAllocWallBC(lev);
 
       findQ(para, lev);
-
-	  para->getParH(lev)->numberOfNoSlipBCnodes       = para->getParH(lev)->noSlipBC.numberOfBCnodes;
-	  para->getParD(lev)->numberOfNoSlipBCnodes       = para->getParH(lev)->noSlipBC.numberOfBCnodes;
-	  para->getParD(lev)->noSlipBC.numberOfBCnodes = para->getParH(lev)->noSlipBC.numberOfBCnodes;
-      printf("numberOfBCnodes= %d\n", para->getParH(lev)->numberOfNoSlipBCnodes);
+ 	  para->getParD(lev)->noSlipBC.numberOfBCnodes = para->getParH(lev)->noSlipBC.numberOfBCnodes;
+      printf("numberOfBCnodes= %d\n", para->getParH(lev)->noSlipBC.numberOfBCnodes);
 
 	  cudaMemoryManager->cudaCopyWallBC(lev);
    }
@@ -60,9 +54,7 @@ void findBC27(Parameter* para, CudaMemoryManager* cudaMemoryManager)
       //Inflow
       findKforQInflow(para);
 
-      para->getParH(para->getCoarse())->numberOfVeloBCnodes = para->getParH(para->getCoarse())->velocityBC.numberOfBCnodes;
-	  para->getParD(para->getCoarse())->numberOfVeloBCnodes = para->getParH(para->getCoarse())->velocityBC.numberOfBCnodes;
-      printf("numberOfVeloBCnodes= %d\n", para->getParH(para->getCoarse())->numberOfVeloBCnodes);
+      printf("numberOfVeloBCnodes= %d\n", para->getParH(para->getCoarse())->velocityBC.numberOfBCnodes);
 
 	  cudaMemoryManager->cudaAllocVeloBC(0); //level = 0
 
@@ -79,9 +71,7 @@ void findBC27(Parameter* para, CudaMemoryManager* cudaMemoryManager)
    //   //Outflow
 	  // findKforQOutflow(para);
 
-	  // para->getParH(para->getCoarse())->numberOfOutflowBCnodes = para->getParH(para->getCoarse())->outflowBC.numberOfBCnodes;
-	  // para->getParD(para->getCoarse())->numberOfOutflowBCnodes = para->getParH(para->getCoarse())->outflowBC.numberOfBCnodes;
-	  // printf("numberOfOutflowBCnodes= %d\n", para->getParH(para->getCoarse())->numberOfOutflowBCnodes);
+	  // printf("numberOfOutflowBCnodes= %d\n", para->getParH(para->getCoarse())->outflowBC.numberOfBCnodes);
 
 	  // para->cudaAllocPressBC();
 
diff --git a/src/gpu/VirtualFluids_GPU/FindQ/FindQ.cpp b/src/gpu/VirtualFluids_GPU/FindQ/FindQ.cpp
index 3c7570f7c6e5c641f5ff0397afc5a0ba14b9372d..5ec1d279e427a5e02454aaeaadb753dbce1f6b5c 100644
--- a/src/gpu/VirtualFluids_GPU/FindQ/FindQ.cpp
+++ b/src/gpu/VirtualFluids_GPU/FindQ/FindQ.cpp
@@ -19,8 +19,8 @@ void findQ(Parameter* para, int lev)
    unsigned int nnz             = para->getParH(lev)->gridNZ;
    int* geo_mat                 = para->getParH(lev)->geo;
    unsigned int* kk             = para->getParH(para->getCoarse())->k;
-   unsigned int sizeQ           = para->getParH(lev)->numberOfNoSlipBCnodes;
-   real* QQ                  = para->getParH(lev)->noSlipBC.q27[0];
+   unsigned int sizeQ           = para->getParH(lev)->noSlipBC.numberOfBCnodes;
+   real* QQ                     = para->getParH(lev)->noSlipBC.q27[0];
    QforBoundaryConditions &QIN  = para->getParH(lev)->noSlipBC;
    QIN.numberOfBCnodes = 0;
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -409,7 +409,7 @@ void findQInflow(Parameter* para)
    unsigned int nnz              = para->getParH(para->getCoarse())->gridNZ;
    int* geo_mat                  = para->getParH(para->getCoarse())->geo;
    unsigned int* kk              = para->getParH(para->getCoarse())->k;
-   unsigned int sizeQ            = para->getParH(para->getCoarse())->numberOfVeloBCnodes;
+   unsigned int sizeQ            = para->getParH(para->getCoarse())->velocityBC.numberOfBCnodes;
    //real* rhoBC                = para->getParH(para->getCoarse())->velocityBC.RhoBC;
    real u0                    = para->getVelocity();
    real* vx                   = para->getParH(para->getCoarse())->velocityBC.Vx;
@@ -759,7 +759,7 @@ void findQOutflow(Parameter* para)
    unsigned int nnz              = para->getParH(para->getCoarse())->gridNZ;
    int* geo_mat                  = para->getParH(para->getCoarse())->geo;
    unsigned int* kk              = para->getParH(para->getCoarse())->k;
-   unsigned int sizeQ            = para->getParH(para->getCoarse())->numberOfOutflowBCnodes;
+   unsigned int sizeQ            = para->getParH(para->getCoarse())->outflowBC.numberOfBCnodes;
    real* rhoBC                = para->getParH(para->getCoarse())->outflowBC.RhoBC;
    real u0                    = para->getVelocity();
    real* vx                   = para->getParH(para->getCoarse())->outflowBC.Vx;
@@ -1030,7 +1030,7 @@ void findQPressX0(Parameter* para, int lev)
 	unsigned int nnz              = para->getParH(lev)->gridNZ;
 	int* geo_mat                  = para->getParH(lev)->geo;
 	unsigned int* kk              = para->getParH(lev)->k;
-	//unsigned int sizeQ            = para->getParH(lev)->numberOfOutflowBCnodes;
+	//unsigned int sizeQ            = para->getParH(lev)->outflowBC.numberOfBCnodes;
 	unsigned int sizeQ            = para->getParH(lev)->QpressX0.numberOfBCnodes;
 	real* rhoBC                = para->getParH(lev)->QpressX0.RhoBC;
 	real u0                    = para->getVelocity();
@@ -1196,7 +1196,7 @@ void findQPressX1(Parameter* para, int lev)
 	unsigned int nnz              = para->getParH(lev)->gridNZ;
 	int* geo_mat                  = para->getParH(lev)->geo;
 	unsigned int* kk              = para->getParH(lev)->k;
-	//unsigned int sizeQ            = para->getParH(lev)->numberOfOutflowBCnodes;
+	//unsigned int sizeQ            = para->getParH(lev)->outflowBC.numberOfBCnodes;
 	unsigned int sizeQ            = para->getParH(lev)->QpressX1.numberOfBCnodes;
 	real* rhoBC                = para->getParH(lev)->QpressX1.RhoBC;
 	real u0                    = para->getVelocity();
diff --git a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu
index 9d7cd2169405acc94ece2d498a82aecaff8b1410..eb5f1bab97f634fba03ee5f54e986dfe3a13225b 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu
@@ -7,17 +7,14 @@
 using namespace vf::lbm::constant;
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADPress7(  int inx,
-                                       int iny,
-                                       real* DD, 
+extern "C" __global__ void QADPress7(  real* DD, 
                                        real* DD7, 
                                        real* temp,
                                        real* velo,
                                        real diffusivity,
                                        int* k_Q, 
                                        real* QQ,
-                                       unsigned int sizeQ,
-                                       int numberOfBCnodes, 
+                                       unsigned int numberOfBCnodes, 
                                        real om1, 
                                        unsigned int* neighborX,
                                        unsigned int* neighborY,
@@ -130,32 +127,32 @@ extern "C" __global__ void QADPress7(  int inx,
       //         *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
       //         *q_dirBSE, *q_dirBNW;
 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      //q_dirNE  = &QQ[dirNE  *sizeQ];
-      //q_dirSW  = &QQ[dirSW  *sizeQ];
-      //q_dirSE  = &QQ[dirSE  *sizeQ];
-      //q_dirNW  = &QQ[dirNW  *sizeQ];
-      //q_dirTE  = &QQ[dirTE  *sizeQ];
-      //q_dirBW  = &QQ[dirBW  *sizeQ];
-      //q_dirBE  = &QQ[dirBE  *sizeQ];
-      //q_dirTW  = &QQ[dirTW  *sizeQ];
-      //q_dirTN  = &QQ[dirTN  *sizeQ];
-      //q_dirBS  = &QQ[dirBS  *sizeQ];
-      //q_dirBN  = &QQ[dirBN  *sizeQ];
-      //q_dirTS  = &QQ[dirTS  *sizeQ];
-      //q_dirTNE = &QQ[dirTNE *sizeQ];
-      //q_dirTSW = &QQ[dirTSW *sizeQ];
-      //q_dirTSE = &QQ[dirTSE *sizeQ];
-      //q_dirTNW = &QQ[dirTNW *sizeQ];
-      //q_dirBNE = &QQ[dirBNE *sizeQ];
-      //q_dirBSW = &QQ[dirBSW *sizeQ];
-      //q_dirBSE = &QQ[dirBSE *sizeQ];
-      //q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      //q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      //q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      //q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      //q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      //q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      //q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      //q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      //q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      //q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      //q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      //q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      //q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      //q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      //q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      //q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      //q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      //q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      //q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      //q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      //q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -451,17 +448,14 @@ extern "C" __global__ void QADPress7(  int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADPress27( int inx,
-                                       int iny,
-                                       real* DD, 
+extern "C" __global__ void QADPress27( real* DD, 
                                        real* DD27, 
                                        real* temp,
                                        real* velo,
                                        real diffusivity,
                                        int* k_Q, 
                                        real* QQ,
-                                       unsigned int sizeQ,
-                                       int numberOfBCnodes, 
+                                       unsigned int numberOfBCnodes, 
                                        real om1, 
                                        unsigned int* neighborX,
                                        unsigned int* neighborY,
@@ -611,32 +605,32 @@ extern "C" __global__ void QADPress27( int inx,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -1420,17 +1414,14 @@ extern "C" __global__ void QADPressNEQNeighbor27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADVel7( int inx,
-                                    int iny,
-                                    real* DD, 
+extern "C" __global__ void QADVel7( real* DD, 
                                     real* DD7, 
                                     real* temp,
                                     real* velo,
                                     real diffusivity,
                                     int* k_Q, 
                                     real* QQ,
-                                    unsigned int sizeQ,
-                                    int numberOfBCnodes, 
+                                    unsigned int numberOfBCnodes, 
                                     real om1, 
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
@@ -1538,37 +1529,13 @@ extern "C" __global__ void QADVel7( int inx,
    {
       //////////////////////////////////////////////////////////////////////////////////
       real  *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB;//, 
-      //         *q_dirNE,  *q_dirSW,  *q_dirSE,  *q_dirNW,  *q_dirTE,  *q_dirBW,
-      //         *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
-      //         *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
-      //         *q_dirBSE, *q_dirBNW;
 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      //q_dirNE  = &QQ[dirNE  *sizeQ];
-      //q_dirSW  = &QQ[dirSW  *sizeQ];
-      //q_dirSE  = &QQ[dirSE  *sizeQ];
-      //q_dirNW  = &QQ[dirNW  *sizeQ];
-      //q_dirTE  = &QQ[dirTE  *sizeQ];
-      //q_dirBW  = &QQ[dirBW  *sizeQ];
-      //q_dirBE  = &QQ[dirBE  *sizeQ];
-      //q_dirTW  = &QQ[dirTW  *sizeQ];
-      //q_dirTN  = &QQ[dirTN  *sizeQ];
-      //q_dirBS  = &QQ[dirBS  *sizeQ];
-      //q_dirBN  = &QQ[dirBN  *sizeQ];
-      //q_dirTS  = &QQ[dirTS  *sizeQ];
-      //q_dirTNE = &QQ[dirTNE *sizeQ];
-      //q_dirTSW = &QQ[dirTSW *sizeQ];
-      //q_dirTSE = &QQ[dirTSE *sizeQ];
-      //q_dirTNW = &QQ[dirTNW *sizeQ];
-      //q_dirBNE = &QQ[dirBNE *sizeQ];
-      //q_dirBSW = &QQ[dirBSW *sizeQ];
-      //q_dirBSE = &QQ[dirBSE *sizeQ];
-      //q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -1864,16 +1831,13 @@ extern "C" __global__ void QADVel7( int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADVel27(int inx,
-                                    int iny,
-                                    real* DD, 
+extern "C" __global__ void QADVel27(real* DD, 
                                     real* DD27, 
                                     real* temp,
                                     real* velo,
                                     real diffusivity,
                                     int* k_Q, 
                                     real* QQ,
-                                    unsigned int sizeQ,
                                     int numberOfBCnodes, 
                                     real om1, 
                                     unsigned int* neighborX,
@@ -2024,32 +1988,32 @@ extern "C" __global__ void QADVel27(int inx,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -2455,16 +2419,13 @@ extern "C" __global__ void QADVel27(int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QAD7( int inx,
-                                 int iny,
-                                 real* DD, 
+extern "C" __global__ void QAD7( real* DD, 
                                  real* DD7, 
                                  real* temp,
                                  real diffusivity,
                                  int* k_Q, 
                                  real* QQ,
-                                 unsigned int sizeQ,
-                                 int numberOfBCnodes, 
+                                 unsigned int numberOfBCnodes, 
                                  real om1, 
                                  unsigned int* neighborX,
                                  unsigned int* neighborY,
@@ -2577,32 +2538,32 @@ extern "C" __global__ void QAD7( int inx,
       //         *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
       //         *q_dirBSE, *q_dirBNW;
 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      //q_dirNE  = &QQ[dirNE  *sizeQ];
-      //q_dirSW  = &QQ[dirSW  *sizeQ];
-      //q_dirSE  = &QQ[dirSE  *sizeQ];
-      //q_dirNW  = &QQ[dirNW  *sizeQ];
-      //q_dirTE  = &QQ[dirTE  *sizeQ];
-      //q_dirBW  = &QQ[dirBW  *sizeQ];
-      //q_dirBE  = &QQ[dirBE  *sizeQ];
-      //q_dirTW  = &QQ[dirTW  *sizeQ];
-      //q_dirTN  = &QQ[dirTN  *sizeQ];
-      //q_dirBS  = &QQ[dirBS  *sizeQ];
-      //q_dirBN  = &QQ[dirBN  *sizeQ];
-      //q_dirTS  = &QQ[dirTS  *sizeQ];
-      //q_dirTNE = &QQ[dirTNE *sizeQ];
-      //q_dirTSW = &QQ[dirTSW *sizeQ];
-      //q_dirTSE = &QQ[dirTSE *sizeQ];
-      //q_dirTNW = &QQ[dirTNW *sizeQ];
-      //q_dirBNE = &QQ[dirBNE *sizeQ];
-      //q_dirBSW = &QQ[dirBSW *sizeQ];
-      //q_dirBSE = &QQ[dirBSE *sizeQ];
-      //q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      //q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      //q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      //q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      //q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      //q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      //q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      //q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      //q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      //q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      //q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      //q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      //q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      //q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      //q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      //q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      //q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      //q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      //q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      //q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      //q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -2890,16 +2851,14 @@ extern "C" __global__ void QAD7( int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADDirichlet27(	 int inx,
-											 int iny,
+extern "C" __global__ void QADDirichlet27(
 											 real* DD, 
 											 real* DD27, 
 											 real* temp,
 											 real diffusivity,
 											 int* k_Q, 
 											 real* QQ,
-											 unsigned int sizeQ,
-											 int numberOfBCnodes, 
+											 unsigned int numberOfBCnodes, 
 											 real om1, 
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
@@ -3049,32 +3008,32 @@ extern "C" __global__ void QADDirichlet27(	 int inx,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -3418,16 +3377,13 @@ extern "C" __global__ void QADDirichlet27(	 int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADBB27(int inx,
-                                   int iny,
-                                   real* DD, 
+extern "C" __global__ void QADBB27( real* DD, 
                                    real* DD27, 
                                    real* temp,
                                    real diffusivity,
                                    int* k_Q, 
                                    real* QQ,
-                                   unsigned int sizeQ,
-                                   int numberOfBCnodes, 
+                                   unsigned int numberOfBCnodes, 
                                    real om1, 
                                    unsigned int* neighborX,
                                    unsigned int* neighborY,
@@ -3577,32 +3533,32 @@ extern "C" __global__ void QADBB27(int inx,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -3936,16 +3892,14 @@ extern "C" __global__ void QADBB27(int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QNoSlipADincomp7( int inx,
-											 int iny,
+extern "C" __global__ void QNoSlipADincomp7(
 											 real* DD, 
 											 real* DD7, 
 											 real* temp,
 											 real diffusivity,
 											 int* k_Q, 
 											 real* QQ,
-											 unsigned int sizeQ,
-											 int numberOfBCnodes, 
+											 unsigned int numberOfBCnodes, 
 											 real om1, 
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
@@ -4054,12 +4008,12 @@ extern "C" __global__ void QNoSlipADincomp7( int inx,
       //////////////////////////////////////////////////////////////////////////////////
       real  *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB;
 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -4362,16 +4316,14 @@ extern "C" __global__ void QNoSlipADincomp7( int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QNoSlipADincomp27(int inx,
-											 int iny,
+extern "C" __global__ void QNoSlipADincomp27(
 											 real* DD, 
 											 real* DD27, 
 											 real* temp,
 											 real diffusivity,
 											 int* k_Q, 
 											 real* QQ,
-											 unsigned int sizeQ,
-											 int numberOfBCnodes, 
+											 unsigned int numberOfBCnodes, 
 											 real om1, 
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
@@ -4521,32 +4473,32 @@ extern "C" __global__ void QNoSlipADincomp27(int inx,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -4845,8 +4797,7 @@ extern "C" __global__ void QNoSlipADincomp27(int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADVeloIncomp7(  int inx,
-											int iny,
+extern "C" __global__ void QADVeloIncomp7(
 											real* DD, 
 											real* DD7, 
 											real* temp,
@@ -4854,8 +4805,7 @@ extern "C" __global__ void QADVeloIncomp7(  int inx,
 											real diffusivity,
 											int* k_Q, 
 											real* QQ,
-											unsigned int sizeQ,
-											int numberOfBCnodes, 
+											unsigned int numberOfBCnodes, 
 											real om1, 
 											unsigned int* neighborX,
 											unsigned int* neighborY,
@@ -4964,12 +4914,12 @@ extern "C" __global__ void QADVeloIncomp7(  int inx,
       //////////////////////////////////////////////////////////////////////////////////
       real  *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB; 
 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -5325,8 +5275,7 @@ extern "C" __global__ void QADVeloIncomp7(  int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADVeloIncomp27( int inx,
-											int iny,
+extern "C" __global__ void QADVeloIncomp27(
 											real* DD, 
 											real* DD27, 
 											real* temp,
@@ -5334,8 +5283,7 @@ extern "C" __global__ void QADVeloIncomp27( int inx,
 											real diffusivity,
 											int* k_Q, 
 											real* QQ,
-											unsigned int sizeQ,
-											int numberOfBCnodes, 
+											unsigned int numberOfBCnodes, 
 											real om1, 
 											unsigned int* neighborX,
 											unsigned int* neighborY,
@@ -5485,32 +5433,32 @@ extern "C" __global__ void QADVeloIncomp27( int inx,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -5849,8 +5797,7 @@ extern "C" __global__ void QADPressIncomp7(int inx,
 										   real diffusivity,
 										   int* k_Q, 
 										   real* QQ,
-										   unsigned int sizeQ,
-										   int numberOfBCnodes, 
+										   unsigned int numberOfBCnodes, 
 										   real om1, 
 										   unsigned int* neighborX,
 										   unsigned int* neighborY,
@@ -5959,12 +5906,12 @@ extern "C" __global__ void QADPressIncomp7(int inx,
       //////////////////////////////////////////////////////////////////////////////////
       real  *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB; 
 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -6281,17 +6228,15 @@ extern "C" __global__ void QADPressIncomp7(int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADPressIncomp27(   int inx,
-											   int iny,
-											   real* DD, 
-											   real* DD27, 
+extern "C" __global__ void QADPressIncomp27(
+											   real* DD,
+											   real* DD27,
 											   real* temp,
 											   real* velo,
 											   real diffusivity,
 											   int* k_Q, 
 											   real* QQ,
-											   unsigned int sizeQ,
-											   int numberOfBCnodes, 
+											   unsigned int numberOfBCnodes, 
 											   real om1, 
 											   unsigned int* neighborX,
 											   unsigned int* neighborY,
@@ -6433,7 +6378,7 @@ extern "C" __global__ void QADPressIncomp27(   int inx,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<numberOfBCnodes)
+   if(k < numberOfBCnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       real  *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
@@ -6441,32 +6386,32 @@ extern "C" __global__ void QADPressIncomp27(   int inx,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -6790,7 +6735,7 @@ extern "C" __global__ void AD_SlipVelDeviceComp(
     real *distributionsAD,
     int *QindexArray,
     real *Qarrays,
-    uint numberOfQs,
+    uint numberOfBCnodes,
     real omegaDiffusivity,
     uint* neighborX,
     uint* neighborY,
@@ -6932,7 +6877,7 @@ extern "C" __global__ void AD_SlipVelDeviceComp(
     const unsigned k = nx * (ny * z + y) + x;
     //////////////////////////////////////////////////////////////////////////
 
-    if (k < numberOfQs)
+    if (k < numberOfBCnodes)
     {
         ////////////////////////////////////////////////////////////////////////////////
         real NormX = normalX[k];
@@ -6944,32 +6889,32 @@ extern "C" __global__ void AD_SlipVelDeviceComp(
             * q_dirBE, * q_dirTW, * q_dirTN, * q_dirBS, * q_dirBN, * q_dirTS,
             * q_dirTNE, * q_dirTSW, * q_dirTSE, * q_dirTNW, * q_dirBNE, * q_dirBSW,
             * q_dirBSE, * q_dirBNW;
-        q_dirE   = &Qarrays[dirE   * numberOfQs];
-        q_dirW   = &Qarrays[dirW   * numberOfQs];
-        q_dirN   = &Qarrays[dirN   * numberOfQs];
-        q_dirS   = &Qarrays[dirS   * numberOfQs];
-        q_dirT   = &Qarrays[dirT   * numberOfQs];
-        q_dirB   = &Qarrays[dirB   * numberOfQs];
-        q_dirNE  = &Qarrays[dirNE  * numberOfQs];
-        q_dirSW  = &Qarrays[dirSW  * numberOfQs];
-        q_dirSE  = &Qarrays[dirSE  * numberOfQs];
-        q_dirNW  = &Qarrays[dirNW  * numberOfQs];
-        q_dirTE  = &Qarrays[dirTE  * numberOfQs];
-        q_dirBW  = &Qarrays[dirBW  * numberOfQs];
-        q_dirBE  = &Qarrays[dirBE  * numberOfQs];
-        q_dirTW  = &Qarrays[dirTW  * numberOfQs];
-        q_dirTN  = &Qarrays[dirTN  * numberOfQs];
-        q_dirBS  = &Qarrays[dirBS  * numberOfQs];
-        q_dirBN  = &Qarrays[dirBN  * numberOfQs];
-        q_dirTS  = &Qarrays[dirTS  * numberOfQs];
-        q_dirTNE = &Qarrays[dirTNE * numberOfQs];
-        q_dirTSW = &Qarrays[dirTSW * numberOfQs];
-        q_dirTSE = &Qarrays[dirTSE * numberOfQs];
-        q_dirTNW = &Qarrays[dirTNW * numberOfQs];
-        q_dirBNE = &Qarrays[dirBNE * numberOfQs];
-        q_dirBSW = &Qarrays[dirBSW * numberOfQs];
-        q_dirBSE = &Qarrays[dirBSE * numberOfQs];
-        q_dirBNW = &Qarrays[dirBNW * numberOfQs];
+        q_dirE   = &Qarrays[dirE   * numberOfBCnodes];
+        q_dirW   = &Qarrays[dirW   * numberOfBCnodes];
+        q_dirN   = &Qarrays[dirN   * numberOfBCnodes];
+        q_dirS   = &Qarrays[dirS   * numberOfBCnodes];
+        q_dirT   = &Qarrays[dirT   * numberOfBCnodes];
+        q_dirB   = &Qarrays[dirB   * numberOfBCnodes];
+        q_dirNE  = &Qarrays[dirNE  * numberOfBCnodes];
+        q_dirSW  = &Qarrays[dirSW  * numberOfBCnodes];
+        q_dirSE  = &Qarrays[dirSE  * numberOfBCnodes];
+        q_dirNW  = &Qarrays[dirNW  * numberOfBCnodes];
+        q_dirTE  = &Qarrays[dirTE  * numberOfBCnodes];
+        q_dirBW  = &Qarrays[dirBW  * numberOfBCnodes];
+        q_dirBE  = &Qarrays[dirBE  * numberOfBCnodes];
+        q_dirTW  = &Qarrays[dirTW  * numberOfBCnodes];
+        q_dirTN  = &Qarrays[dirTN  * numberOfBCnodes];
+        q_dirBS  = &Qarrays[dirBS  * numberOfBCnodes];
+        q_dirBN  = &Qarrays[dirBN  * numberOfBCnodes];
+        q_dirTS  = &Qarrays[dirTS  * numberOfBCnodes];
+        q_dirTNE = &Qarrays[dirTNE * numberOfBCnodes];
+        q_dirTSW = &Qarrays[dirTSW * numberOfBCnodes];
+        q_dirTSE = &Qarrays[dirTSE * numberOfBCnodes];
+        q_dirTNW = &Qarrays[dirTNW * numberOfBCnodes];
+        q_dirBNE = &Qarrays[dirBNE * numberOfBCnodes];
+        q_dirBSW = &Qarrays[dirBSW * numberOfBCnodes];
+        q_dirBSE = &Qarrays[dirBSE * numberOfBCnodes];
+        q_dirBNW = &Qarrays[dirBNW * numberOfBCnodes];
         ////////////////////////////////////////////////////////////////////////////////
         //index
         unsigned int KQK   = QindexArray[k];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
index e17fee1cdde7c1772d0e12b705a45c0b9832835a..5267f83838dcf131c3a6ab41ec05467e174ccfe8 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
@@ -1474,9 +1474,6 @@ void CudaMemoryManager::cudaAllocSlipBC(int lev)
 {
     unsigned int mem_size_Q_k      = sizeof(int)*parameter->getParH(lev)->slipBC.numberOfBCnodes;
     unsigned int mem_size_Q_q      = sizeof(real)*parameter->getParH(lev)->slipBC.numberOfBCnodes;
-    //unsigned int mem_size_Q_value  = sizeof(long long)*parameter->getParH(lev)->slipBC.kQ; //Geller
-    //unsigned int mem_size_Q_q_read = sizeof(real)*parameter->getParH(lev)->numberOfSlipBCnodesRead;     //Geller
-
     //Host
     checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->slipBC.q27[0]), parameter->getD3Qxx()*mem_size_Q_q      ));
     checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->slipBC.k),                            mem_size_Q_k      ));
diff --git a/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu b/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu
index 54484c322397614db297f964e2586653f1dad88b..109902e113571cf5e2acdb0e9959b15aee65b9b5 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu
@@ -93,39 +93,38 @@ extern "C" __global__ void DragLiftPost27(  real* DD,
 
 	if(k<numberOfBCnodes)
 	{
-		unsigned int sizeQ = numberOfBCnodes;
 		////////////////////////////////////////////////////////////////////////////////
 		real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
 			*q_dirNE,  *q_dirSW,  *q_dirSE,  *q_dirNW,  *q_dirTE,  *q_dirBW,
 			*q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW; 
-		q_dirE   = &QQ[dirE   *sizeQ];
-		q_dirW   = &QQ[dirW   *sizeQ];
-		q_dirN   = &QQ[dirN   *sizeQ];
-		q_dirS   = &QQ[dirS   *sizeQ];
-		q_dirT   = &QQ[dirT   *sizeQ];
-		q_dirB   = &QQ[dirB   *sizeQ];
-		q_dirNE  = &QQ[dirNE  *sizeQ];
-		q_dirSW  = &QQ[dirSW  *sizeQ];
-		q_dirSE  = &QQ[dirSE  *sizeQ];
-		q_dirNW  = &QQ[dirNW  *sizeQ];
-		q_dirTE  = &QQ[dirTE  *sizeQ];
-		q_dirBW  = &QQ[dirBW  *sizeQ];
-		q_dirBE  = &QQ[dirBE  *sizeQ];
-		q_dirTW  = &QQ[dirTW  *sizeQ];
-		q_dirTN  = &QQ[dirTN  *sizeQ];
-		q_dirBS  = &QQ[dirBS  *sizeQ];
-		q_dirBN  = &QQ[dirBN  *sizeQ];
-		q_dirTS  = &QQ[dirTS  *sizeQ];
-		q_dirTNE = &QQ[dirTNE *sizeQ];
-		q_dirTSW = &QQ[dirTSW *sizeQ];
-		q_dirTSE = &QQ[dirTSE *sizeQ];
-		q_dirTNW = &QQ[dirTNW *sizeQ];
-		q_dirBNE = &QQ[dirBNE *sizeQ];
-		q_dirBSW = &QQ[dirBSW *sizeQ];
-		q_dirBSE = &QQ[dirBSE *sizeQ];
-		q_dirBNW = &QQ[dirBNW *sizeQ];
+		q_dirE   = &QQ[dirE   * numberOfBCnodes];
+		q_dirW   = &QQ[dirW   * numberOfBCnodes];
+		q_dirN   = &QQ[dirN   * numberOfBCnodes];
+		q_dirS   = &QQ[dirS   * numberOfBCnodes];
+		q_dirT   = &QQ[dirT   * numberOfBCnodes];
+		q_dirB   = &QQ[dirB   * numberOfBCnodes];
+		q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+		q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+		q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+		q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+		q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+		q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+		q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+		q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+		q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+		q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+		q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+		q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+		q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+		q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+		q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+		q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+		q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+		q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+		q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+		q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
 		////////////////////////////////////////////////////////////////////////////////
 		//index
 		unsigned int KQK  = k_Q[k];
@@ -359,39 +358,38 @@ extern "C" __global__ void DragLiftPre27(   real* DD,
 
 	if(k<numberOfBCnodes)
 	{
-		unsigned int sizeQ = numberOfBCnodes;
 		////////////////////////////////////////////////////////////////////////////////
 		real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
 			*q_dirNE,  *q_dirSW,  *q_dirSE,  *q_dirNW,  *q_dirTE,  *q_dirBW,
 			*q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW; 
-		q_dirE   = &QQ[dirE   *sizeQ];
-		q_dirW   = &QQ[dirW   *sizeQ];
-		q_dirN   = &QQ[dirN   *sizeQ];
-		q_dirS   = &QQ[dirS   *sizeQ];
-		q_dirT   = &QQ[dirT   *sizeQ];
-		q_dirB   = &QQ[dirB   *sizeQ];
-		q_dirNE  = &QQ[dirNE  *sizeQ];
-		q_dirSW  = &QQ[dirSW  *sizeQ];
-		q_dirSE  = &QQ[dirSE  *sizeQ];
-		q_dirNW  = &QQ[dirNW  *sizeQ];
-		q_dirTE  = &QQ[dirTE  *sizeQ];
-		q_dirBW  = &QQ[dirBW  *sizeQ];
-		q_dirBE  = &QQ[dirBE  *sizeQ];
-		q_dirTW  = &QQ[dirTW  *sizeQ];
-		q_dirTN  = &QQ[dirTN  *sizeQ];
-		q_dirBS  = &QQ[dirBS  *sizeQ];
-		q_dirBN  = &QQ[dirBN  *sizeQ];
-		q_dirTS  = &QQ[dirTS  *sizeQ];
-		q_dirTNE = &QQ[dirTNE *sizeQ];
-		q_dirTSW = &QQ[dirTSW *sizeQ];
-		q_dirTSE = &QQ[dirTSE *sizeQ];
-		q_dirTNW = &QQ[dirTNW *sizeQ];
-		q_dirBNE = &QQ[dirBNE *sizeQ];
-		q_dirBSW = &QQ[dirBSW *sizeQ];
-		q_dirBSE = &QQ[dirBSE *sizeQ];
-		q_dirBNW = &QQ[dirBNW *sizeQ];
+		q_dirE   = &QQ[dirE   * numberOfBCnodes];
+		q_dirW   = &QQ[dirW   * numberOfBCnodes];
+		q_dirN   = &QQ[dirN   * numberOfBCnodes];
+		q_dirS   = &QQ[dirS   * numberOfBCnodes];
+		q_dirT   = &QQ[dirT   * numberOfBCnodes];
+		q_dirB   = &QQ[dirB   * numberOfBCnodes];
+		q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+		q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+		q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+		q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+		q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+		q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+		q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+		q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+		q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+		q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+		q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+		q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+		q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+		q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+		q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+		q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+		q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+		q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+		q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+		q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
 		////////////////////////////////////////////////////////////////////////////////
 		//index
 		unsigned int KQK  = k_Q[k];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
index 80ed9a2f52160b11e2f3cdff4641759461c4bd9c..f85640484c43af0e155dc56348d920835c6016e5 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
@@ -651,7 +651,6 @@ extern "C" void QDev27( unsigned int numberOfThreads,
                         real* DD, 
                         int* k_Q, 
                         real* QQ,
-                        unsigned int sizeQ,
                         unsigned int numberOfBCnodes, 
                         real om1, 
                         unsigned int* neighborX,
@@ -661,25 +660,21 @@ extern "C" void QDev27( unsigned int numberOfThreads,
                         bool isEvenTimestep);
 
 extern "C" void QDevComp27(unsigned int numberOfThreads,
-						   int nx,
-						   int ny,
-						   real* DD, 
-						   int* k_Q, 
-						   real* QQ,
-						   unsigned int sizeQ,
+						   real* distribution, // replaces DD; nx, ny and sizeQ are dropped from the parameter list
+						   int* subgridDistanceIndices, // replaces k_Q
+						   real* subgridDistances, // replaces QQ
 						   unsigned int numberOfBCnodes, 
-						   real om1, 
+						   real omega, // replaces om1
 						   unsigned int* neighborX,
 						   unsigned int* neighborY,
 						   unsigned int* neighborZ,
-						   unsigned int size_Mat, 
+						   unsigned int numberOfLBnodes, // replaces size_Mat
 						   bool isEvenTimestep);
 
 extern "C" void QDevCompThinWalls27(unsigned int numberOfThreads,
 									real* DD, 
 									int* k_Q, 
 									real* QQ,
-									unsigned int sizeQ,
 									unsigned int numberOfBCnodes, 
 									real om1, 
 									unsigned int* geom,
@@ -696,7 +691,6 @@ extern "C" void QDev3rdMomentsComp27(  unsigned int numberOfThreads,
 									   real* DD, 
 									   int* k_Q, 
 									   real* QQ,
-									   unsigned int sizeQ,
 									   unsigned int numberOfBCnodes, 
 									   real om1, 
 									   unsigned int* neighborX,
@@ -706,12 +700,9 @@ extern "C" void QDev3rdMomentsComp27(  unsigned int numberOfThreads,
 									   bool isEvenTimestep);
 
 extern "C" void QDevIncompHighNu27(  unsigned int numberOfThreads,
-									 int nx,
-									 int ny,
 									 real* DD, 
 									 int* k_Q, 
 									 real* QQ,
-									 unsigned int sizeQ,
 									 unsigned int numberOfBCnodes, 
 									 real om1, 
 									 unsigned int* neighborX,
@@ -721,12 +712,9 @@ extern "C" void QDevIncompHighNu27(  unsigned int numberOfThreads,
 									 bool isEvenTimestep);
 
 extern "C" void QDevCompHighNu27(unsigned int numberOfThreads,
-								 int nx,
-								 int ny,
 								 real* DD, 
 								 int* k_Q, 
 								 real* QQ,
-								 unsigned int sizeQ,
 								 unsigned int numberOfBCnodes, 
 								 real om1, 
 								 unsigned int* neighborX,
@@ -742,8 +730,7 @@ extern "C" void QVelDevicePlainBB27(unsigned int numberOfThreads,
 									real* DD,
 									int* k_Q, 
 									real* QQ,
-									unsigned int sizeQ,
-									int numberOfBCnodes, 
+									unsigned int numberOfBCnodes, 
 									real om1, 
 									unsigned int* neighborX,
 									unsigned int* neighborY,
@@ -751,15 +738,14 @@ extern "C" void QVelDevicePlainBB27(unsigned int numberOfThreads,
 									unsigned int size_Mat, 
 									bool isEvenTimestep);
 	
-extern "C" void QVelDeviceCouhette27(unsigned int numberOfThreads,
+extern "C" void QVelDeviceCouette27(unsigned int numberOfThreads,
 									real* vx,
 									real* vy,
 									real* vz,
 									real* DD,
 									int* k_Q, 
 									real* QQ,
-									unsigned int sizeQ,
-									int numberOfBCnodes, 
+									unsigned int numberOfBCnodes, 
 									real om1, 
 									unsigned int* neighborX,
 									unsigned int* neighborY,
@@ -776,7 +762,6 @@ extern "C" void QVelDevice1h27( unsigned int numberOfThreads,
 								real* DD, 
 								int* k_Q, 
 								real* QQ,
-								unsigned int sizeQ,
 								unsigned int numberOfBCnodes, 
 								real om1, 
 								real Phi, 
@@ -799,7 +784,6 @@ extern "C" void QVelDev27(unsigned int numberOfThreads,
                           real* DD, 
                           int* k_Q, 
                           real* QQ,
-                          unsigned int sizeQ,
                           unsigned int numberOfBCnodes, 
                           real om1, 
                           unsigned int* neighborX,
@@ -809,15 +793,12 @@ extern "C" void QVelDev27(unsigned int numberOfThreads,
                           bool isEvenTimestep);
 
 extern "C" void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
-									  int nx,
-									  int ny,
 									  real* vx,
 									  real* vy,
 									  real* vz,
 									  real* DD, 
 									  int* k_Q, 
 									  real* QQ,
-									  unsigned int sizeQ,
 									  unsigned int numberOfBCnodes, 
 									  real om1, 
 									  unsigned int* neighborX,
@@ -833,8 +814,7 @@ extern "C" void QVelDevComp27(unsigned int numberOfThreads,
 							  real* distribution,
 							  int* subgridDistanceIndices,
 							  real* subgridDistances,
-							  unsigned int numberOfSubgridIndices,
-							  int numberOfBCnodes,
+							  unsigned int numberOfBCnodes,
 							  real omega,
 							  unsigned int* neighborX,
 							  unsigned int* neighborY,
@@ -849,7 +829,6 @@ extern "C" void QVelDevCompThinWalls27(unsigned int numberOfThreads,
 							           real* DD, 
 							           int* k_Q, 
 							           real* QQ,
-							           unsigned int sizeQ,
 							           unsigned int numberOfBCnodes, 
 							           real om1, 
 									   unsigned int* geom,
@@ -860,34 +839,29 @@ extern "C" void QVelDevCompThinWalls27(unsigned int numberOfThreads,
 							           unsigned int size_Mat, 
 							           bool isEvenTimestep);
 
-extern "C" void QVelDevCompZeroPress27(unsigned int numberOfThreads,
-									   int nx,
-									   int ny,
-									   real* vx,
-									   real* vy,
-									   real* vz,
-									   real* DD, 
-									   int* k_Q, 
-									   real* QQ,
-									   unsigned int sizeQ,
-									   int kArray, 
-									   real om1, 
-									   unsigned int* neighborX,
-									   unsigned int* neighborY,
-									   unsigned int* neighborZ,
-									   unsigned int size_Mat, 
-									   bool isEvenTimestep);
+extern "C" void QVelDevCompZeroPress27( // declaration only; NOTE(review): presumably the host-side launcher of the QVelDeviceCompZeroPress27 kernel declared in GPU_Kernels.cuh - confirm in the definition
+    unsigned int numberOfThreads, // the one parameter that the kernel declaration in GPU_Kernels.cuh does not have
+    real* velocityX, // replaces vx
+    real* velocityY, // replaces vy
+    real* velocityZ, // replaces vz
+    real* distribution, // replaces DD
+    int* subgridDistanceIndices, // replaces k_Q
+    real* subgridDistances, // replaces QQ
+    unsigned int numberOfBCnodes, // stands in for the two former parameters "unsigned int sizeQ" and "int kArray"; NOTE(review): confirm callers passed the same count for both
+    real omega, // replaces om1
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int numberOfLBnodes, // replaces size_Mat
+    bool isEvenTimestep);
 
 extern "C" void QVelDevIncompHighNu27(  unsigned int numberOfThreads,
-										int nx,
-										int ny,
 										real* vx,
 										real* vy,
 										real* vz,
 										real* DD, 
 										int* k_Q, 
 										real* QQ,
-										unsigned int sizeQ,
 										unsigned int numberOfBCnodes, 
 										real om1, 
 										unsigned int* neighborX,
@@ -897,15 +871,12 @@ extern "C" void QVelDevIncompHighNu27(  unsigned int numberOfThreads,
 										bool isEvenTimestep);
 
 extern "C" void QVelDevCompHighNu27(unsigned int numberOfThreads,
-									int nx,
-									int ny,
 									real* vx,
 									real* vy,
 									real* vz,
 									real* DD, 
 									int* k_Q, 
 									real* QQ,
-									unsigned int sizeQ,
 									unsigned int numberOfBCnodes, 
 									real om1, 
 									unsigned int* neighborX,
@@ -947,7 +918,7 @@ extern "C" void QSlipDev27( unsigned int numberOfThreads,
 							real* DD, 
 							int* k_Q, 
 							real* QQ,
-							unsigned int sizeQ,
+							unsigned int numberOfBCnodes,
 							real om1, 
 							unsigned int* neighborX,
 							unsigned int* neighborY,
@@ -959,7 +930,7 @@ extern "C" void QSlipDevComp27(unsigned int numberOfThreads,
 							   real* DD, 
 							   int* k_Q, 
 							   real* QQ,
-							   unsigned int sizeQ,
+							   unsigned int numberOfBCnodes,
 							   real om1, 
 							   unsigned int* neighborX,
 							   unsigned int* neighborY,
@@ -973,7 +944,7 @@ extern "C" void QSlipGeomDevComp27( unsigned int numberOfThreads,
 									real* DD, 
 									int* k_Q, 
 									real* QQ,
-									unsigned int sizeQ,
+									unsigned int numberOfBCnodes,
 									real om1, 
 									real* NormalX,
 									real* NormalY,
@@ -988,7 +959,7 @@ extern "C" void QSlipNormDevComp27(unsigned int numberOfThreads,
 								   real* DD, 
 								   int* k_Q, 
 								   real* QQ,
-								   unsigned int sizeQ,
+								   unsigned int numberOfBCnodes,
 								   real om1, 
 								   real* NormalX,
 								   real* NormalY,
@@ -1004,7 +975,7 @@ extern "C" void QStressDevComp27(unsigned int numberOfThreads,
 								int* k_Q, 
 								int* k_N,
 								real* QQ,
-								unsigned int sizeQ,
+								unsigned int numberOfBCnodes,
 								real om1, 
 								real* turbViscosity,
 								real* vx,
@@ -1037,7 +1008,7 @@ extern "C" void BBStressDev27(  unsigned int numberOfThreads,
 								int* k_Q, 
 								int* k_N, 
 								real* QQ,
-								unsigned int sizeQ,
+								unsigned int numberOfBCnodes,
 								real* vx,
 								real* vy,
 								real* vz,
@@ -1064,13 +1035,10 @@ extern "C" void BBStressDev27(  unsigned int numberOfThreads,
 								bool isEvenTimestep);
 
 extern "C" void QPressDev27(unsigned int numberOfThreads,
-                          int nx,
-                          int ny,
                           real* rhoBC,
                           real* DD, 
                           int* k_Q, 
                           real* QQ,
-                          unsigned int sizeQ,
                           unsigned int numberOfBCnodes, 
                           real om1, 
                           unsigned int* neighborX,
@@ -1157,11 +1125,11 @@ extern "C" void QPressDevIncompNEQ27(unsigned int numberOfThreads,
 
 extern "C" void QPressDevNEQ27(unsigned int numberOfThreads,
 							   real* rhoBC,
-							   real* DD, 
-							   int* k_Q, 
-							   int* k_N, 
+							   real* distribution, 
+							   int* bcNodeIndices, 
+							   int* bcNeighborIndices, 
 							   unsigned int numberOfBCnodes, 
-							   real om1, 
+							   real omega1, 
 							   unsigned int* neighborX,
 							   unsigned int* neighborY,
 							   unsigned int* neighborZ,
@@ -1211,7 +1179,6 @@ extern "C" void BBDev27( unsigned int numberOfThreads,
                         real* DD, 
                         int* k_Q, 
                         real* QQ,
-                        unsigned int sizeQ,
                         unsigned int numberOfBCnodes, 
                         real om1, 
                         unsigned int* neighborX,
@@ -1225,7 +1192,6 @@ extern "C" void QPressDev27_IntBB(  unsigned int numberOfThreads,
 									real* DD, 
 									int* k_Q, 
 									real* QQ,
-									unsigned int sizeQ,
 									unsigned int numberOfBCnodes, 
 									real om1, 
 									unsigned int* neighborX,
@@ -1283,15 +1249,12 @@ extern "C" void VelSchlaffer27(  unsigned int numberOfThreads,
                                  bool isEvenTimestep);
 
 extern "C" void QADDev7(unsigned int numberOfThreads,
-                        int nx,
-                        int ny,
                         real* DD, 
                         real* DD7,
                         real* temp,
                         real diffusivity,
                         int* k_Q, 
                         real* QQ,
-                        unsigned int sizeQ,
                         unsigned int numberOfBCnodes, 
                         real om1, 
                         unsigned int* neighborX,
@@ -1326,7 +1289,7 @@ extern "C" void ADSlipVelDevComp(
 	real * distributionsAD,
 	int* QindexArray,
 	real * Qarrays,
-	uint numberOfQs,
+	uint numberOfBCnodes,
 	real omegaDiffusivity,
 	uint * neighborX,
 	uint * neighborY,
@@ -1335,15 +1298,12 @@ extern "C" void ADSlipVelDevComp(
 	bool isEvenTimestep);
 	
 extern "C" void QADDirichletDev27( unsigned int numberOfThreads,
-								   int nx,
-								   int ny,
 								   real* DD, 
 								   real* DD27,
 								   real* temp,
 								   real diffusivity,
 								   int* k_Q, 
 								   real* QQ,
-								   unsigned int sizeQ,
 								   unsigned int numberOfBCnodes, 
 								   real om1, 
 								   unsigned int* neighborX,
@@ -1353,15 +1313,12 @@ extern "C" void QADDirichletDev27( unsigned int numberOfThreads,
 								   bool isEvenTimestep);
 
 extern "C" void QADBBDev27(  unsigned int numberOfThreads,
-							 int nx,
-							 int ny,
 							 real* DD, 
 							 real* DD27,
 							 real* temp,
 							 real diffusivity,
 							 int* k_Q, 
 							 real* QQ,
-							 unsigned int sizeQ,
 							 unsigned int numberOfBCnodes, 
 							 real om1, 
 							 unsigned int* neighborX,
@@ -1371,8 +1328,6 @@ extern "C" void QADBBDev27(  unsigned int numberOfThreads,
 							 bool isEvenTimestep);
 
 extern "C" void QADVelDev7(unsigned int numberOfThreads,
-                           int nx,
-                           int ny,
                            real* DD, 
                            real* DD7,
                            real* temp,
@@ -1380,7 +1335,6 @@ extern "C" void QADVelDev7(unsigned int numberOfThreads,
                            real diffusivity,
                            int* k_Q, 
                            real* QQ,
-                           unsigned int sizeQ,
                            unsigned int numberOfBCnodes, 
                            real om1, 
                            unsigned int* neighborX,
@@ -1391,8 +1345,6 @@ extern "C" void QADVelDev7(unsigned int numberOfThreads,
 
 
 extern "C" void QADVelDev27(  unsigned int numberOfThreads,
-                              int nx,
-                              int ny,
                               real* DD, 
                               real* DD27,
                               real* temp,
@@ -1400,7 +1352,6 @@ extern "C" void QADVelDev27(  unsigned int numberOfThreads,
                               real diffusivity,
                               int* k_Q, 
                               real* QQ,
-                              unsigned int sizeQ,
                               unsigned int numberOfBCnodes, 
                               real om1, 
                               unsigned int* neighborX,
@@ -1410,8 +1361,6 @@ extern "C" void QADVelDev27(  unsigned int numberOfThreads,
                               bool isEvenTimestep);
 
 extern "C" void QADPressDev7( unsigned int numberOfThreads,
-                              int nx,
-                              int ny,
                               real* DD, 
                               real* DD7,
                               real* temp,
@@ -1419,7 +1368,6 @@ extern "C" void QADPressDev7( unsigned int numberOfThreads,
                               real diffusivity,
                               int* k_Q, 
                               real* QQ,
-                              unsigned int sizeQ,
                               unsigned int numberOfBCnodes, 
                               real om1, 
                               unsigned int* neighborX,
@@ -1429,8 +1377,6 @@ extern "C" void QADPressDev7( unsigned int numberOfThreads,
                               bool isEvenTimestep);
 
 extern "C" void QADPressDev27(unsigned int numberOfThreads,
-                              int nx,
-                              int ny,
                               real* DD, 
                               real* DD27,
                               real* temp,
@@ -1438,7 +1384,6 @@ extern "C" void QADPressDev27(unsigned int numberOfThreads,
                               real diffusivity,
                               int* k_Q, 
                               real* QQ,
-                              unsigned int sizeQ,
                               unsigned int numberOfBCnodes, 
                               real om1, 
                               unsigned int* neighborX,
@@ -1462,15 +1407,12 @@ extern "C" void QADPressNEQNeighborDev27(
 										);
 
 extern "C" void QNoSlipADincompDev7(unsigned int numberOfThreads,
-									int nx,
-									int ny,
 									real* DD, 
 									real* DD7,
 									real* temp,
 									real diffusivity,
 									int* k_Q, 
 									real* QQ,
-									unsigned int sizeQ,
 									unsigned int numberOfBCnodes, 
 									real om1, 
 									unsigned int* neighborX,
@@ -1480,15 +1422,12 @@ extern "C" void QNoSlipADincompDev7(unsigned int numberOfThreads,
 									bool isEvenTimestep);
 
 extern "C" void QNoSlipADincompDev27(unsigned int numberOfThreads,
-									 int nx,
-									 int ny,
 									 real* DD, 
 									 real* DD27,
 									 real* temp,
 									 real diffusivity,
 									 int* k_Q, 
 									 real* QQ,
-									 unsigned int sizeQ,
 									 unsigned int numberOfBCnodes, 
 									 real om1, 
 									 unsigned int* neighborX,
@@ -1498,8 +1437,6 @@ extern "C" void QNoSlipADincompDev27(unsigned int numberOfThreads,
 									 bool isEvenTimestep);
 
 extern "C" void QADVeloIncompDev7( unsigned int numberOfThreads,
-								   int nx,
-								   int ny,
 								   real* DD, 
 								   real* DD7,
 								   real* temp,
@@ -1507,7 +1444,6 @@ extern "C" void QADVeloIncompDev7( unsigned int numberOfThreads,
 								   real diffusivity,
 								   int* k_Q, 
 								   real* QQ,
-								   unsigned int sizeQ,
 								   unsigned int numberOfBCnodes, 
 								   real om1, 
 								   unsigned int* neighborX,
@@ -1518,8 +1454,6 @@ extern "C" void QADVeloIncompDev7( unsigned int numberOfThreads,
 
 
 extern "C" void QADVeloIncompDev27( unsigned int numberOfThreads,
-									int nx,
-									int ny,
 									real* DD, 
 									real* DD27,
 									real* temp,
@@ -1527,7 +1461,6 @@ extern "C" void QADVeloIncompDev27( unsigned int numberOfThreads,
 									real diffusivity,
 									int* k_Q, 
 									real* QQ,
-									unsigned int sizeQ,
 									unsigned int numberOfBCnodes, 
 									real om1, 
 									unsigned int* neighborX,
@@ -1537,8 +1470,6 @@ extern "C" void QADVeloIncompDev27( unsigned int numberOfThreads,
 									bool isEvenTimestep);
 
 extern "C" void QADPressIncompDev7(  unsigned int numberOfThreads,
-									 int nx,
-									 int ny,
 									 real* DD, 
 									 real* DD7,
 									 real* temp,
@@ -1546,7 +1477,6 @@ extern "C" void QADPressIncompDev7(  unsigned int numberOfThreads,
 									 real diffusivity,
 									 int* k_Q, 
 									 real* QQ,
-									 unsigned int sizeQ,
 									 unsigned int numberOfBCnodes, 
 									 real om1, 
 									 unsigned int* neighborX,
@@ -1556,8 +1486,6 @@ extern "C" void QADPressIncompDev7(  unsigned int numberOfThreads,
 									 bool isEvenTimestep);
 
 extern "C" void QADPressIncompDev27(  unsigned int numberOfThreads,
-									  int nx,
-									  int ny,
 									  real* DD, 
 									  real* DD27,
 									  real* temp,
@@ -1565,7 +1493,6 @@ extern "C" void QADPressIncompDev27(  unsigned int numberOfThreads,
 									  real diffusivity,
 									  int* k_Q, 
 									  real* QQ,
-									  unsigned int sizeQ,
 									  unsigned int numberOfBCnodes, 
 									  real om1, 
 									  unsigned int* neighborX,
@@ -2519,15 +2446,12 @@ extern "C" void setRecvGsDevF3(
 	unsigned int numberOfThreads);
 
 extern "C" void WallFuncDev27(unsigned int numberOfThreads,
-							  int nx,
-							  int ny,
 							  real* vx,
 							  real* vy,
 							  real* vz,
 							  real* DD, 
 							  int* k_Q, 
 							  real* QQ,
-							  unsigned int sizeQ,
 							  unsigned int numberOfBCnodes, 
 							  real om1, 
 							  unsigned int* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
index b6f403b1d1b9a176ef6b1be607d927fb5eceb606..5dc54bd26659d85fbbe454036a9bcedd2045450f 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
@@ -603,8 +603,7 @@ extern "C" __global__ void QDevice27(int inx,
                                      real* DD,
                                      int* k_Q,
                                      real* QQ,
-                                     unsigned int sizeQ,
-                                     int numberOfBCnodes,
+                                     unsigned int numberOfBCnodes,
                                      real minusomega,
                                      unsigned int* neighborX,
                                      unsigned int* neighborY,
@@ -612,25 +611,22 @@ extern "C" __global__ void QDevice27(int inx,
                                      unsigned int size_Mat,
                                      bool isEvenTimestep);
 
-extern "C" __global__ void QDeviceComp27(int inx,
-										 int iny,
-										 real* DD,
-										 int* k_Q,
-										 real* QQ,
-										 unsigned int sizeQ,
-										 int numberOfBCnodes,
-										 real minusomega,
+extern "C" __global__ void QDeviceComp27( // inx and iny are no longer parameters
+										 real* distribution, // replaces DD
+										 int* subgridDistanceIndices, // replaces k_Q
+										 real* subgridDistances, // replaces QQ
+										 unsigned int numberOfBCnodes, // now unsigned; the separate sizeQ parameter is gone - NOTE(review): confirm callers passed equal values for both
+										 real omega, // was named minusomega - NOTE(review): confirm the definition and the launch site agree on the sign
 										 unsigned int* neighborX,
 										 unsigned int* neighborY,
 										 unsigned int* neighborZ,
-										 unsigned int size_Mat,
+										 unsigned int numberOfLBnodes, // replaces size_Mat
 										 bool isEvenTimestep);
 
 extern "C" __global__ void QDeviceCompThinWallsPartOne27(real* DD,
 														 int* k_Q,
 														 real* QQ,
-														 unsigned int sizeQ,
-														 int numberOfBCnodes,
+														 unsigned int numberOfBCnodes,
 														 real om1,
 														 unsigned int* neighborX,
 														 unsigned int* neighborY,
@@ -643,8 +639,7 @@ extern "C" __global__ void QDevice3rdMomentsComp27(  int inx,
 													 real* DD,
 													 int* k_Q,
 													 real* QQ,
-													 unsigned int sizeQ,
-													 int numberOfBCnodes,
+													 unsigned int numberOfBCnodes,
 													 real minusomega,
 													 unsigned int* neighborX,
 													 unsigned int* neighborY,
@@ -652,13 +647,10 @@ extern "C" __global__ void QDevice3rdMomentsComp27(  int inx,
 													 unsigned int size_Mat,
 													 bool isEvenTimestep);
 
-extern "C" __global__ void QDeviceIncompHighNu27(int inx,
-												 int iny,
-												 real* DD,
+extern "C" __global__ void QDeviceIncompHighNu27(real* DD,
 												 int* k_Q,
 												 real* QQ,
-												 unsigned int sizeQ,
-												 int numberOfBCnodes,
+												 unsigned int numberOfBCnodes,
 												 real om1,
 												 unsigned int* neighborX,
 												 unsigned int* neighborY,
@@ -666,13 +658,10 @@ extern "C" __global__ void QDeviceIncompHighNu27(int inx,
 												 unsigned int size_Mat,
 												 bool isEvenTimestep);
 
-extern "C" __global__ void QDeviceCompHighNu27(  int inx,
-												 int iny,
-												 real* DD,
+extern "C" __global__ void QDeviceCompHighNu27(	 real* DD,
 												 int* k_Q,
 												 real* QQ,
-												 unsigned int sizeQ,
-												 int numberOfBCnodes,
+												 unsigned int numberOfBCnodes,
 												 real om1,
 												 unsigned int* neighborX,
 												 unsigned int* neighborY,
@@ -687,8 +676,7 @@ extern "C" __global__ void QVelDevPlainBB27(real* vx,
 											real* DD,
 											int* k_Q,
 											real* QQ,
-											unsigned int sizeQ,
-											int numberOfBCnodes,
+											unsigned int numberOfBCnodes,
 											real om1,
 											unsigned int* neighborX,
 											unsigned int* neighborY,
@@ -696,14 +684,13 @@ extern "C" __global__ void QVelDevPlainBB27(real* vx,
 											unsigned int size_Mat,
 											bool isEvenTimestep);
 
-extern "C" __global__ void QVelDevCouhette27(real* vx,
+extern "C" __global__ void QVelDevCouette27(real* vx,
 											real* vy,
 											real* vz,
 											real* DD,
 											int* k_Q,
 											real* QQ,
-											unsigned int sizeQ,
-											int numberOfBCnodes,
+											unsigned int numberOfBCnodes,
 											real om1,
 											unsigned int* neighborX,
 											unsigned int* neighborY,
@@ -719,8 +706,7 @@ extern "C" __global__ void QVelDev1h27( int inx,
 										real* DD,
 										int* k_Q,
 										real* QQ,
-										unsigned int sizeQ,
-										int numberOfBCnodes,
+										unsigned int numberOfBCnodes,
 										real om1,
 										real Phi,
 										real angularVelocity,
@@ -741,8 +727,7 @@ extern "C" __global__ void QVelDevice27(int inx,
                                         real* DD,
                                         int* k_Q,
                                         real* QQ,
-                                        unsigned int sizeQ,
-                                        int numberOfBCnodes,
+                                        unsigned int numberOfBCnodes,
                                         real om1,
                                         unsigned int* neighborX,
                                         unsigned int* neighborY,
@@ -750,16 +735,13 @@ extern "C" __global__ void QVelDevice27(int inx,
                                         unsigned int size_Mat,
                                         bool isEvenTimestep);
 
-extern "C" __global__ void QVelDeviceCompPlusSlip27(int inx,
-													int iny,
-													real* vx,
+extern "C" __global__ void QVelDeviceCompPlusSlip27(real* vx,
 													real* vy,
 													real* vz,
 													real* DD,
 													int* k_Q,
 													real* QQ,
-													unsigned int sizeQ,
-													int numberOfBCnodes,
+													unsigned int numberOfBCnodes,
 													real om1,
 													unsigned int* neighborX,
 													unsigned int* neighborY,
@@ -773,8 +755,7 @@ extern "C" __global__ void QVelDeviceComp27(real* velocityX,
 											real* distribution,
 											int* subgridDistanceIndices,
 											real* subgridDistances,
-											unsigned int numberOfSubgridIndices,
-											int numberOfBCnodes,
+											unsigned int numberOfBCnodes,
 											real omega,
 											unsigned int* neighborX,
 											unsigned int* neighborY,
@@ -789,8 +770,7 @@ extern "C" __global__ void QVelDeviceCompThinWallsPartOne27(
 	real* DD,
 	int* k_Q,
 	real* QQ,
-	uint sizeQ,
-	int numberOfBCnodes,
+	uint numberOfBCnodes,
 	real om1,
 	uint* neighborX,
 	uint* neighborY,
@@ -802,8 +782,7 @@ extern "C" __global__ void QThinWallsPartTwo27(
 	real* DD,
 	int* k_Q,
 	real* QQ,
-	uint sizeQ,
-	int numberOfBCnodes,
+	uint numberOfBCnodes,
 	uint* geom,
 	uint* neighborX,
 	uint* neighborY,
@@ -812,33 +791,28 @@ extern "C" __global__ void QThinWallsPartTwo27(
 	uint size_Mat,
 	bool isEvenTimestep);
 
-extern "C" __global__ void QVelDeviceCompZeroPress27(   int inx,
-														int iny,
-														real* vx,
-														real* vy,
-														real* vz,
-														real* DD,
-														int* k_Q,
-														real* QQ,
-														unsigned int sizeQ,
-														//int numberOfBCnodes,
-														real om1,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														unsigned int size_Mat,
-														bool isEvenTimestep);
+extern "C" __global__ void QVelDeviceCompZeroPress27( // inx and iny are no longer parameters
+	real* velocityX, // replaces vx
+	real* velocityY, // replaces vy
+	real* velocityZ, // replaces vz
+	real* distribution, // replaces DD
+	int* subgridDistanceIndices, // replaces k_Q
+	real* subgridDistances, // replaces QQ
+	unsigned int numberOfBCnodes, // replaces sizeQ; the previous declaration had "int numberOfBCnodes" commented out
+	real omega, // replaces om1
+	unsigned int* neighborX,
+	unsigned int* neighborY,
+	unsigned int* neighborZ,
+	unsigned int numberOfLBnodes, // replaces size_Mat
+	bool isEvenTimestep);
 
-extern "C" __global__ void QVelDeviceIncompHighNu27(int inx,
-													int iny,
-													real* vx,
+extern "C" __global__ void QVelDeviceIncompHighNu27(real* vx,
 													real* vy,
 													real* vz,
 													real* DD,
 													int* k_Q,
 													real* QQ,
-													unsigned int sizeQ,
-													int numberOfBCnodes,
+													unsigned int numberOfBCnodes,
 													real om1,
 													unsigned int* neighborX,
 													unsigned int* neighborY,
@@ -846,16 +820,13 @@ extern "C" __global__ void QVelDeviceIncompHighNu27(int inx,
 													unsigned int size_Mat,
 													bool isEvenTimestep);
 
-extern "C" __global__ void QVelDeviceCompHighNu27(  int inx,
-													int iny,
-													real* vx,
+extern "C" __global__ void QVelDeviceCompHighNu27(	real* vx,
 													real* vy,
 													real* vz,
 													real* DD,
 													int* k_Q,
 													real* QQ,
-													unsigned int sizeQ,
-													int numberOfBCnodes,
+													unsigned int numberOfBCnodes,
 													real om1,
 													unsigned int* neighborX,
 													unsigned int* neighborY,
@@ -894,7 +865,7 @@ extern "C" __global__ void QVeloStreetDeviceEQ27(
 extern "C" __global__ void QSlipDevice27(real* DD,
                                          int* k_Q,
                                          real* QQ,
-                                         unsigned int sizeQ,
+                                         unsigned int numberOfBCnodes,
                                          real om1,
                                          unsigned int* neighborX,
                                          unsigned int* neighborY,
@@ -905,7 +876,7 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 extern "C" __global__ void QSlipDeviceComp27(real* DD,
 											 int* k_Q,
 											 real* QQ,
-											 unsigned int sizeQ,
+											 unsigned int numberOfBCnodes,
 											 real om1,
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
@@ -916,7 +887,7 @@ extern "C" __global__ void QSlipDeviceComp27(real* DD,
 extern "C" __global__ void QSlipDeviceComp27TurbViscosity(real* DD,
 											 int* k_Q,
 											 real* QQ,
-											 unsigned int sizeQ,
+											 unsigned int numberOfBCnodes,
 											 real om1,
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
@@ -928,7 +899,7 @@ extern "C" __global__ void QSlipDeviceComp27TurbViscosity(real* DD,
 extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 												 int* k_Q,
 												 real* QQ,
-												 unsigned int sizeQ,
+												 unsigned int numberOfBCnodes,
 												 real om1,
 												 real* NormalX,
 												 real* NormalY,
@@ -942,7 +913,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 												 int* k_Q,
 												 real* QQ,
-												 unsigned int sizeQ,
+												 unsigned int numberOfBCnodes,
 												 real om1,
 												 real* NormalX,
 												 real* NormalY,
@@ -958,7 +929,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
 											   int* k_Q,
 											 int* k_N,
 											 real* QQ,
-											 unsigned int sizeQ,
+											 unsigned int numberOfBCnodes,
 											 real om1,
 											 real* turbViscosity,
 										     real* vx,
@@ -990,7 +961,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
 												int* k_Q,
 												int* k_N,
 												real* QQ,
-												unsigned int sizeQ,
+												unsigned int numberOfBCnodes,
 												real* vx,
 												real* vy,
 												real* vz,
@@ -1017,14 +988,11 @@ extern "C" __global__ void BBStressDevice27( real* DD,
 												bool isEvenTimestep);
 
 //Pressure BCs
-extern "C" __global__ void QPressDevice27(int inx,
-                                           int iny,
-                                           real* rhoBC,
+extern "C" __global__ void QPressDevice27( real* rhoBC,
                                            real* DD,
                                            int* k_Q,
                                            real* QQ,
-                                           unsigned int sizeQ,
-                                           int numberOfBCnodes,
+                                           unsigned int numberOfBCnodes,
                                            real om1,
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
@@ -1118,11 +1086,11 @@ extern "C" __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
 													bool isEvenTimestep);
 
 extern "C" __global__ void QPressDeviceNEQ27(real* rhoBC,
-                                             real* DD,
-                                             int* k_Q,
-                                             int* k_N,
+                                             real* distribution,
+                                             int* bcNodeIndices,
+                                             int* bcNeighborIndices,
                                              int numberOfBCnodes,
-                                             real om1,
+                                             real omega1,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
@@ -1168,8 +1136,7 @@ extern "C" __global__ void BBDevice27(int inx,
                                      real* DD,
                                      int* k_Q,
                                      real* QQ,
-                                     unsigned int sizeQ,
-                                     int numberOfBCnodes,
+                                     unsigned int numberOfBCnodes,
                                      real om1,
                                      unsigned int* neighborX,
                                      unsigned int* neighborY,
@@ -1181,8 +1148,7 @@ extern "C" __global__ void QPressDevice27_IntBB(real* rho,
 												real* DD,
 												int* k_Q,
 												real* QQ,
-												unsigned int sizeQ,
-												int numberOfBCnodes,
+												unsigned int numberOfBCnodes,
 												real om1,
 												unsigned int* neighborX,
 												unsigned int* neighborY,
@@ -1224,16 +1190,13 @@ extern "C" __global__ void VelSchlaff27(  int t,
                                           bool isEvenTimestep);
 
 //Advection / Diffusion BCs
-extern "C" __global__ void QAD7( int inx,
-                                 int iny,
-                                 real* DD,
+extern "C" __global__ void QAD7( real* DD,
                                  real* DD7,
                                  real* temp,
                                  real diffusivity,
                                  int* k_Q,
                                  real* QQ,
-                                 unsigned int sizeQ,
-                                 int numberOfBCnodes,
+                                 unsigned int numberOfBCnodes,
                                  real om1,
                                  unsigned int* neighborX,
                                  unsigned int* neighborY,
@@ -1265,7 +1228,7 @@ extern "C" __global__ void AD_SlipVelDeviceComp(
 	real * distributionsAD,
 	int* QindexArray,
 	real * Qarrays,
-	uint numberOfQs,
+	uint numberOfBCnodes,
 	real omegaDiffusivity,
 	uint * neighborX,
 	uint * neighborY,
@@ -1273,16 +1236,13 @@ extern "C" __global__ void AD_SlipVelDeviceComp(
 	uint size_Mat,
 	bool isEvenTimestep);
 
-extern "C" __global__ void QADDirichlet27(   int inx,
-											 int iny,
-											 real* DD,
+extern "C" __global__ void QADDirichlet27(   real* DD,
 											 real* DD27,
 											 real* temp,
 											 real diffusivity,
 											 int* k_Q,
 											 real* QQ,
-											 unsigned int sizeQ,
-											 int numberOfBCnodes,
+											 unsigned int numberOfBCnodes,
 											 real om1,
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
@@ -1290,16 +1250,13 @@ extern "C" __global__ void QADDirichlet27(   int inx,
 											 unsigned int size_Mat,
 											 bool isEvenTimestep);
 
-extern "C" __global__ void QADBB27(  int inx,
-									 int iny,
-									 real* DD,
+extern "C" __global__ void QADBB27(  real* DD,
 									 real* DD27,
 									 real* temp,
 									 real diffusivity,
 									 int* k_Q,
 									 real* QQ,
-									 unsigned int sizeQ,
-									 int numberOfBCnodes,
+									 unsigned int numberOfBCnodes,
 									 real om1,
 									 unsigned int* neighborX,
 									 unsigned int* neighborY,
@@ -1307,17 +1264,14 @@ extern "C" __global__ void QADBB27(  int inx,
 									 unsigned int size_Mat,
 									 bool isEvenTimestep);
 
-extern "C" __global__ void QADVel7( int inx,
-                                    int iny,
-                                    real* DD,
+extern "C" __global__ void QADVel7( real* DD,
                                     real* DD7,
                                     real* temp,
                                     real* velo,
                                     real diffusivity,
                                     int* k_Q,
                                     real* QQ,
-                                    unsigned int sizeQ,
-                                    int numberOfBCnodes,
+                                    unsigned int numberOfBCnodes,
                                     real om1,
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
@@ -1325,17 +1279,14 @@ extern "C" __global__ void QADVel7( int inx,
                                     unsigned int size_Mat,
                                     bool isEvenTimestep);
 
-extern "C" __global__ void QADVel27(int inx,
-                                    int iny,
-                                    real* DD,
+extern "C" __global__ void QADVel27(real* DD,
                                     real* DD27,
                                     real* temp,
                                     real* velo,
                                     real diffusivity,
                                     int* k_Q,
                                     real* QQ,
-                                    unsigned int sizeQ,
-                                    int numberOfBCnodes,
+                                    unsigned int numberOfBCnodes,
                                     real om1,
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
@@ -1343,17 +1294,14 @@ extern "C" __global__ void QADVel27(int inx,
                                     unsigned int size_Mat,
                                     bool isEvenTimestep);
 
-extern "C" __global__ void QADPress7(  int inx,
-                                       int iny,
-                                       real* DD,
+extern "C" __global__ void QADPress7(  real* DD,
                                        real* DD7,
                                        real* temp,
                                        real* velo,
                                        real diffusivity,
                                        int* k_Q,
                                        real* QQ,
-                                       unsigned int sizeQ,
-                                       int numberOfBCnodes,
+                                       unsigned int numberOfBCnodes,
                                        real om1,
                                        unsigned int* neighborX,
                                        unsigned int* neighborY,
@@ -1361,17 +1309,14 @@ extern "C" __global__ void QADPress7(  int inx,
                                        unsigned int size_Mat,
                                        bool isEvenTimestep);
 
-extern "C" __global__ void QADPress27( int inx,
-                                       int iny,
-                                       real* DD,
+extern "C" __global__ void QADPress27( real* DD,
                                        real* DD27,
                                        real* temp,
                                        real* velo,
                                        real diffusivity,
                                        int* k_Q,
                                        real* QQ,
-                                       unsigned int sizeQ,
-                                       int numberOfBCnodes,
+                                       unsigned int numberOfBCnodes,
                                        real om1,
                                        unsigned int* neighborX,
                                        unsigned int* neighborY,
@@ -1392,16 +1337,13 @@ extern "C" __global__ void QADPressNEQNeighbor27(
 												 bool isEvenTimestep
 												);
 
-extern "C" __global__ void QNoSlipADincomp7( int inx,
-											 int iny,
-											 real* DD,
+extern "C" __global__ void QNoSlipADincomp7( real* DD,
 											 real* DD7,
 											 real* temp,
 											 real diffusivity,
 											 int* k_Q,
 											 real* QQ,
-											 unsigned int sizeQ,
-											 int numberOfBCnodes,
+											 unsigned int numberOfBCnodes,
 											 real om1,
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
@@ -1409,16 +1351,13 @@ extern "C" __global__ void QNoSlipADincomp7( int inx,
 											 unsigned int size_Mat,
 											 bool isEvenTimestep);
 
-extern "C" __global__ void QNoSlipADincomp27(int inx,
-											 int iny,
-											 real* DD,
+extern "C" __global__ void QNoSlipADincomp27( real* DD,
 											 real* DD27,
 											 real* temp,
 											 real diffusivity,
 											 int* k_Q,
 											 real* QQ,
-											 unsigned int sizeQ,
-											 int numberOfBCnodes,
+											 unsigned int numberOfBCnodes,
 											 real om1,
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
@@ -1426,17 +1365,14 @@ extern "C" __global__ void QNoSlipADincomp27(int inx,
 											 unsigned int size_Mat,
 											 bool isEvenTimestep);
 
-extern "C" __global__ void QADVeloIncomp7(  int inx,
-											int iny,
-											real* DD,
+extern "C" __global__ void QADVeloIncomp7(  real* DD,
 											real* DD7,
 											real* temp,
 											real* velo,
 											real diffusivity,
 											int* k_Q,
 											real* QQ,
-											unsigned int sizeQ,
-											int numberOfBCnodes,
+											unsigned int numberOfBCnodes,
 											real om1,
 											unsigned int* neighborX,
 											unsigned int* neighborY,
@@ -1444,17 +1380,14 @@ extern "C" __global__ void QADVeloIncomp7(  int inx,
 											unsigned int size_Mat,
 											bool isEvenTimestep);
 
-extern "C" __global__ void QADVeloIncomp27( int inx,
-											int iny,
-											real* DD,
+extern "C" __global__ void QADVeloIncomp27( real* DD,
 											real* DD27,
 											real* temp,
 											real* velo,
 											real diffusivity,
 											int* k_Q,
 											real* QQ,
-											unsigned int sizeQ,
-											int numberOfBCnodes,
+											unsigned int numberOfBCnodes,
 											real om1,
 											unsigned int* neighborX,
 											unsigned int* neighborY,
@@ -1462,17 +1395,14 @@ extern "C" __global__ void QADVeloIncomp27( int inx,
 											unsigned int size_Mat,
 											bool isEvenTimestep);
 
-extern "C" __global__ void QADPressIncomp7(int inx,
-										   int iny,
-										   real* DD,
+extern "C" __global__ void QADPressIncomp7(real* DD,
 										   real* DD7,
 										   real* temp,
 										   real* velo,
 										   real diffusivity,
 										   int* k_Q,
 										   real* QQ,
-										   unsigned int sizeQ,
-										   int numberOfBCnodes,
+										   unsigned int numberOfBCnodes,
 										   real om1,
 										   unsigned int* neighborX,
 										   unsigned int* neighborY,
@@ -1480,17 +1410,14 @@ extern "C" __global__ void QADPressIncomp7(int inx,
 										   unsigned int size_Mat,
 										   bool isEvenTimestep);
 
-extern "C" __global__ void QADPressIncomp27(   int inx,
-											   int iny,
-											   real* DD,
+extern "C" __global__ void QADPressIncomp27(   real* DD,
 											   real* DD27,
 											   real* temp,
 											   real* velo,
 											   real diffusivity,
 											   int* k_Q,
 											   real* QQ,
-											   unsigned int sizeQ,
-											   int numberOfBCnodes,
+											   unsigned int numberOfBCnodes,
 											   real om1,
 											   unsigned int* neighborX,
 											   unsigned int* neighborY,
@@ -2381,16 +2308,13 @@ extern "C" __global__ void setRecvGsF3(
 	unsigned int size_Mat,
 	bool isEvenTimestep);
 
-extern "C" __global__ void WallFunction27(  int inx,
-											int iny,
-											real* vx,
+extern "C" __global__ void WallFunction27( 	real* vx,
 											real* vy,
 											real* vz,
 											real* DD,
 											int* k_Q,
 											real* QQ,
-											unsigned int sizeQ,
-											int numberOfBCnodes,
+											unsigned int numberOfBCnodes,
 											real om1,
 											unsigned int* neighborX,
 											unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h b/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h
index e900832872eb46fc5a7d912231456576d9b3be02..5e87cbc00ab11a413086fea307a68b1c0db10336 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h
@@ -147,5 +147,23 @@ __inline__ __device__ real getInterpolatedDistributionForVeloBC(const real& q, c
            + (q * (f + fInverse) - c6o1 * weight * velocity) / (c1o1 + q);
 }
 
+__inline__ __device__ real getInterpolatedDistributionForNoSlipBC(const real& q, const real& f, const real& fInverse, const real& feq, 
+                                                                const real& omega, const real weight) // NOTE(review): `weight` is never read in the body below
+{
+    // Device helper returning the sum of a (c1o1-q)/(c1o1+q)-scaled term built from f, fInverse, feq and omega and a q/(c1o1+q)-scaled (f + fInverse) term; presumably q is the wall distance of the link - TODO confirm.
+    return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 
+           + (q * (f + fInverse)) / (c1o1 + q); // denominators (c1o1 - omega) and (c1o1 + q) are not guarded against zero here
+}
+
+
+__inline__ __device__ real getInterpolatedDistributionForVeloWithPressureBC(const real& q, const real& f, const real& fInverse, const real& feq, 
+                                                                            const real& omega, const real& drho, const real& velocity, const real weight)
+{
+    // Device helper: same expression as getInterpolatedDistributionForNoSlipBC, except that c6o1 * weight * velocity is subtracted in the second numerator and weight * drho is subtracted from the result.
+    return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 
+           + (q * (f + fInverse) - c6o1 * weight * velocity) / (c1o1 + q) - weight * drho; // denominators (c1o1 - omega) and (c1o1 + q) are not guarded against zero here
+}
+
+
 
-#endif
\ No newline at end of file
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
index 190e4fb1282185d3c6c637bdd5820dc0196bf20a..42e5bd12586a49905b0cad494ee7400136fb112a 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
@@ -1914,7 +1914,6 @@ extern "C" void QADPressDev7( unsigned int numberOfThreads,
                               real diffusivity,
                               int* k_Q,
                               real* QQ,
-                              unsigned int sizeQ,
                               unsigned int numberOfBCnodes,
                               real om1,
                               unsigned int* neighborX,
@@ -1938,16 +1937,13 @@ extern "C" void QADPressDev7( unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QADPress7<<< gridQ, threads >>>( nx,
-                                       ny,
-                                       DD,
+      QADPress7<<< gridQ, threads >>>( DD,
                                        DD7,
                                        temp,
                                        velo,
                                        diffusivity,
                                        k_Q,
                                        QQ,
-                                       sizeQ,
                                        numberOfBCnodes,
                                        om1,
                                        neighborX,
@@ -1959,8 +1955,6 @@ extern "C" void QADPressDev7( unsigned int numberOfThreads,
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QADPressDev27(unsigned int numberOfThreads,
-                              int nx,
-                              int ny,
                               real* DD,
                               real* DD27,
                               real* temp,
@@ -1968,7 +1962,6 @@ extern "C" void QADPressDev27(unsigned int numberOfThreads,
                               real diffusivity,
                               int* k_Q,
                               real* QQ,
-                              unsigned int sizeQ,
                               unsigned int numberOfBCnodes,
                               real om1,
                               unsigned int* neighborX,
@@ -1992,16 +1985,13 @@ extern "C" void QADPressDev27(unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QADPress27<<< gridQ, threads >>>(   nx,
-                                          ny,
-                                          DD,
+      QADPress27<<< gridQ, threads >>>(   DD,
                                           DD27,
                                           temp,
                                           velo,
                                           diffusivity,
                                           k_Q,
                                           QQ,
-                                          sizeQ,
                                           numberOfBCnodes,
                                           om1,
                                           neighborX,
@@ -2058,8 +2048,6 @@ extern "C" void QADPressNEQNeighborDev27(
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QADVelDev7(unsigned int numberOfThreads,
-                           int nx,
-                           int ny,
                            real* DD,
                            real* DD7,
                            real* temp,
@@ -2067,7 +2055,6 @@ extern "C" void QADVelDev7(unsigned int numberOfThreads,
                            real diffusivity,
                            int* k_Q,
                            real* QQ,
-                           unsigned int sizeQ,
                            unsigned int numberOfBCnodes,
                            real om1,
                            unsigned int* neighborX,
@@ -2091,8 +2078,7 @@ extern "C" void QADVelDev7(unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QADVel7<<< gridQ, threads >>> (  nx,
-                                       ny,
+      QADVel7<<< gridQ, threads >>> (  
                                        DD,
                                        DD7,
                                        temp,
@@ -2100,7 +2086,6 @@ extern "C" void QADVelDev7(unsigned int numberOfThreads,
                                        diffusivity,
                                        k_Q,
                                        QQ,
-                                       sizeQ,
                                        numberOfBCnodes,
                                        om1,
                                        neighborX,
@@ -2112,8 +2097,6 @@ extern "C" void QADVelDev7(unsigned int numberOfThreads,
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QADVelDev27(  unsigned int numberOfThreads,
-                              int nx,
-                              int ny,
                               real* DD,
                               real* DD27,
                               real* temp,
@@ -2121,7 +2104,6 @@ extern "C" void QADVelDev27(  unsigned int numberOfThreads,
                               real diffusivity,
                               int* k_Q,
                               real* QQ,
-                              unsigned int sizeQ,
                               unsigned int numberOfBCnodes,
                               real om1,
                               unsigned int* neighborX,
@@ -2145,16 +2127,13 @@ extern "C" void QADVelDev27(  unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QADVel27<<< gridQ, threads >>> (nx,
-                                      ny,
-                                      DD,
+      QADVel27<<< gridQ, threads >>> ( DD,
                                       DD27,
                                       temp,
                                       velo,
                                       diffusivity,
                                       k_Q,
                                       QQ,
-                                      sizeQ,
                                       numberOfBCnodes,
                                       om1,
                                       neighborX,
@@ -2166,15 +2145,12 @@ extern "C" void QADVelDev27(  unsigned int numberOfThreads,
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QADDev7(unsigned int numberOfThreads,
-                        int nx,
-                        int ny,
                         real* DD,
                         real* DD7,
                         real* temp,
                         real diffusivity,
                         int* k_Q,
                         real* QQ,
-                        unsigned int sizeQ,
                         unsigned int numberOfBCnodes,
                         real om1,
                         unsigned int* neighborX,
@@ -2198,15 +2174,12 @@ extern "C" void QADDev7(unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QAD7<<< gridQ, threads >>> (     nx,
-                                       ny,
-                                       DD,
+      QAD7<<< gridQ, threads >>> (     DD,
                                        DD7,
                                        temp,
                                        diffusivity,
                                        k_Q,
                                        QQ,
-                                       sizeQ,
                                        numberOfBCnodes,
                                        om1,
                                        neighborX,
@@ -2261,7 +2234,7 @@ extern "C" void ADSlipVelDevComp(
 	real * distributionsAD,
 	int* QindexArray,
 	real * Qarrays,
-	uint numberOfQs,
+	uint numberOfBCnodes,
 	real omegaDiffusivity,
 	uint * neighborX,
 	uint * neighborY,
@@ -2269,7 +2242,7 @@ extern "C" void ADSlipVelDevComp(
 	uint size_Mat,
 	bool isEvenTimestep)
 {
-	int Grid = (numberOfQs / numberOfThreads) + 1;
+	int Grid = (numberOfBCnodes / numberOfThreads) + 1;
 	dim3 gridQ(Grid, 1, 1);
 	dim3 threads(numberOfThreads, 1, 1);
 
@@ -2281,7 +2254,7 @@ extern "C" void ADSlipVelDevComp(
 		distributionsAD,
 		QindexArray,
 		Qarrays,
-		numberOfQs,
+		numberOfBCnodes,
 		omegaDiffusivity,
 		neighborX,
 		neighborY,
@@ -2293,15 +2266,12 @@ extern "C" void ADSlipVelDevComp(
 //////////////////////////////////////////////////////////////////////////
 
 extern "C" void QADDirichletDev27( unsigned int numberOfThreads,
-								   int nx,
-								   int ny,
 								   real* DD,
 								   real* DD27,
 								   real* temp,
 								   real diffusivity,
 								   int* k_Q,
 								   real* QQ,
-								   unsigned int sizeQ,
 								   unsigned int numberOfBCnodes,
 								   real om1,
 								   unsigned int* neighborX,
@@ -2325,15 +2295,13 @@ extern "C" void QADDirichletDev27( unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QADDirichlet27<<< gridQ, threads >>> (   nx,
-											   ny,
+      QADDirichlet27<<< gridQ, threads >>> (
 											   DD,
 											   DD27,
 											   temp,
 											   diffusivity,
 											   k_Q,
 											   QQ,
-											   sizeQ,
 											   numberOfBCnodes,
 											   om1,
 											   neighborX,
@@ -2345,15 +2313,12 @@ extern "C" void QADDirichletDev27( unsigned int numberOfThreads,
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QADBBDev27(unsigned int numberOfThreads,
-                           int nx,
-                           int ny,
                            real* DD,
                            real* DD27,
                            real* temp,
                            real diffusivity,
                            int* k_Q,
                            real* QQ,
-                           unsigned int sizeQ,
                            unsigned int numberOfBCnodes,
                            real om1,
                            unsigned int* neighborX,
@@ -2377,15 +2342,12 @@ extern "C" void QADBBDev27(unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QADBB27<<< gridQ, threads >>> (  nx,
-                                       ny,
-                                       DD,
+      QADBB27<<< gridQ, threads >>> (  DD,
                                        DD27,
                                        temp,
                                        diffusivity,
                                        k_Q,
                                        QQ,
-                                       sizeQ,
                                        numberOfBCnodes,
                                        om1,
                                        neighborX,
@@ -2397,15 +2359,12 @@ extern "C" void QADBBDev27(unsigned int numberOfThreads,
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QNoSlipADincompDev7(unsigned int numberOfThreads,
-									int nx,
-									int ny,
 									real* DD,
 									real* DD7,
 									real* temp,
 									real diffusivity,
 									int* k_Q,
 									real* QQ,
-									unsigned int sizeQ,
 									unsigned int numberOfBCnodes,
 									real om1,
 									unsigned int* neighborX,
@@ -2429,15 +2388,13 @@ extern "C" void QNoSlipADincompDev7(unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QNoSlipADincomp7<<< gridQ, threads >>> ( nx,
-											   ny,
+      QNoSlipADincomp7<<< gridQ, threads >>> (
 											   DD,
 											   DD7,
 											   temp,
 											   diffusivity,
 											   k_Q,
 											   QQ,
-											   sizeQ,
 											   numberOfBCnodes,
 											   om1,
 											   neighborX,
@@ -2449,15 +2406,12 @@ extern "C" void QNoSlipADincompDev7(unsigned int numberOfThreads,
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QNoSlipADincompDev27(  unsigned int numberOfThreads,
-									   int nx,
-									   int ny,
 									   real* DD,
 									   real* DD27,
 									   real* temp,
 									   real diffusivity,
 									   int* k_Q,
 									   real* QQ,
-									   unsigned int sizeQ,
 									   unsigned int numberOfBCnodes,
 									   real om1,
 									   unsigned int* neighborX,
@@ -2481,15 +2435,13 @@ extern "C" void QNoSlipADincompDev27(  unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QNoSlipADincomp27<<< gridQ, threads >>> (nx,
-											   ny,
+      QNoSlipADincomp27<<< gridQ, threads >>> (
 											   DD,
 											   DD27,
 											   temp,
 											   diffusivity,
 											   k_Q,
 											   QQ,
-											   sizeQ,
 											   numberOfBCnodes,
 											   om1,
 											   neighborX,
@@ -2501,8 +2453,6 @@ extern "C" void QNoSlipADincompDev27(  unsigned int numberOfThreads,
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QADVeloIncompDev7( unsigned int numberOfThreads,
-								   int nx,
-								   int ny,
 								   real* DD,
 								   real* DD7,
 								   real* temp,
@@ -2510,7 +2460,6 @@ extern "C" void QADVeloIncompDev7( unsigned int numberOfThreads,
 								   real diffusivity,
 								   int* k_Q,
 								   real* QQ,
-								   unsigned int sizeQ,
 								   unsigned int numberOfBCnodes,
 								   real om1,
 								   unsigned int* neighborX,
@@ -2534,8 +2483,7 @@ extern "C" void QADVeloIncompDev7( unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QADVeloIncomp7<<< gridQ, threads >>> (   nx,
-											   ny,
+      QADVeloIncomp7<<< gridQ, threads >>> ( 
 											   DD,
 											   DD7,
 											   temp,
@@ -2543,7 +2491,6 @@ extern "C" void QADVeloIncompDev7( unsigned int numberOfThreads,
 											   diffusivity,
 											   k_Q,
 											   QQ,
-											   sizeQ,
 											   numberOfBCnodes,
 											   om1,
 											   neighborX,
@@ -2555,8 +2502,6 @@ extern "C" void QADVeloIncompDev7( unsigned int numberOfThreads,
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QADVeloIncompDev27(   unsigned int numberOfThreads,
-									  int nx,
-									  int ny,
 									  real* DD,
 									  real* DD27,
 									  real* temp,
@@ -2564,7 +2509,6 @@ extern "C" void QADVeloIncompDev27(   unsigned int numberOfThreads,
 									  real diffusivity,
 									  int* k_Q,
 									  real* QQ,
-									  unsigned int sizeQ,
 									  unsigned int numberOfBCnodes,
 									  real om1,
 									  unsigned int* neighborX,
@@ -2588,8 +2532,7 @@ extern "C" void QADVeloIncompDev27(   unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QADVeloIncomp27<<< gridQ, threads >>> ( nx,
-											  ny,
+      QADVeloIncomp27<<< gridQ, threads >>> (
 											  DD,
 											  DD27,
 											  temp,
@@ -2597,7 +2540,6 @@ extern "C" void QADVeloIncompDev27(   unsigned int numberOfThreads,
 											  diffusivity,
 											  k_Q,
 											  QQ,
-											  sizeQ,
 											  numberOfBCnodes,
 											  om1,
 											  neighborX,
@@ -2608,9 +2550,7 @@ extern "C" void QADVeloIncompDev27(   unsigned int numberOfThreads,
       getLastCudaError("QADVeloIncomp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADPressIncompDev7(   unsigned int numberOfThreads,
-									  int nx,
-									  int ny,
+extern "C" void QADPressIncompDev7( unsigned int numberOfThreads,
 									  real* DD,
 									  real* DD7,
 									  real* temp,
@@ -2618,7 +2558,6 @@ extern "C" void QADPressIncompDev7(   unsigned int numberOfThreads,
 									  real diffusivity,
 									  int* k_Q,
 									  real* QQ,
-									  unsigned int sizeQ,
 									  unsigned int numberOfBCnodes,
 									  real om1,
 									  unsigned int* neighborX,
@@ -2642,8 +2581,7 @@ extern "C" void QADPressIncompDev7(   unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QADPressIncomp7<<< gridQ, threads >>>(   nx,
-											   ny,
+      QADPressIncomp7<<< gridQ, threads >>>(
 											   DD,
 											   DD7,
 											   temp,
@@ -2651,7 +2589,6 @@ extern "C" void QADPressIncompDev7(   unsigned int numberOfThreads,
 											   diffusivity,
 											   k_Q,
 											   QQ,
-											   sizeQ,
 											   numberOfBCnodes,
 											   om1,
 											   neighborX,
@@ -2663,8 +2600,6 @@ extern "C" void QADPressIncompDev7(   unsigned int numberOfThreads,
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QADPressIncompDev27(  unsigned int numberOfThreads,
-									  int nx,
-									  int ny,
 									  real* DD,
 									  real* DD27,
 									  real* temp,
@@ -2672,7 +2607,6 @@ extern "C" void QADPressIncompDev27(  unsigned int numberOfThreads,
 									  real diffusivity,
 									  int* k_Q,
 									  real* QQ,
-									  unsigned int sizeQ,
 									  unsigned int numberOfBCnodes,
 									  real om1,
 									  unsigned int* neighborX,
@@ -2696,8 +2630,7 @@ extern "C" void QADPressIncompDev27(  unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QADPressIncomp27<<< gridQ, threads >>>( nx,
-											  ny,
+      QADPressIncomp27<<< gridQ, threads >>>(
 											  DD,
 											  DD27,
 											  temp,
@@ -2705,7 +2638,6 @@ extern "C" void QADPressIncompDev27(  unsigned int numberOfThreads,
 											  diffusivity,
 											  k_Q,
 											  QQ,
-											  sizeQ,
 											  numberOfBCnodes,
 											  om1,
 											  neighborX,
@@ -2722,7 +2654,6 @@ extern "C" void QDev27( unsigned int numberOfThreads,
                         real* DD,
                         int* k_Q,
                         real* QQ,
-                        unsigned int sizeQ,
                         unsigned int numberOfBCnodes,
                         real om1,
                         unsigned int* neighborX,
@@ -2751,7 +2682,6 @@ extern "C" void QDev27( unsigned int numberOfThreads,
                                        DD,
                                        k_Q,
                                        QQ,
-                                       sizeQ,
                                        numberOfBCnodes,
                                        om1,
                                        neighborX,
@@ -2763,18 +2693,15 @@ extern "C" void QDev27( unsigned int numberOfThreads,
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QDevComp27( unsigned int numberOfThreads,
-							int nx,
-							int ny,
-							real* DD,
-							int* k_Q,
-							real* QQ,
-							unsigned int sizeQ,
+							real* distribution,
+							int* subgridDistanceIndices,
+							real* subgridDistances,
 							unsigned int numberOfBCnodes,
-							real om1,
+							real omega,
 							unsigned int* neighborX,
 							unsigned int* neighborY,
 							unsigned int* neighborZ,
-							unsigned int size_Mat,
+							unsigned int numberOfLBnodes,
 							bool isEvenTimestep)
 {
    int Grid = (numberOfBCnodes / numberOfThreads)+1;
@@ -2792,18 +2719,16 @@ extern "C" void QDevComp27( unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QDeviceComp27<<< gridQ, threads >>> (nx,
-										   ny,
-										   DD,
-										   k_Q,
-										   QQ,
-										   sizeQ,
+      QDeviceComp27<<< gridQ, threads >>> (
+										   distribution,
+										   subgridDistanceIndices,
+										   subgridDistances,
 										   numberOfBCnodes,
-										   om1,
+										   omega,
 										   neighborX,
 										   neighborY,
 										   neighborZ,
-										   size_Mat,
+										   numberOfLBnodes,
 										   isEvenTimestep);
       getLastCudaError("QDeviceComp27 execution failed");
 }
@@ -2812,7 +2737,6 @@ extern "C" void QDevCompThinWalls27(unsigned int numberOfThreads,
 									real* DD,
 									int* k_Q,
 									real* QQ,
-									unsigned int sizeQ,
 									unsigned int numberOfBCnodes,
 									real om1,
 									unsigned int* geom,
@@ -2841,7 +2765,6 @@ extern "C" void QDevCompThinWalls27(unsigned int numberOfThreads,
    QDeviceCompThinWallsPartOne27 <<< gridQ, threads >>> (DD,
 														 k_Q,
 														 QQ,
-														 sizeQ,
 														 numberOfBCnodes,
 														 om1,
 														 neighborX,
@@ -2854,7 +2777,6 @@ extern "C" void QDevCompThinWalls27(unsigned int numberOfThreads,
    QThinWallsPartTwo27 <<< gridQ, threads >>> ( DD,
 												k_Q,
 												QQ,
-												sizeQ,
 												numberOfBCnodes,
 												geom,
 												neighborX,
@@ -2873,7 +2795,6 @@ extern "C" void QDev3rdMomentsComp27(   unsigned int numberOfThreads,
 										real* DD,
 										int* k_Q,
 										real* QQ,
-										unsigned int sizeQ,
 										unsigned int numberOfBCnodes,
 										real om1,
 										unsigned int* neighborX,
@@ -2902,7 +2823,6 @@ extern "C" void QDev3rdMomentsComp27(   unsigned int numberOfThreads,
 													   DD,
 													   k_Q,
 													   QQ,
-													   sizeQ,
 													   numberOfBCnodes,
 													   om1,
 													   neighborX,
@@ -2914,12 +2834,9 @@ extern "C" void QDev3rdMomentsComp27(   unsigned int numberOfThreads,
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QDevIncompHighNu27( unsigned int numberOfThreads,
-									int nx,
-									int ny,
 									real* DD,
 									int* k_Q,
 									real* QQ,
-									unsigned int sizeQ,
 									unsigned int numberOfBCnodes,
 									real om1,
 									unsigned int* neighborX,
@@ -2943,12 +2860,10 @@ extern "C" void QDevIncompHighNu27( unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QDeviceIncompHighNu27<<< gridQ, threads >>> (nx,
-												   ny,
+      QDeviceIncompHighNu27<<< gridQ, threads >>> (
 												   DD,
 												   k_Q,
 												   QQ,
-												   sizeQ,
 												   numberOfBCnodes,
 												   om1,
 												   neighborX,
@@ -2960,12 +2875,9 @@ extern "C" void QDevIncompHighNu27( unsigned int numberOfThreads,
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QDevCompHighNu27(   unsigned int numberOfThreads,
-									int nx,
-									int ny,
 									real* DD,
 									int* k_Q,
 									real* QQ,
-									unsigned int sizeQ,
 									unsigned int numberOfBCnodes,
 									real om1,
 									unsigned int* neighborX,
@@ -2989,12 +2901,10 @@ extern "C" void QDevCompHighNu27(   unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QDeviceCompHighNu27<<< gridQ, threads >>> (  nx,
-												   ny,
+      QDeviceCompHighNu27<<< gridQ, threads >>> (
 												   DD,
 												   k_Q,
 												   QQ,
-												   sizeQ,
 												   numberOfBCnodes,
 												   om1,
 												   neighborX,
@@ -3012,8 +2922,7 @@ extern "C" void QVelDevicePlainBB27(unsigned int numberOfThreads,
 									real* DD,
 									int* k_Q,
 									real* QQ,
-									unsigned int sizeQ,
-									int numberOfBCnodes,
+									unsigned int numberOfBCnodes,
 									real om1,
 									unsigned int* neighborX,
 									unsigned int* neighborY,
@@ -3042,7 +2951,6 @@ extern "C" void QVelDevicePlainBB27(unsigned int numberOfThreads,
 												DD,
 												k_Q,
 												QQ,
-												sizeQ,
 												numberOfBCnodes,
 												om1,
 												neighborX,
@@ -3053,15 +2961,14 @@ extern "C" void QVelDevicePlainBB27(unsigned int numberOfThreads,
       getLastCudaError("QVelDevicePlainBB27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDeviceCouhette27(unsigned int numberOfThreads,
+extern "C" void QVelDeviceCouette27(unsigned int numberOfThreads,
 									real* vx,
 									real* vy,
 									real* vz,
 									real* DD,
 									int* k_Q,
 									real* QQ,
-									unsigned int sizeQ,
-									int numberOfBCnodes,
+									unsigned int numberOfBCnodes,
 									real om1,
 									unsigned int* neighborX,
 									unsigned int* neighborY,
@@ -3084,13 +2991,12 @@ extern "C" void QVelDeviceCouhette27(unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QVelDevCouhette27<<< gridQ, threads >>> ( vx,
+      QVelDevCouette27<<< gridQ, threads >>> ( vx,
 												vy,
 												vz,
 												DD,
 												k_Q,
 												QQ,
-												sizeQ,
 												numberOfBCnodes,
 												om1,
 												neighborX,
@@ -3110,7 +3016,6 @@ extern "C" void QVelDevice1h27(   unsigned int numberOfThreads,
 								  real* DD,
 								  int* k_Q,
 								  real* QQ,
-								  unsigned int sizeQ,
 								  unsigned int numberOfBCnodes,
 								  real om1,
 								  real Phi,
@@ -3147,7 +3052,6 @@ extern "C" void QVelDevice1h27(   unsigned int numberOfThreads,
                                           DD,
                                           k_Q,
                                           QQ,
-                                          sizeQ,
                                           numberOfBCnodes,
                                           om1,
 										  Phi,
@@ -3172,7 +3076,6 @@ extern "C" void QVelDev27(unsigned int numberOfThreads,
                           real* DD,
                           int* k_Q,
                           real* QQ,
-                          unsigned int sizeQ,
                           unsigned int numberOfBCnodes,
                           real om1,
                           unsigned int* neighborX,
@@ -3204,7 +3107,6 @@ extern "C" void QVelDev27(unsigned int numberOfThreads,
                                           DD,
                                           k_Q,
                                           QQ,
-                                          sizeQ,
                                           numberOfBCnodes,
                                           om1,
                                           neighborX,
@@ -3216,15 +3118,12 @@ extern "C" void QVelDev27(unsigned int numberOfThreads,
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
-									  int nx,
-									  int ny,
 									  real* vx,
 									  real* vy,
 									  real* vz,
 									  real* DD,
 									  int* k_Q,
 									  real* QQ,
-									  unsigned int sizeQ,
 									  unsigned int numberOfBCnodes,
 									  real om1,
 									  unsigned int* neighborX,
@@ -3248,15 +3147,13 @@ extern "C" void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QVelDeviceCompPlusSlip27<<< gridQ, threads >>> (nx,
-													  ny,
+      QVelDeviceCompPlusSlip27<<< gridQ, threads >>> (
 													  vx,
 													  vy,
 													  vz,
 													  DD,
 													  k_Q,
 													  QQ,
-													  sizeQ,
 													  numberOfBCnodes,
 													  om1,
 													  neighborX,
@@ -3274,8 +3171,7 @@ extern "C" void QVelDevComp27(unsigned int numberOfThreads,
                               real* distribution,
                               int* subgridDistanceIndices,
                               real* subgridDistances,
-                              unsigned int numberOfSubgridIndices,
-                              int numberOfBCnodes,
+                              unsigned int numberOfBCnodes,
                               real omega,
                               unsigned int* neighborX,
                               unsigned int* neighborY,
@@ -3293,7 +3189,6 @@ extern "C" void QVelDevComp27(unsigned int numberOfThreads,
                                distribution,
                                subgridDistanceIndices,
                                subgridDistances,
-                               numberOfSubgridIndices,
                                numberOfBCnodes,
                                omega,
                                neighborX,
@@ -3311,14 +3206,13 @@ extern "C" void QVelDevCompThinWalls27(unsigned int numberOfThreads,
 							           real* DD,
 							           int* k_Q,
 							           real* QQ,
-							           unsigned int sizeQ,
 							           unsigned int numberOfBCnodes,
 							           real om1,
-									   unsigned int* geom,
+									     unsigned int* geom,
 							           unsigned int* neighborX,
 							           unsigned int* neighborY,
 							           unsigned int* neighborZ,
-									   unsigned int* neighborWSB,
+									     unsigned int* neighborWSB,
 							           unsigned int size_Mat,
 							           bool isEvenTimestep)
 {
@@ -3337,13 +3231,12 @@ extern "C" void QVelDevCompThinWalls27(unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QVelDeviceCompThinWallsPartOne27<<< gridQ, threads >>> (vx,
+   QVelDeviceCompThinWallsPartOne27<<< gridQ, threads >>> (vx,
 											                  vy,
 											                  vz,
 											                  DD,
 											                  k_Q,
 											                  QQ,
-											                  sizeQ,
 											                  numberOfBCnodes,
 											                  om1,
 											                  neighborX,
@@ -3351,87 +3244,66 @@ extern "C" void QVelDevCompThinWalls27(unsigned int numberOfThreads,
 											                  neighborZ,
 											                  size_Mat,
 											                  isEvenTimestep);
-      getLastCudaError("QVelDeviceCompThinWallsPartOne27 execution failed");
-
-	  QThinWallsPartTwo27 <<< gridQ, threads >>> (DD,
-											      k_Q,
-											      QQ,
-											      sizeQ,
-											      numberOfBCnodes,
-                                                  geom,
-											      neighborX,
-											      neighborY,
-											      neighborZ,
-                                                  neighborWSB,
-											      size_Mat,
-											      isEvenTimestep);
-      getLastCudaError("QThinWallsPartTwo27 execution failed");
-}
-//////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDevCompZeroPress27(   unsigned int numberOfThreads,
-										  int nx,
-										  int ny,
-										  real* vx,
-										  real* vy,
-										  real* vz,
-										  real* DD,
-										  int* k_Q,
-										  real* QQ,
-										  unsigned int sizeQ,
-										  int kArray,
-										  real om1,
-										  unsigned int* neighborX,
-										  unsigned int* neighborY,
-										  unsigned int* neighborZ,
-										  unsigned int size_Mat,
-										  bool isEvenTimestep)
+   getLastCudaError("QVelDeviceCompThinWallsPartOne27 execution failed");
+
+	QThinWallsPartTwo27 <<< gridQ, threads >>> (
+       DD,
+       k_Q,
+       QQ,
+       numberOfBCnodes,
+       geom,
+       neighborX,
+       neighborY,
+       neighborZ,
+       neighborWSB,
+       size_Mat,
+       isEvenTimestep);
+   getLastCudaError("QThinWallsPartTwo27 execution failed");
+}
+
+extern "C" void QVelDevCompZeroPress27(
+   unsigned int numberOfThreads,
+   real* velocityX,
+   real* velocityY,
+   real* velocityZ,
+   real* distribution,
+   int* subgridDistanceIndices,
+   real* subgridDistances,
+   unsigned int numberOfBCnodes,
+   real omega,
+   unsigned int* neighborX,
+   unsigned int* neighborY,
+   unsigned int* neighborZ,
+   unsigned int numberOfLBnodes,
+   bool isEvenTimestep)
 {
-   //int Grid = kArray / numberOfThreads;
-   int Grid = (sizeQ / numberOfThreads)+1;
-   int Grid1, Grid2;
-   if (Grid>512)
-   {
-      Grid1 = 512;
-      Grid2 = (Grid/Grid1)+1;
-   }
-   else
-   {
-      Grid1 = 1;
-      Grid2 = Grid;
-   }
-   dim3 gridQ(Grid1, Grid2);
-   //dim3 gridQ(Grid, 1, 1);
+   dim3 grid = vf::cuda::getCudaGrid(numberOfThreads, numberOfBCnodes);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QVelDeviceCompZeroPress27<<< gridQ, threads >>> (   nx,
-														  ny,
-														  vx,
-														  vy,
-														  vz,
-														  DD,
-														  k_Q,
-														  QQ,
-														  sizeQ,
-														  //numberOfBCnodes,
-														  om1,
-														  neighborX,
-														  neighborY,
-														  neighborZ,
-														  size_Mat,
-														  isEvenTimestep);
-      getLastCudaError("QVelDeviceCompZeroPress27 execution failed");
+   QVelDeviceCompZeroPress27<<< grid, threads >>> (
+      velocityX,
+      velocityY,
+      velocityZ,
+      distribution,
+      subgridDistanceIndices,
+      subgridDistances,
+      numberOfBCnodes,
+      omega,
+      neighborX,
+      neighborY,
+      neighborZ,
+      numberOfLBnodes,
+      isEvenTimestep);
+   getLastCudaError("QVelDeviceCompZeroPress27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QVelDevIncompHighNu27(unsigned int numberOfThreads,
-									  int nx,
-									  int ny,
 									  real* vx,
 									  real* vy,
 									  real* vz,
 									  real* DD,
 									  int* k_Q,
 									  real* QQ,
-									  unsigned int sizeQ,
 									  unsigned int numberOfBCnodes,
 									  real om1,
 									  unsigned int* neighborX,
@@ -3455,15 +3327,13 @@ extern "C" void QVelDevIncompHighNu27(unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QVelDeviceIncompHighNu27<<< gridQ, threads >>> (nx,
-													  ny,
+      QVelDeviceIncompHighNu27<<< gridQ, threads >>> (
 													  vx,
 													  vy,
 													  vz,
 													  DD,
 													  k_Q,
 													  QQ,
-													  sizeQ,
 													  numberOfBCnodes,
 													  om1,
 													  neighborX,
@@ -3475,15 +3345,12 @@ extern "C" void QVelDevIncompHighNu27(unsigned int numberOfThreads,
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QVelDevCompHighNu27(  unsigned int numberOfThreads,
-									  int nx,
-									  int ny,
 									  real* vx,
 									  real* vy,
 									  real* vz,
 									  real* DD,
 									  int* k_Q,
 									  real* QQ,
-									  unsigned int sizeQ,
 									  unsigned int numberOfBCnodes,
 									  real om1,
 									  unsigned int* neighborX,
@@ -3507,15 +3374,13 @@ extern "C" void QVelDevCompHighNu27(  unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QVelDeviceCompHighNu27<<< gridQ, threads >>> (  nx,
-													  ny,
+      QVelDeviceCompHighNu27<<< gridQ, threads >>> (
 													  vx,
 													  vy,
 													  vz,
 													  DD,
 													  k_Q,
 													  QQ,
-													  sizeQ,
 													  numberOfBCnodes,
 													  om1,
 													  neighborX,
@@ -3620,7 +3485,7 @@ extern "C" void QSlipDev27(unsigned int numberOfThreads,
                            real* DD,
                            int* k_Q,
                            real* QQ,
-                           unsigned int sizeQ,
+                           unsigned int numberOfBCnodes,
                            real om1,
                            unsigned int* neighborX,
                            unsigned int* neighborY,
@@ -3628,7 +3493,7 @@ extern "C" void QSlipDev27(unsigned int numberOfThreads,
                            unsigned int size_Mat,
                            bool isEvenTimestep)
 {
-   int Grid = (sizeQ / numberOfThreads)+1;
+   int Grid = (numberOfBCnodes / numberOfThreads)+1;
    int Grid1, Grid2;
    if (Grid>512)
    {
@@ -3646,7 +3511,7 @@ extern "C" void QSlipDev27(unsigned int numberOfThreads,
       QSlipDevice27<<< gridQ, threads >>> (DD,
                                            k_Q,
                                            QQ,
-                                           sizeQ,
+                                           numberOfBCnodes,
                                            om1,
                                            neighborX,
                                            neighborY,
@@ -3660,7 +3525,7 @@ extern "C" void QSlipDevComp27(unsigned int numberOfThreads,
 							   real* DD,
 							   int* k_Q,
 							   real* QQ,
-							   unsigned int sizeQ,
+							   unsigned int numberOfBCnodes,
 							   real om1,
 							   unsigned int* neighborX,
 							   unsigned int* neighborY,
@@ -3670,7 +3535,7 @@ extern "C" void QSlipDevComp27(unsigned int numberOfThreads,
 							   unsigned int size_Mat,
 							   bool isEvenTimestep)
 {
-   int Grid = (sizeQ / numberOfThreads)+1;
+   int Grid = (numberOfBCnodes / numberOfThreads)+1;
    int Grid1, Grid2;
    if (Grid>512)
    {
@@ -3690,7 +3555,7 @@ extern "C" void QSlipDevComp27(unsigned int numberOfThreads,
       QSlipDeviceComp27TurbViscosity<<< gridQ, threads >>> (DD,
 											   k_Q,
 											   QQ,
-											   sizeQ,
+											   numberOfBCnodes,
 											   om1,
 											   neighborX,
 											   neighborY,
@@ -3705,7 +3570,7 @@ extern "C" void QSlipDevComp27(unsigned int numberOfThreads,
       QSlipDeviceComp27<<< gridQ, threads >>> (DD,
 											   k_Q,
 											   QQ,
-											   sizeQ,
+											   numberOfBCnodes,
 											   om1,
 											   neighborX,
 											   neighborY,
@@ -3720,7 +3585,7 @@ extern "C" void QSlipGeomDevComp27(unsigned int numberOfThreads,
 								   real* DD,
 								   int* k_Q,
 								   real* QQ,
-								   unsigned int sizeQ,
+								   unsigned int numberOfBCnodes,
 								   real om1,
 								   real* NormalX,
 								   real* NormalY,
@@ -3731,7 +3596,7 @@ extern "C" void QSlipGeomDevComp27(unsigned int numberOfThreads,
 								   unsigned int size_Mat,
 								   bool isEvenTimestep)
 {
-   int Grid = (sizeQ / numberOfThreads)+1;
+   int Grid = (numberOfBCnodes / numberOfThreads)+1;
    int Grid1, Grid2;
    if (Grid>512)
    {
@@ -3749,7 +3614,7 @@ extern "C" void QSlipGeomDevComp27(unsigned int numberOfThreads,
       QSlipGeomDeviceComp27<<< gridQ, threads >>> (DD,
 												   k_Q,
 												   QQ,
-												   sizeQ,
+												   numberOfBCnodes,
 												   om1,
 												   NormalX,
 												   NormalY,
@@ -3766,7 +3631,7 @@ extern "C" void QSlipNormDevComp27(unsigned int numberOfThreads,
 								   real* DD,
 								   int* k_Q,
 								   real* QQ,
-								   unsigned int sizeQ,
+								   unsigned int numberOfBCnodes,
 								   real om1,
 								   real* NormalX,
 								   real* NormalY,
@@ -3777,7 +3642,7 @@ extern "C" void QSlipNormDevComp27(unsigned int numberOfThreads,
 								   unsigned int size_Mat,
 								   bool isEvenTimestep)
 {
-   int Grid = (sizeQ / numberOfThreads)+1;
+   int Grid = (numberOfBCnodes / numberOfThreads)+1;
    int Grid1, Grid2;
    if (Grid>512)
    {
@@ -3795,7 +3660,7 @@ extern "C" void QSlipNormDevComp27(unsigned int numberOfThreads,
       QSlipNormDeviceComp27<<< gridQ, threads >>> (DD,
 												   k_Q,
 												   QQ,
-												   sizeQ,
+												   numberOfBCnodes,
 												   om1,
 												   NormalX,
 												   NormalY,
@@ -3813,7 +3678,7 @@ extern "C" void QStressDevComp27(unsigned int numberOfThreads,
 							   int* k_Q,
                         int* k_N,
 							   real* QQ,
-							   unsigned int sizeQ,
+							   unsigned int numberOfBCnodes,
 							   real om1,
                         real* turbViscosity,
                         real* vx,
@@ -3841,7 +3706,7 @@ extern "C" void QStressDevComp27(unsigned int numberOfThreads,
 							   unsigned int size_Mat,
 							   bool isEvenTimestep)
 {
-   int Grid = (sizeQ / numberOfThreads)+1;
+   int Grid = (numberOfBCnodes / numberOfThreads)+1;
    int Grid1, Grid2;
    if (Grid>512)
    {
@@ -3860,7 +3725,7 @@ extern "C" void QStressDevComp27(unsigned int numberOfThreads,
 											   k_Q,
                                     k_N,
 											   QQ,
-											   sizeQ,
+											   numberOfBCnodes,
 											   om1,
                                     turbViscosity,
                                     vx,
@@ -3896,7 +3761,7 @@ extern "C" void BBStressDev27(unsigned int numberOfThreads,
 							   int* k_Q,
                         int* k_N,
 							   real* QQ,
-							   unsigned int sizeQ,
+							   unsigned int numberOfBCnodes,
                         real* vx,
                         real* vy,
                         real* vz,
@@ -3922,7 +3787,7 @@ extern "C" void BBStressDev27(unsigned int numberOfThreads,
 							   unsigned int size_Mat,
 							   bool isEvenTimestep)
 {
-   int Grid = (sizeQ / numberOfThreads)+1;
+   int Grid = (numberOfBCnodes / numberOfThreads)+1;
    int Grid1, Grid2;
    if (Grid>512)
    {
@@ -3941,7 +3806,7 @@ extern "C" void BBStressDev27(unsigned int numberOfThreads,
 											   k_Q,
                                     k_N,
 											   QQ,
-											   sizeQ,
+											   numberOfBCnodes,
                                     vx,
                                     vy,
                                     vz,
@@ -3970,13 +3835,10 @@ extern "C" void BBStressDev27(unsigned int numberOfThreads,
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QPressDev27(unsigned int numberOfThreads,
-                             int nx,
-                             int ny,
                              real* rhoBC,
                              real* DD,
                              int* k_Q,
                              real* QQ,
-                             unsigned int sizeQ,
                              unsigned int numberOfBCnodes,
                              real om1,
                              unsigned int* neighborX,
@@ -4000,13 +3862,10 @@ extern "C" void QPressDev27(unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      QPressDevice27<<< gridQ, threads >>> (nx,
-                                             ny,
-                                             rhoBC,
+      QPressDevice27<<< gridQ, threads >>> ( rhoBC,
                                              DD,
                                              k_Q,
                                              QQ,
-                                             sizeQ,
                                              numberOfBCnodes,
                                              om1,
                                              neighborX,
@@ -4315,11 +4174,11 @@ extern "C" void QPressDevIncompNEQ27(unsigned int numberOfThreads,
 //////////////////////////////////////////////////////////////////////////
 extern "C" void QPressDevNEQ27(  unsigned int numberOfThreads,
                                      real* rhoBC,
-                                     real* DD,
-                                     int* k_Q,
-                                     int* k_N,
+                                     real* distribution,
+                                     int* bcNodeIndices,
+                                     int* bcNeighborIndices,
                                      unsigned int numberOfBCnodes,
-                                     real om1,
+                                     real omega1,
                                      unsigned int* neighborX,
                                      unsigned int* neighborY,
                                      unsigned int* neighborZ,
@@ -4342,11 +4201,11 @@ extern "C" void QPressDevNEQ27(  unsigned int numberOfThreads,
    dim3 threads(numberOfThreads, 1, 1 );
 
       QPressDeviceNEQ27<<< gridQ, threads >>> ( rhoBC,
-                                                DD,
-                                                k_Q,
-                                                k_N,
+                                                distribution,
+                                                bcNodeIndices,
+                                                bcNeighborIndices,
                                                 numberOfBCnodes,
-                                                om1,
+                                                omega1,
                                                 neighborX,
                                                 neighborY,
                                                 neighborZ,
@@ -4483,7 +4342,6 @@ extern "C" void BBDev27( unsigned int numberOfThreads,
                        real* DD,
                        int* k_Q,
                        real* QQ,
-                       unsigned int sizeQ,
                        unsigned int numberOfBCnodes,
                        real om1,
                        unsigned int* neighborX,
@@ -4512,7 +4370,6 @@ extern "C" void BBDev27( unsigned int numberOfThreads,
                                           DD,
                                           k_Q,
                                           QQ,
-                                          sizeQ,
                                           numberOfBCnodes,
                                           om1,
                                           neighborX,
@@ -4528,7 +4385,6 @@ extern "C" void QPressDev27_IntBB(  unsigned int numberOfThreads,
 									real* DD,
 									int* k_Q,
 									real* QQ,
-									unsigned int sizeQ,
 									unsigned int numberOfBCnodes,
 									real om1,
 									unsigned int* neighborX,
@@ -4556,7 +4412,6 @@ extern "C" void QPressDev27_IntBB(  unsigned int numberOfThreads,
 													DD,
 													k_Q,
 													QQ,
-													sizeQ,
 													numberOfBCnodes,
 													om1,
 													neighborX,
@@ -7264,15 +7119,12 @@ extern "C" void setRecvGsDevF3(
 }
 //////////////////////////////////////////////////////////////////////////
 extern "C" void WallFuncDev27(unsigned int numberOfThreads,
-							  int nx,
-							  int ny,
 							  real* vx,
 							  real* vy,
 							  real* vz,
 							  real* DD,
 							  int* k_Q,
 							  real* QQ,
-							  unsigned int sizeQ,
 							  unsigned int numberOfBCnodes,
 							  real om1,
 							  unsigned int* neighborX,
@@ -7296,15 +7148,13 @@ extern "C" void WallFuncDev27(unsigned int numberOfThreads,
    dim3 gridQ(Grid1, Grid2);
    dim3 threads(numberOfThreads, 1, 1 );
 
-      WallFunction27<<< gridQ, threads >>> (  nx,
-											  ny,
+      WallFunction27<<< gridQ, threads >>> (
 											  vx,
 											  vy,
 											  vz,
 											  DD,
 											  k_Q,
 											  QQ,
-											  sizeQ,
 											  numberOfBCnodes,
 											  om1,
 											  neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu
index af74dd9a75f1ab5bab89193cd14270dfb2b8d515..df24809593ffbfc79b2664773a39f156c28e5f7a 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu
@@ -9,6 +9,7 @@
 #include "LBM/LB.h" 
 #include "LBM/D3Q27.h"
 #include <lbm/constants/NumericConstants.h>
+#include "KernelUtilities.h"
 
 using namespace vf::lbm::constant;
 
@@ -18,8 +19,7 @@ extern "C" __global__ void QDevice3rdMomentsComp27(  int inx,
 													 real* DD, 
 													 int* k_Q, 
 													 real* QQ,
-													 unsigned int sizeQ,
-													 int numberOfNodes, 
+													 unsigned int numberOfBCnodes, 
 													 real om1, 
 													 unsigned int* neighborX,
 													 unsigned int* neighborY,
@@ -99,7 +99,7 @@ extern "C" __global__ void QDevice3rdMomentsComp27(  int inx,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<numberOfNodes)
+   if(k < numberOfBCnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
@@ -107,32 +107,32 @@ extern "C" __global__ void QDevice3rdMomentsComp27(  int inx,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int numberOfNodesK  = k_Q[k];
@@ -564,7 +564,7 @@ extern "C" __global__ void QDeviceIncompHighNu27(int inx,
 												 real* DD, 
 												 int* k_Q, 
 												 real* QQ,
-												 unsigned int sizeQ,
+												 unsigned int  numberOfBCnodes,
 												 int numberOfNodes, 
 												 real om1, 
 												 unsigned int* neighborX,
@@ -653,32 +653,32 @@ extern "C" __global__ void QDeviceIncompHighNu27(int inx,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int numberOfNodesK  = k_Q[k];
@@ -1057,13 +1057,11 @@ extern "C" __global__ void QDeviceIncompHighNu27(int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QDeviceCompHighNu27(  int inx,
-												 int iny,
+extern "C" __global__ void QDeviceCompHighNu27(
 												 real* DD, 
 												 int* k_Q, 
 												 real* QQ,
-												 unsigned int sizeQ,
-												 int numberOfNodes, 
+												 unsigned int numberOfBCnodes, 
 												 real om1, 
 												 unsigned int* neighborX,
 												 unsigned int* neighborY,
@@ -1143,7 +1141,7 @@ extern "C" __global__ void QDeviceCompHighNu27(  int inx,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<numberOfNodes)
+   if(k<numberOfBCnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
@@ -1151,32 +1149,32 @@ extern "C" __global__ void QDeviceCompHighNu27(  int inx,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int numberOfNodesK  = k_Q[k];
@@ -1633,146 +1631,69 @@ extern "C" __global__ void QDeviceCompHighNu27(  int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QDeviceComp27(int inx,
-										 int iny,
-										 real* DD, 
-										 int* k_Q, 
-										 real* QQ,
-										 unsigned int sizeQ,
-										 int numberOfNodes, 
-										 real om1, 
+extern "C" __global__ void QDeviceComp27(
+										 real* distribution, 
+										 int* subgridDistanceIndices, 
+										 real* subgridDistances,
+										 unsigned int numberOfBCnodes, 
+										 real omega, 
 										 unsigned int* neighborX,
 										 unsigned int* neighborY,
 										 unsigned int* neighborZ,
-										 unsigned int size_Mat, 
+										 unsigned int numberOfLBnodes, 
 										 bool isEvenTimestep)
 {
-   Distributions27 D;
-   if (isEvenTimestep==true)
-   {
-      D.f[dirE   ] = &DD[dirE   *size_Mat];
-      D.f[dirW   ] = &DD[dirW   *size_Mat];
-      D.f[dirN   ] = &DD[dirN   *size_Mat];
-      D.f[dirS   ] = &DD[dirS   *size_Mat];
-      D.f[dirT   ] = &DD[dirT   *size_Mat];
-      D.f[dirB   ] = &DD[dirB   *size_Mat];
-      D.f[dirNE  ] = &DD[dirNE  *size_Mat];
-      D.f[dirSW  ] = &DD[dirSW  *size_Mat];
-      D.f[dirSE  ] = &DD[dirSE  *size_Mat];
-      D.f[dirNW  ] = &DD[dirNW  *size_Mat];
-      D.f[dirTE  ] = &DD[dirTE  *size_Mat];
-      D.f[dirBW  ] = &DD[dirBW  *size_Mat];
-      D.f[dirBE  ] = &DD[dirBE  *size_Mat];
-      D.f[dirTW  ] = &DD[dirTW  *size_Mat];
-      D.f[dirTN  ] = &DD[dirTN  *size_Mat];
-      D.f[dirBS  ] = &DD[dirBS  *size_Mat];
-      D.f[dirBN  ] = &DD[dirBN  *size_Mat];
-      D.f[dirTS  ] = &DD[dirTS  *size_Mat];
-      D.f[dirZERO] = &DD[dirZERO*size_Mat];
-      D.f[dirTNE ] = &DD[dirTNE *size_Mat];
-      D.f[dirTSW ] = &DD[dirTSW *size_Mat];
-      D.f[dirTSE ] = &DD[dirTSE *size_Mat];
-      D.f[dirTNW ] = &DD[dirTNW *size_Mat];
-      D.f[dirBNE ] = &DD[dirBNE *size_Mat];
-      D.f[dirBSW ] = &DD[dirBSW *size_Mat];
-      D.f[dirBSE ] = &DD[dirBSE *size_Mat];
-      D.f[dirBNW ] = &DD[dirBNW *size_Mat];
-   } 
-   else
-   {
-      D.f[dirW   ] = &DD[dirE   *size_Mat];
-      D.f[dirE   ] = &DD[dirW   *size_Mat];
-      D.f[dirS   ] = &DD[dirN   *size_Mat];
-      D.f[dirN   ] = &DD[dirS   *size_Mat];
-      D.f[dirB   ] = &DD[dirT   *size_Mat];
-      D.f[dirT   ] = &DD[dirB   *size_Mat];
-      D.f[dirSW  ] = &DD[dirNE  *size_Mat];
-      D.f[dirNE  ] = &DD[dirSW  *size_Mat];
-      D.f[dirNW  ] = &DD[dirSE  *size_Mat];
-      D.f[dirSE  ] = &DD[dirNW  *size_Mat];
-      D.f[dirBW  ] = &DD[dirTE  *size_Mat];
-      D.f[dirTE  ] = &DD[dirBW  *size_Mat];
-      D.f[dirTW  ] = &DD[dirBE  *size_Mat];
-      D.f[dirBE  ] = &DD[dirTW  *size_Mat];
-      D.f[dirBS  ] = &DD[dirTN  *size_Mat];
-      D.f[dirTN  ] = &DD[dirBS  *size_Mat];
-      D.f[dirTS  ] = &DD[dirBN  *size_Mat];
-      D.f[dirBN  ] = &DD[dirTS  *size_Mat];
-      D.f[dirZERO] = &DD[dirZERO*size_Mat];
-      D.f[dirTNE ] = &DD[dirBSW *size_Mat];
-      D.f[dirTSW ] = &DD[dirBNE *size_Mat];
-      D.f[dirTSE ] = &DD[dirBNW *size_Mat];
-      D.f[dirTNW ] = &DD[dirBSE *size_Mat];
-      D.f[dirBNE ] = &DD[dirTSW *size_Mat];
-      D.f[dirBSW ] = &DD[dirTNE *size_Mat];
-      D.f[dirBSE ] = &DD[dirTNW *size_Mat];
-      D.f[dirBNW ] = &DD[dirTSE *size_Mat];
-   }
+   //////////////////////////////////////////////////////////////////////////
+   //! The no-slip boundary condition is executed in the following steps
+   //!
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+   //!
+   const unsigned  x = threadIdx.x;  // global x-index 
+   const unsigned  y = blockIdx.x;   // global y-index 
+   const unsigned  z = blockIdx.y;   // global z-index 
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
 
    const unsigned k = nx*(ny*z + y) + x;
-   //////////////////////////////////////////////////////////////////////////
 
-   if(k<numberOfNodes)
+   if(k < numberOfBCnodes)
    {
+      //////////////////////////////////////////////////////////////////////////
+      //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
+      //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+      //!
+      Distributions27 dist;
+      getPointersToDistributions(dist, distribution, numberOfLBnodes, isEvenTimestep);
+
       ////////////////////////////////////////////////////////////////////////////////
-      real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
-            *q_dirNE,  *q_dirSW,  *q_dirSE,  *q_dirNW,  *q_dirTE,  *q_dirBW,
-            *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
-            *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
-            *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      //! - Set local subgrid distances (q's)
+      //!
+      SubgridDistances27 subgridD;
+      getPointersToSubgridDistances(subgridD, subgridDistances, numberOfBCnodes);
+      
       ////////////////////////////////////////////////////////////////////////////////
-      //index
-      unsigned int numberOfNodesK  = k_Q[k];
-      unsigned int kzero= numberOfNodesK;
-      unsigned int ke   = numberOfNodesK;
-      unsigned int kw   = neighborX[numberOfNodesK];
-      unsigned int kn   = numberOfNodesK;
-      unsigned int ks   = neighborY[numberOfNodesK];
-      unsigned int kt   = numberOfNodesK;
-      unsigned int kb   = neighborZ[numberOfNodesK];
+      //! - Set neighbor indices (necessary for indirect addressing)
+      //!
+      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int kzero= indexOfBCnode;
+      unsigned int ke   = indexOfBCnode;
+      unsigned int kw   = neighborX[indexOfBCnode];
+      unsigned int kn   = indexOfBCnode;
+      unsigned int ks   = neighborY[indexOfBCnode];
+      unsigned int kt   = indexOfBCnode;
+      unsigned int kb   = neighborZ[indexOfBCnode];
       unsigned int ksw  = neighborY[kw];
-      unsigned int kne  = numberOfNodesK;
+      unsigned int kne  = indexOfBCnode;
       unsigned int kse  = ks;
       unsigned int knw  = kw;
       unsigned int kbw  = neighborZ[kw];
-      unsigned int kte  = numberOfNodesK;
+      unsigned int kte  = indexOfBCnode;
       unsigned int kbe  = kb;
       unsigned int ktw  = kw;
       unsigned int kbs  = neighborZ[ks];
-      unsigned int ktn  = numberOfNodesK;
+      unsigned int ktn  = indexOfBCnode;
       unsigned int kbn  = kb;
       unsigned int kts  = ks;
       unsigned int ktse = ks;
@@ -1781,334 +1702,274 @@ extern "C" __global__ void QDeviceComp27(int inx,
       unsigned int kbse = kbs;
       unsigned int ktsw = ksw;
       unsigned int kbne = kb;
-      unsigned int ktne = numberOfNodesK;
+      unsigned int ktne = indexOfBCnode;
       unsigned int kbsw = neighborZ[ksw];
+
       ////////////////////////////////////////////////////////////////////////////////
-      real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
-            f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
+      //! - Set local distributions
+      //!
+      real f_W    = (dist.f[dirE   ])[ke   ];
+      real f_E    = (dist.f[dirW   ])[kw   ];
+      real f_S    = (dist.f[dirN   ])[kn   ];
+      real f_N    = (dist.f[dirS   ])[ks   ];
+      real f_B    = (dist.f[dirT   ])[kt   ];
+      real f_T    = (dist.f[dirB   ])[kb   ];
+      real f_SW   = (dist.f[dirNE  ])[kne  ];
+      real f_NE   = (dist.f[dirSW  ])[ksw  ];
+      real f_NW   = (dist.f[dirSE  ])[kse  ];
+      real f_SE   = (dist.f[dirNW  ])[knw  ];
+      real f_BW   = (dist.f[dirTE  ])[kte  ];
+      real f_TE   = (dist.f[dirBW  ])[kbw  ];
+      real f_TW   = (dist.f[dirBE  ])[kbe  ];
+      real f_BE   = (dist.f[dirTW  ])[ktw  ];
+      real f_BS   = (dist.f[dirTN  ])[ktn  ];
+      real f_TN   = (dist.f[dirBS  ])[kbs  ];
+      real f_TS   = (dist.f[dirBN  ])[kbn  ];
+      real f_BN   = (dist.f[dirTS  ])[kts  ];
+      real f_BSW  = (dist.f[dirTNE ])[ktne ];
+      real f_BNE  = (dist.f[dirTSW ])[ktsw ];
+      real f_BNW  = (dist.f[dirTSE ])[ktse ];
+      real f_BSE  = (dist.f[dirTNW ])[ktnw ];
+      real f_TSW  = (dist.f[dirBNE ])[kbne ];
+      real f_TNE  = (dist.f[dirBSW ])[kbsw ];
+      real f_TNW  = (dist.f[dirBSE ])[kbse ];
+      real f_TSE  = (dist.f[dirBNW ])[kbnw ];
 
-      f_W    = (D.f[dirE   ])[ke   ];
-      f_E    = (D.f[dirW   ])[kw   ];
-      f_S    = (D.f[dirN   ])[kn   ];
-      f_N    = (D.f[dirS   ])[ks   ];
-      f_B    = (D.f[dirT   ])[kt   ];
-      f_T    = (D.f[dirB   ])[kb   ];
-      f_SW   = (D.f[dirNE  ])[kne  ];
-      f_NE   = (D.f[dirSW  ])[ksw  ];
-      f_NW   = (D.f[dirSE  ])[kse  ];
-      f_SE   = (D.f[dirNW  ])[knw  ];
-      f_BW   = (D.f[dirTE  ])[kte  ];
-      f_TE   = (D.f[dirBW  ])[kbw  ];
-      f_TW   = (D.f[dirBE  ])[kbe  ];
-      f_BE   = (D.f[dirTW  ])[ktw  ];
-      f_BS   = (D.f[dirTN  ])[ktn  ];
-      f_TN   = (D.f[dirBS  ])[kbs  ];
-      f_TS   = (D.f[dirBN  ])[kbn  ];
-      f_BN   = (D.f[dirTS  ])[kts  ];
-      f_BSW  = (D.f[dirTNE ])[ktne ];
-      f_BNE  = (D.f[dirTSW ])[ktsw ];
-      f_BNW  = (D.f[dirTSE ])[ktse ];
-      f_BSE  = (D.f[dirTNW ])[ktnw ];
-      f_TSW  = (D.f[dirBNE ])[kbne ];
-      f_TNE  = (D.f[dirBSW ])[kbsw ];
-      f_TNW  = (D.f[dirBSE ])[kbse ];
-      f_TSE  = (D.f[dirBNW ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
-      real vx1, vx2, vx3, drho, feq, q;
-      drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
-				f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-				f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[dirZERO])[kzero]); 
+      //! - Calculate macroscopic quantities
+      //!
+      real drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
+                  f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
+                  f_T + f_B + f_N + f_S + f_E + f_W + ((dist.f[dirZERO])[kzero]); 
 
-      vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
-                ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
-                (f_E - f_W)) / (c1o1 + drho); 
+      real vx1  = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+                   ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
+                   (f_E - f_W)) / (c1o1 + drho);          
 
+      real vx2  = ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+                   ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
+                   (f_N - f_S)) / (c1o1 + drho); 
 
-      vx2    =   ((-(f_TSE - f_BNW) + (f_TNW - f_BSE))  + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
-                  ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
-                  (f_N - f_S)) / (c1o1 + drho); 
+      real vx3  = (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
+                   (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
+                   (f_T - f_B)) / (c1o1 + drho); 
 
-      vx3    =    (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
-                  (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
-                  (f_T - f_B)) / (c1o1 + drho); 
+      real cu_sq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3) * (c1o1 + drho);
 
-      real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3) * (c1o1 + drho);
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Change the pointers to write the results in the correct array
+      //!
+      getPointersToDistributions(dist, distribution, numberOfLBnodes, !isEvenTimestep);
 
-      //////////////////////////////////////////////////////////////////////////
-      if (isEvenTimestep==false)
+       ////////////////////////////////////////////////////////////////////////////////
+      //! - Update distributions with subgrid distance (q) between zero and one
+      real feq, q, velocityLB, velocityBC;
+      q = (subgridD.q[dirE])[k];
+      if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one
       {
-         D.f[dirE   ] = &DD[dirE   *size_Mat];
-         D.f[dirW   ] = &DD[dirW   *size_Mat];
-         D.f[dirN   ] = &DD[dirN   *size_Mat];
-         D.f[dirS   ] = &DD[dirS   *size_Mat];
-         D.f[dirT   ] = &DD[dirT   *size_Mat];
-         D.f[dirB   ] = &DD[dirB   *size_Mat];
-         D.f[dirNE  ] = &DD[dirNE  *size_Mat];
-         D.f[dirSW  ] = &DD[dirSW  *size_Mat];
-         D.f[dirSE  ] = &DD[dirSE  *size_Mat];
-         D.f[dirNW  ] = &DD[dirNW  *size_Mat];
-         D.f[dirTE  ] = &DD[dirTE  *size_Mat];
-         D.f[dirBW  ] = &DD[dirBW  *size_Mat];
-         D.f[dirBE  ] = &DD[dirBE  *size_Mat];
-         D.f[dirTW  ] = &DD[dirTW  *size_Mat];
-         D.f[dirTN  ] = &DD[dirTN  *size_Mat];
-         D.f[dirBS  ] = &DD[dirBS  *size_Mat];
-         D.f[dirBN  ] = &DD[dirBN  *size_Mat];
-         D.f[dirTS  ] = &DD[dirTS  *size_Mat];
-         D.f[dirZERO] = &DD[dirZERO*size_Mat];
-         D.f[dirTNE ] = &DD[dirTNE *size_Mat];
-         D.f[dirTSW ] = &DD[dirTSW *size_Mat];
-         D.f[dirTSE ] = &DD[dirTSE *size_Mat];
-         D.f[dirTNW ] = &DD[dirTNW *size_Mat];
-         D.f[dirBNE ] = &DD[dirBNE *size_Mat];
-         D.f[dirBSW ] = &DD[dirBSW *size_Mat];
-         D.f[dirBSE ] = &DD[dirBSE *size_Mat];
-         D.f[dirBNW ] = &DD[dirBNW *size_Mat];
-      } 
-      else
-      {
-         D.f[dirW   ] = &DD[dirE   *size_Mat];
-         D.f[dirE   ] = &DD[dirW   *size_Mat];
-         D.f[dirS   ] = &DD[dirN   *size_Mat];
-         D.f[dirN   ] = &DD[dirS   *size_Mat];
-         D.f[dirB   ] = &DD[dirT   *size_Mat];
-         D.f[dirT   ] = &DD[dirB   *size_Mat];
-         D.f[dirSW  ] = &DD[dirNE  *size_Mat];
-         D.f[dirNE  ] = &DD[dirSW  *size_Mat];
-         D.f[dirNW  ] = &DD[dirSE  *size_Mat];
-         D.f[dirSE  ] = &DD[dirNW  *size_Mat];
-         D.f[dirBW  ] = &DD[dirTE  *size_Mat];
-         D.f[dirTE  ] = &DD[dirBW  *size_Mat];
-         D.f[dirTW  ] = &DD[dirBE  *size_Mat];
-         D.f[dirBE  ] = &DD[dirTW  *size_Mat];
-         D.f[dirBS  ] = &DD[dirTN  *size_Mat];
-         D.f[dirTN  ] = &DD[dirBS  *size_Mat];
-         D.f[dirTS  ] = &DD[dirBN  *size_Mat];
-         D.f[dirBN  ] = &DD[dirTS  *size_Mat];
-         D.f[dirZERO] = &DD[dirZERO*size_Mat];
-         D.f[dirTNE ] = &DD[dirBSW *size_Mat];
-         D.f[dirTSW ] = &DD[dirBNE *size_Mat];
-         D.f[dirTSE ] = &DD[dirBNW *size_Mat];
-         D.f[dirTNW ] = &DD[dirBSE *size_Mat];
-         D.f[dirBNE ] = &DD[dirTSW *size_Mat];
-         D.f[dirBSW ] = &DD[dirTNE *size_Mat];
-         D.f[dirBSE ] = &DD[dirTNW *size_Mat];
-         D.f[dirBNW ] = &DD[dirTSE *size_Mat];
-      }
-      ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      //Test
-         //(D.f[dirZERO])[k]=c1o10;
-      ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  
-	  
-	  //ToDo anders klammern !!!!!!
-	  
-      q = q_dirE[k];
-      if (q>=c0o1 && q<=c1o1)
-      {
-         feq=c2o27* (drho/*+three*( vx1        )*/+c9o2*( vx1        )*( vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirW])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W))/(c1o1+q);
-         //(D.f[dirW])[kw]=zero;
+         velocityLB = vx1;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         (dist.f[dirW])[kw] = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, omega, c2o27);
       }
 
-      q = q_dirW[k];
+      q = (subgridD.q[dirW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c2o27* (drho/*+three*(-vx1        )*/+c9o2*(-vx1        )*(-vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirE])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E))/(c1o1+q);
-         //(D.f[dirE])[ke]=zero;
+         velocityLB = -vx1;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         (dist.f[dirE])[ke] = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, omega, c2o27);
       }
 
-      q = q_dirN[k];
+      q = (subgridD.q[dirN])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c2o27* (drho/*+three*(    vx2     )*/+c9o2*(     vx2    )*(     vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirS])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S))/(c1o1+q);
-         //(D.f[dirS])[ks]=zero;
+         velocityLB = vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         (dist.f[dirS])[ks] = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, omega, c2o27);
       }
 
-      q = q_dirS[k];
+      q = (subgridD.q[dirS])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c2o27* (drho/*+three*(   -vx2     )*/+c9o2*(    -vx2    )*(    -vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirN])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N))/(c1o1+q);
-         //(D.f[dirN])[kn]=zero;
+         velocityLB = -vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         (dist.f[dirN])[kn] = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, omega, c2o27);
       }
 
-      q = q_dirT[k];
+      q = (subgridD.q[dirT])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c2o27* (drho/*+three*(         vx3)*/+c9o2*(         vx3)*(         vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirB])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B))/(c1o1+q);
-         //(D.f[dirB])[kb]=one;
+         velocityLB = vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         (dist.f[dirB])[kb] = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, omega, c2o27);
       }
 
-      q = q_dirB[k];
+      q = (subgridD.q[dirB])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c2o27* (drho/*+three*(        -vx3)*/+c9o2*(        -vx3)*(        -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirT])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T))/(c1o1+q);
-         //(D.f[dirT])[kt]=zero;
+         velocityLB = -vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         (dist.f[dirT])[kt] = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, omega, c2o27);
       }
 
-      q = q_dirNE[k];
+      q = (subgridD.q[dirNE])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*( vx1+vx2    )*/+c9o2*( vx1+vx2    )*( vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirSW])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW))/(c1o1+q);
-         //(D.f[dirSW])[ksw]=zero;
+         velocityLB = vx1 + vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         (dist.f[dirSW])[ksw] = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, omega, c1o54);
       }
 
-      q = q_dirSW[k];
+      q = (subgridD.q[dirSW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(-vx1-vx2    )*/+c9o2*(-vx1-vx2    )*(-vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirNE])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE))/(c1o1+q);
-         //(D.f[dirNE])[kne]=zero;
+         velocityLB = -vx1 - vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         (dist.f[dirNE])[kne] = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, omega, c1o54);
       }
 
-      q = q_dirSE[k];
+      q = (subgridD.q[dirSE])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*( vx1-vx2    )*/+c9o2*( vx1-vx2    )*( vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirNW])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW))/(c1o1+q);
-         //(D.f[dirNW])[knw]=zero;
+         velocityLB = vx1 - vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         (dist.f[dirNW])[knw] = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, omega, c1o54);
       }
 
-      q = q_dirNW[k];
+      q = (subgridD.q[dirNW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(-vx1+vx2    )*/+c9o2*(-vx1+vx2    )*(-vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirSE])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE))/(c1o1+q);
-         //(D.f[dirSE])[kse]=zero;
+         velocityLB = -vx1 + vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         (dist.f[dirSE])[kse] = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, omega, c1o54);
       }
 
-      q = q_dirTE[k];
+      q = (subgridD.q[dirTE])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*( vx1    +vx3)*/+c9o2*( vx1    +vx3)*( vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirBW])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW))/(c1o1+q);
-         //(D.f[dirBW])[kbw]=zero;
+         velocityLB = vx1 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         (dist.f[dirBW])[kbw] = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, omega, c1o54);
       }
 
-      q = q_dirBW[k];
+      q = (subgridD.q[dirBW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(-vx1    -vx3)*/+c9o2*(-vx1    -vx3)*(-vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirTE])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE))/(c1o1+q);
-         //(D.f[dirTE])[kte]=zero;
+         velocityLB = -vx1 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         (dist.f[dirTE])[kte] = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, omega, c1o54);
       }
 
-      q = q_dirBE[k];
+      q = (subgridD.q[dirBE])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*( vx1    -vx3)*/+c9o2*( vx1    -vx3)*( vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirTW])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW))/(c1o1+q);
-         //(D.f[dirTW])[ktw]=zero;
+         velocityLB = vx1 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         (dist.f[dirTW])[ktw] = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, omega, c1o54);
       }
 
-      q = q_dirTW[k];
+      q = (subgridD.q[dirTW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(-vx1    +vx3)*/+c9o2*(-vx1    +vx3)*(-vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirBE])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE))/(c1o1+q);
-         //(D.f[dirBE])[kbe]=zero;
+         velocityLB = -vx1 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         (dist.f[dirBE])[kbe] = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, omega, c1o54);
       }
 
-      q = q_dirTN[k];
+      q = (subgridD.q[dirTN])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(     vx2+vx3)*/+c9o2*(     vx2+vx3)*(     vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirBS])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS))/(c1o1+q);
-         //(D.f[dirBS])[kbs]=zero;
+         velocityLB = vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         (dist.f[dirBS])[kbs] = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, omega, c1o54);
       }
 
-      q = q_dirBS[k];
+      q = (subgridD.q[dirBS])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(    -vx2-vx3)*/+c9o2*(    -vx2-vx3)*(    -vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirTN])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN))/(c1o1+q);
-         //(D.f[dirTN])[ktn]=zero;
+         velocityLB = -vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         (dist.f[dirTN])[ktn] = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, omega, c1o54);
       }
 
-      q = q_dirBN[k];
+      q = (subgridD.q[dirBN])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(     vx2-vx3)*/+c9o2*(     vx2-vx3)*(     vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirTS])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS))/(c1o1+q);
-         //(D.f[dirTS])[kts]=zero;
+         velocityLB = vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         (dist.f[dirTS])[kts] = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, omega, c1o54);
       }
 
-      q = q_dirTS[k];
+      q = (subgridD.q[dirTS])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(    -vx2+vx3)*/+c9o2*(    -vx2+vx3)*(    -vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirBN])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN))/(c1o1+q);
-         //(D.f[dirBN])[kbn]=zero;
+         velocityLB = -vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         (dist.f[dirBN])[kbn] = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, omega, c1o54);
       }
 
-      q = q_dirTNE[k];
+      q = (subgridD.q[dirTNE])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*( vx1+vx2+vx3)*/+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirBSW])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW))/(c1o1+q);
-         //(D.f[dirBSW])[kbsw]=zero;
+         velocityLB = vx1 + vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         (dist.f[dirBSW])[kbsw] = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, omega, c1o216);
       }
 
-      q = q_dirBSW[k];
+      q = (subgridD.q[dirBSW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*(-vx1-vx2-vx3)*/+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirTNE])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE))/(c1o1+q);
-         //(D.f[dirTNE])[ktne]=zero;
+         velocityLB = -vx1 - vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         (dist.f[dirTNE])[ktne] = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, omega, c1o216);
       }
 
-      q = q_dirBNE[k];
+      q = (subgridD.q[dirBNE])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*( vx1+vx2-vx3)*/+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirTSW])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW))/(c1o1+q);
-         //(D.f[dirTSW])[ktsw]=zero;
+         velocityLB = vx1 + vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         (dist.f[dirTSW])[ktsw] = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, omega, c1o216);
       }
 
-      q = q_dirTSW[k];
+      q = (subgridD.q[dirTSW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*(-vx1-vx2+vx3)*/+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirBNE])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE))/(c1o1+q);
-         //(D.f[dirBNE])[kbne]=zero;
+         velocityLB = -vx1 - vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         (dist.f[dirBNE])[kbne] = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, omega, c1o216);
       }
 
-      q = q_dirTSE[k];
+      q = (subgridD.q[dirTSE])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*( vx1-vx2+vx3)*/+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirBNW])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW))/(c1o1+q);
-         //(D.f[dirBNW])[kbnw]=zero;
+         velocityLB = vx1 - vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         (dist.f[dirBNW])[kbnw] = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, omega, c1o216);
       }
 
-      q = q_dirBNW[k];
+      q = (subgridD.q[dirBNW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*(-vx1+vx2-vx3)*/+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirTSE])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE))/(c1o1+q);
-         //(D.f[dirTSE])[ktse]=zero;
+         velocityLB = -vx1 + vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         (dist.f[dirTSE])[ktse] = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, omega, c1o216);
       }
 
-      q = q_dirBSE[k];
+      q = (subgridD.q[dirBSE])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*( vx1-vx2-vx3)*/+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirTNW])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW))/(c1o1+q);
-         //(D.f[dirTNW])[ktnw]=zero;
+         velocityLB = vx1 - vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         (dist.f[dirTNW])[ktnw] = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, omega, c1o216);
       }
 
-      q = q_dirTNW[k];
+      q = (subgridD.q[dirTNW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*(-vx1+vx2+vx3)*/+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirBSE])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE))/(c1o1+q);
-         //(D.f[dirBSE])[kbse]=zero;
+         velocityLB = -vx1 + vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         (dist.f[dirBSE])[kbse] = getInterpolatedDistributionForNoSlipBC(q, f_TNW, f_BSE, feq, omega, c1o216);
       }
    }
 }
@@ -2158,8 +2019,7 @@ extern "C" __global__ void QDevice27(int inx,
                                      real* DD, 
                                      int* k_Q, 
                                      real* QQ,
-                                     unsigned int sizeQ,
-                                     int numberOfNodes, 
+                                     unsigned int numberOfBCnodes, 
                                      real om1, 
                                      unsigned int* neighborX,
                                      unsigned int* neighborY,
@@ -2239,7 +2099,7 @@ extern "C" __global__ void QDevice27(int inx,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<numberOfNodes)
+   if(k<numberOfBCnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
@@ -2247,32 +2107,32 @@ extern "C" __global__ void QDevice27(int inx,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   *numberOfBCnodes];
+      q_dirW   = &QQ[dirW   *numberOfBCnodes];
+      q_dirN   = &QQ[dirN   *numberOfBCnodes];
+      q_dirS   = &QQ[dirS   *numberOfBCnodes];
+      q_dirT   = &QQ[dirT   *numberOfBCnodes];
+      q_dirB   = &QQ[dirB   *numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  *numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  *numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  *numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  *numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  *numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  *numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  *numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  *numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  *numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  *numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  *numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  *numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE *numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW *numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE *numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW *numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE *numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW *numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE *numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW *numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int numberOfNodesK  = k_Q[k];
@@ -3094,8 +2954,7 @@ extern "C" __global__ void BBDevice27(int inx,
                                      real* DD, 
                                      int* k_Q, 
                                      real* QQ,
-                                     unsigned int sizeQ,
-                                     int numberOfNodes, 
+                                     unsigned int numberOfBCnodes, 
                                      real om1, 
                                      unsigned int* neighborX,
                                      unsigned int* neighborY,
@@ -3175,7 +3034,7 @@ extern "C" __global__ void BBDevice27(int inx,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<numberOfNodes)
+   if(k<numberOfBCnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
@@ -3183,32 +3042,32 @@ extern "C" __global__ void BBDevice27(int inx,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   *numberOfBCnodes];
+      q_dirW   = &QQ[dirW   *numberOfBCnodes];
+      q_dirN   = &QQ[dirN   *numberOfBCnodes];
+      q_dirS   = &QQ[dirS   *numberOfBCnodes];
+      q_dirT   = &QQ[dirT   *numberOfBCnodes];
+      q_dirB   = &QQ[dirB   *numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  *numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  *numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  *numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  *numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  *numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  *numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  *numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  *numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  *numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  *numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  *numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  *numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE *numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW *numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE *numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW *numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE *numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW *numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE *numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW *numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int numberOfNodesK  = k_Q[k];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Particles.cu b/src/gpu/VirtualFluids_GPU/GPU/Particles.cu
index c8ae62e256251c55460c7b8751b1268cc663fc03..117b6fec51255468785364cba617f6479f583f39 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Particles.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Particles.cu
@@ -1924,7 +1924,7 @@ extern "C" __global__ void ParticleNoSlipDeviceComp27(real* coordX,
 													  unsigned int numberOfParticles, 
 													  int* k_Q, 
 													  real* QQ,
-													  unsigned int sizeQ,
+													  unsigned int  numberOfBCnodes,
 													  real* NormalX,
 													  real* NormalY,
 													  real* NormalZ,
@@ -2006,7 +2006,7 @@ extern "C" __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    //const unsigned k = nx*(ny*z + y) + x;
    ////////////////////////////////////////////////////////////////////////////
 
-   //if(k < sizeQ)
+   //if(k <  numberOfBCnodes)
    //{
    //   ////////////////////////////////////////////////////////////////////////////////
    //   real *q_dirW, *q_dirS, *q_dirB;
@@ -2015,128 +2015,128 @@ extern "C" __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    // //         *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
    // //         *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
    // //         *q_dirBSE, *q_dirBNW; 
-   // //   q_dirE   = &QQ[dirE   *sizeQ];
-   //    q_dirW   = &QQ[dirW   *sizeQ];
-   // //   q_dirN   = &QQ[dirN   *sizeQ];
-   //    q_dirS   = &QQ[dirS   *sizeQ];
-   // //   q_dirT   = &QQ[dirT   *sizeQ];
-   //    q_dirB   = &QQ[dirB   *sizeQ];
-   // //   q_dirNE  = &QQ[dirNE  *sizeQ];
-   // //   q_dirSW  = &QQ[dirSW  *sizeQ];
-   // //   q_dirSE  = &QQ[dirSE  *sizeQ];
-   // //   q_dirNW  = &QQ[dirNW  *sizeQ];
-   // //   q_dirTE  = &QQ[dirTE  *sizeQ];
-   // //   q_dirBW  = &QQ[dirBW  *sizeQ];
-   // //   q_dirBE  = &QQ[dirBE  *sizeQ];
-   // //   q_dirTW  = &QQ[dirTW  *sizeQ];
-   // //   q_dirTN  = &QQ[dirTN  *sizeQ];
-   // //   q_dirBS  = &QQ[dirBS  *sizeQ];
-   // //   q_dirBN  = &QQ[dirBN  *sizeQ];
-   // //   q_dirTS  = &QQ[dirTS  *sizeQ];
-   // //   q_dirTNE = &QQ[dirTNE *sizeQ];
-   // //   q_dirTSW = &QQ[dirTSW *sizeQ];
-   // //   q_dirTSE = &QQ[dirTSE *sizeQ];
-   // //   q_dirTNW = &QQ[dirTNW *sizeQ];
-   // //   q_dirBNE = &QQ[dirBNE *sizeQ];
-   // //   q_dirBSW = &QQ[dirBSW *sizeQ];
-   // //   q_dirBSE = &QQ[dirBSE *sizeQ];
-   // //   q_dirBNW = &QQ[dirBNW *sizeQ];
+   // //   q_dirE   = &QQ[dirE   * numberOfBCnodes];
+   //    q_dirW   = &QQ[dirW   * numberOfBCnodes];
+   // //   q_dirN   = &QQ[dirN   * numberOfBCnodes];
+   //    q_dirS   = &QQ[dirS   * numberOfBCnodes];
+   // //   q_dirT   = &QQ[dirT   * numberOfBCnodes];
+   //    q_dirB   = &QQ[dirB   * numberOfBCnodes];
+   // //   q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+   // //   q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+   // //   q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+   // //   q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+   // //   q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+   // //   q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+   // //   q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+   // //   q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+   // //   q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+   // //   q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+   // //   q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+   // //   q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+   // //   q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+   // //   q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+   // //   q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+   // //   q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+   // //   q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+   // //   q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+   // //   q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+   // //   q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
    //   ////////////////////////////////////////////////////////////////////////////////
    // //   real *nx_dirE,   *nx_dirW,   *nx_dirN,   *nx_dirS,   *nx_dirT,   *nx_dirB, 
    // //           *nx_dirNE,  *nx_dirSW,  *nx_dirSE,  *nx_dirNW,  *nx_dirTE,  *nx_dirBW,
    // //           *nx_dirBE,  *nx_dirTW,  *nx_dirTN,  *nx_dirBS,  *nx_dirBN,  *nx_dirTS,
    // //           *nx_dirTNE, *nx_dirTSW, *nx_dirTSE, *nx_dirTNW, *nx_dirBNE, *nx_dirBSW,
    // //           *nx_dirBSE, *nx_dirBNW; 
-   // //   nx_dirE   = &NormalX[dirE   *sizeQ];
-   // //   nx_dirW   = &NormalX[dirW   *sizeQ];
-   // //   nx_dirN   = &NormalX[dirN   *sizeQ];
-   // //   nx_dirS   = &NormalX[dirS   *sizeQ];
-   // //   nx_dirT   = &NormalX[dirT   *sizeQ];
-   // //   nx_dirB   = &NormalX[dirB   *sizeQ];
-   // //   nx_dirNE  = &NormalX[dirNE  *sizeQ];
-   // //   nx_dirSW  = &NormalX[dirSW  *sizeQ];
-   // //   nx_dirSE  = &NormalX[dirSE  *sizeQ];
-   // //   nx_dirNW  = &NormalX[dirNW  *sizeQ];
-   // //   nx_dirTE  = &NormalX[dirTE  *sizeQ];
-   // //   nx_dirBW  = &NormalX[dirBW  *sizeQ];
-   // //   nx_dirBE  = &NormalX[dirBE  *sizeQ];
-   // //   nx_dirTW  = &NormalX[dirTW  *sizeQ];
-   // //   nx_dirTN  = &NormalX[dirTN  *sizeQ];
-   // //   nx_dirBS  = &NormalX[dirBS  *sizeQ];
-   // //   nx_dirBN  = &NormalX[dirBN  *sizeQ];
-   // //   nx_dirTS  = &NormalX[dirTS  *sizeQ];
-   // //   nx_dirTNE = &NormalX[dirTNE *sizeQ];
-   // //   nx_dirTSW = &NormalX[dirTSW *sizeQ];
-   // //   nx_dirTSE = &NormalX[dirTSE *sizeQ];
-   // //   nx_dirTNW = &NormalX[dirTNW *sizeQ];
-   // //   nx_dirBNE = &NormalX[dirBNE *sizeQ];
-   // //   nx_dirBSW = &NormalX[dirBSW *sizeQ];
-   // //   nx_dirBSE = &NormalX[dirBSE *sizeQ];
-   // //   nx_dirBNW = &NormalX[dirBNW *sizeQ];
+   // //   nx_dirE   = &NormalX[dirE   * numberOfBCnodes];
+   // //   nx_dirW   = &NormalX[dirW   * numberOfBCnodes];
+   // //   nx_dirN   = &NormalX[dirN   * numberOfBCnodes];
+   // //   nx_dirS   = &NormalX[dirS   * numberOfBCnodes];
+   // //   nx_dirT   = &NormalX[dirT   * numberOfBCnodes];
+   // //   nx_dirB   = &NormalX[dirB   * numberOfBCnodes];
+   // //   nx_dirNE  = &NormalX[dirNE  * numberOfBCnodes];
+   // //   nx_dirSW  = &NormalX[dirSW  * numberOfBCnodes];
+   // //   nx_dirSE  = &NormalX[dirSE  * numberOfBCnodes];
+   // //   nx_dirNW  = &NormalX[dirNW  * numberOfBCnodes];
+   // //   nx_dirTE  = &NormalX[dirTE  * numberOfBCnodes];
+   // //   nx_dirBW  = &NormalX[dirBW  * numberOfBCnodes];
+   // //   nx_dirBE  = &NormalX[dirBE  * numberOfBCnodes];
+   // //   nx_dirTW  = &NormalX[dirTW  * numberOfBCnodes];
+   // //   nx_dirTN  = &NormalX[dirTN  * numberOfBCnodes];
+   // //   nx_dirBS  = &NormalX[dirBS  * numberOfBCnodes];
+   // //   nx_dirBN  = &NormalX[dirBN  * numberOfBCnodes];
+   // //   nx_dirTS  = &NormalX[dirTS  * numberOfBCnodes];
+   // //   nx_dirTNE = &NormalX[dirTNE * numberOfBCnodes];
+   // //   nx_dirTSW = &NormalX[dirTSW * numberOfBCnodes];
+   // //   nx_dirTSE = &NormalX[dirTSE * numberOfBCnodes];
+   // //   nx_dirTNW = &NormalX[dirTNW * numberOfBCnodes];
+   // //   nx_dirBNE = &NormalX[dirBNE * numberOfBCnodes];
+   // //   nx_dirBSW = &NormalX[dirBSW * numberOfBCnodes];
+   // //   nx_dirBSE = &NormalX[dirBSE * numberOfBCnodes];
+   // //   nx_dirBNW = &NormalX[dirBNW * numberOfBCnodes];
    //   ////////////////////////////////////////////////////////////////////////////////
    // //   real *ny_dirE,   *ny_dirW,   *ny_dirN,   *ny_dirS,   *ny_dirT,   *ny_dirB, 
    // //           *ny_dirNE,  *ny_dirSW,  *ny_dirSE,  *ny_dirNW,  *ny_dirTE,  *ny_dirBW,
    // //           *ny_dirBE,  *ny_dirTW,  *ny_dirTN,  *ny_dirBS,  *ny_dirBN,  *ny_dirTS,
    // //           *ny_dirTNE, *ny_dirTSW, *ny_dirTSE, *ny_dirTNW, *ny_dirBNE, *ny_dirBSW,
    // //           *ny_dirBSE, *ny_dirBNW; 
-   // //   ny_dirE   = &NormalY[dirE   *sizeQ];
-   // //   ny_dirW   = &NormalY[dirW   *sizeQ];
-   // //   ny_dirN   = &NormalY[dirN   *sizeQ];
-   // //   ny_dirS   = &NormalY[dirS   *sizeQ];
-   // //   ny_dirT   = &NormalY[dirT   *sizeQ];
-   // //   ny_dirB   = &NormalY[dirB   *sizeQ];
-   // //   ny_dirNE  = &NormalY[dirNE  *sizeQ];
-   // //   ny_dirSW  = &NormalY[dirSW  *sizeQ];
-   // //   ny_dirSE  = &NormalY[dirSE  *sizeQ];
-   // //   ny_dirNW  = &NormalY[dirNW  *sizeQ];
-   // //   ny_dirTE  = &NormalY[dirTE  *sizeQ];
-   // //   ny_dirBW  = &NormalY[dirBW  *sizeQ];
-   // //   ny_dirBE  = &NormalY[dirBE  *sizeQ];
-   // //   ny_dirTW  = &NormalY[dirTW  *sizeQ];
-   // //   ny_dirTN  = &NormalY[dirTN  *sizeQ];
-   // //   ny_dirBS  = &NormalY[dirBS  *sizeQ];
-   // //   ny_dirBN  = &NormalY[dirBN  *sizeQ];
-   // //   ny_dirTS  = &NormalY[dirTS  *sizeQ];
-   // //   ny_dirTNE = &NormalY[dirTNE *sizeQ];
-   // //   ny_dirTSW = &NormalY[dirTSW *sizeQ];
-   // //   ny_dirTSE = &NormalY[dirTSE *sizeQ];
-   // //   ny_dirTNW = &NormalY[dirTNW *sizeQ];
-   // //   ny_dirBNE = &NormalY[dirBNE *sizeQ];
-   // //   ny_dirBSW = &NormalY[dirBSW *sizeQ];
-   // //   ny_dirBSE = &NormalY[dirBSE *sizeQ];
-   // //   ny_dirBNW = &NormalY[dirBNW *sizeQ];
+   // //   ny_dirE   = &NormalY[dirE   * numberOfBCnodes];
+   // //   ny_dirW   = &NormalY[dirW   * numberOfBCnodes];
+   // //   ny_dirN   = &NormalY[dirN   * numberOfBCnodes];
+   // //   ny_dirS   = &NormalY[dirS   * numberOfBCnodes];
+   // //   ny_dirT   = &NormalY[dirT   * numberOfBCnodes];
+   // //   ny_dirB   = &NormalY[dirB   * numberOfBCnodes];
+   // //   ny_dirNE  = &NormalY[dirNE  * numberOfBCnodes];
+   // //   ny_dirSW  = &NormalY[dirSW  * numberOfBCnodes];
+   // //   ny_dirSE  = &NormalY[dirSE  * numberOfBCnodes];
+   // //   ny_dirNW  = &NormalY[dirNW  * numberOfBCnodes];
+   // //   ny_dirTE  = &NormalY[dirTE  * numberOfBCnodes];
+   // //   ny_dirBW  = &NormalY[dirBW  * numberOfBCnodes];
+   // //   ny_dirBE  = &NormalY[dirBE  * numberOfBCnodes];
+   // //   ny_dirTW  = &NormalY[dirTW  * numberOfBCnodes];
+   // //   ny_dirTN  = &NormalY[dirTN  * numberOfBCnodes];
+   // //   ny_dirBS  = &NormalY[dirBS  * numberOfBCnodes];
+   // //   ny_dirBN  = &NormalY[dirBN  * numberOfBCnodes];
+   // //   ny_dirTS  = &NormalY[dirTS  * numberOfBCnodes];
+   // //   ny_dirTNE = &NormalY[dirTNE * numberOfBCnodes];
+   // //   ny_dirTSW = &NormalY[dirTSW * numberOfBCnodes];
+   // //   ny_dirTSE = &NormalY[dirTSE * numberOfBCnodes];
+   // //   ny_dirTNW = &NormalY[dirTNW * numberOfBCnodes];
+   // //   ny_dirBNE = &NormalY[dirBNE * numberOfBCnodes];
+   // //   ny_dirBSW = &NormalY[dirBSW * numberOfBCnodes];
+   // //   ny_dirBSE = &NormalY[dirBSE * numberOfBCnodes];
+   // //   ny_dirBNW = &NormalY[dirBNW * numberOfBCnodes];
    //   ////////////////////////////////////////////////////////////////////////////////
    // //   real *nz_dirE,   *nz_dirW,   *nz_dirN,   *nz_dirS,   *nz_dirT,   *nz_dirB, 
    // //           *nz_dirNE,  *nz_dirSW,  *nz_dirSE,  *nz_dirNW,  *nz_dirTE,  *nz_dirBW,
    // //           *nz_dirBE,  *nz_dirTW,  *nz_dirTN,  *nz_dirBS,  *nz_dirBN,  *nz_dirTS,
    // //           *nz_dirTNE, *nz_dirTSW, *nz_dirTSE, *nz_dirTNW, *nz_dirBNE, *nz_dirBSW,
    // //           *nz_dirBSE, *nz_dirBNW; 
-   // //   nz_dirE   = &NormalZ[dirE   *sizeQ];
-   // //   nz_dirW   = &NormalZ[dirW   *sizeQ];
-   // //   nz_dirN   = &NormalZ[dirN   *sizeQ];
-   // //   nz_dirS   = &NormalZ[dirS   *sizeQ];
-   // //   nz_dirT   = &NormalZ[dirT   *sizeQ];
-   // //   nz_dirB   = &NormalZ[dirB   *sizeQ];
-   // //   nz_dirNE  = &NormalZ[dirNE  *sizeQ];
-   // //   nz_dirSW  = &NormalZ[dirSW  *sizeQ];
-   // //   nz_dirSE  = &NormalZ[dirSE  *sizeQ];
-   // //   nz_dirNW  = &NormalZ[dirNW  *sizeQ];
-   // //   nz_dirTE  = &NormalZ[dirTE  *sizeQ];
-   // //   nz_dirBW  = &NormalZ[dirBW  *sizeQ];
-   // //   nz_dirBE  = &NormalZ[dirBE  *sizeQ];
-   // //   nz_dirTW  = &NormalZ[dirTW  *sizeQ];
-   // //   nz_dirTN  = &NormalZ[dirTN  *sizeQ];
-   // //   nz_dirBS  = &NormalZ[dirBS  *sizeQ];
-   // //   nz_dirBN  = &NormalZ[dirBN  *sizeQ];
-   // //   nz_dirTS  = &NormalZ[dirTS  *sizeQ];
-   // //   nz_dirTNE = &NormalZ[dirTNE *sizeQ];
-   // //   nz_dirTSW = &NormalZ[dirTSW *sizeQ];
-   // //   nz_dirTSE = &NormalZ[dirTSE *sizeQ];
-   // //   nz_dirTNW = &NormalZ[dirTNW *sizeQ];
-   // //   nz_dirBNE = &NormalZ[dirBNE *sizeQ];
-   // //   nz_dirBSW = &NormalZ[dirBSW *sizeQ];
-   // //   nz_dirBSE = &NormalZ[dirBSE *sizeQ];
-   // //   nz_dirBNW = &NormalZ[dirBNW *sizeQ];
+   // //   nz_dirE   = &NormalZ[dirE   * numberOfBCnodes];
+   // //   nz_dirW   = &NormalZ[dirW   * numberOfBCnodes];
+   // //   nz_dirN   = &NormalZ[dirN   * numberOfBCnodes];
+   // //   nz_dirS   = &NormalZ[dirS   * numberOfBCnodes];
+   // //   nz_dirT   = &NormalZ[dirT   * numberOfBCnodes];
+   // //   nz_dirB   = &NormalZ[dirB   * numberOfBCnodes];
+   // //   nz_dirNE  = &NormalZ[dirNE  * numberOfBCnodes];
+   // //   nz_dirSW  = &NormalZ[dirSW  * numberOfBCnodes];
+   // //   nz_dirSE  = &NormalZ[dirSE  * numberOfBCnodes];
+   // //   nz_dirNW  = &NormalZ[dirNW  * numberOfBCnodes];
+   // //   nz_dirTE  = &NormalZ[dirTE  * numberOfBCnodes];
+   // //   nz_dirBW  = &NormalZ[dirBW  * numberOfBCnodes];
+   // //   nz_dirBE  = &NormalZ[dirBE  * numberOfBCnodes];
+   // //   nz_dirTW  = &NormalZ[dirTW  * numberOfBCnodes];
+   // //   nz_dirTN  = &NormalZ[dirTN  * numberOfBCnodes];
+   // //   nz_dirBS  = &NormalZ[dirBS  * numberOfBCnodes];
+   // //   nz_dirBN  = &NormalZ[dirBN  * numberOfBCnodes];
+   // //   nz_dirTS  = &NormalZ[dirTS  * numberOfBCnodes];
+   // //   nz_dirTNE = &NormalZ[dirTNE * numberOfBCnodes];
+   // //   nz_dirTSW = &NormalZ[dirTSW * numberOfBCnodes];
+   // //   nz_dirTSE = &NormalZ[dirTSE * numberOfBCnodes];
+   // //   nz_dirTNW = &NormalZ[dirTNW * numberOfBCnodes];
+   // //   nz_dirBNE = &NormalZ[dirBNE * numberOfBCnodes];
+   // //   nz_dirBSW = &NormalZ[dirBSW * numberOfBCnodes];
+   // //   nz_dirBSE = &NormalZ[dirBSE * numberOfBCnodes];
+   // //   nz_dirBNW = &NormalZ[dirBNW * numberOfBCnodes];
    //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //bool changeCell = false;
    //   unsigned int KQK  = k_Q[k];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
index c358fe184f32f0e778b913dc54a321e20e0ebadf..a7d711db8ec24bd160f20b5085ffc3f414285c1a 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
@@ -1,7 +1,8 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "LBM/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include "lbm/constants/NumericConstants.h"
+#include "KernelUtilities.h"
 
 using namespace vf::lbm::constant;
 
@@ -803,33 +804,53 @@ extern "C" __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 extern "C" __global__ void QPressDeviceNEQ27(real* rhoBC,
-                                             real* DD, 
-                                             int* k_Q, 
-                                             int* k_N, 
-                                             int numberOfBCnodes, 
-                                             real om1, 
+                                             real* distribution, 
+                                             int* bcNodeIndices,
+                                             int* bcNeighborIndices,
+                                             int numberOfBCnodes,
+                                             real omega1, 
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat, 
+                                             unsigned int numberOfLBnodes, 
                                              bool isEvenTimestep)
 {
-   ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   //////////////////////////////////////////////////////////////////////////
+	//! The pressure boundary condition is executed in the following steps
+	//!
+	////////////////////////////////////////////////////////////////////////////////
+	//! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+	//!
+   const unsigned x = threadIdx.x;    // global x-index 
+   const unsigned y = blockIdx.x;     // global y-index 
+   const unsigned z = blockIdx.y;     // global z-index 
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
 
    const unsigned k = nx*(ny*z + y) + x;
-   //////////////////////////////////////////////////////////////////////////
 
-   if(k<numberOfBCnodes)
+   //////////////////////////////////////////////////////////////////////////
+   //! - Run for all indices in size of boundary condition (numberOfBCnodes)
+   //!
+   if(k < numberOfBCnodes)
    {
+      //////////////////////////////////////////////////////////////////////////
+      //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
+      //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+      //!
+      Distributions27 dist;
+      getPointersToDistributions(dist, distribution, numberOfLBnodes, isEvenTimestep);
+
       ////////////////////////////////////////////////////////////////////////////////
-      //index
-      unsigned int KQK  = k_Q[k];
+      //! - Set local pressure
+      //!
+      real rhoBClocal = rhoBC[k];
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Set neighbor indices (necessary for indirect addressing)
+      //!
+      unsigned int KQK  = bcNodeIndices[k];
       unsigned int kzero= KQK;
       unsigned int ke   = KQK;
       unsigned int kw   = neighborX[KQK];
@@ -858,8 +879,9 @@ extern "C" __global__ void QPressDeviceNEQ27(real* rhoBC,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      //index1
-      unsigned int K1QK  = k_N[k];
+      //! - Set neighbor indices (necessary for indirect addressing) for neighboring node
+      //!
+      unsigned int K1QK  = bcNeighborIndices[k];
       unsigned int k1zero= K1QK;
       unsigned int k1e   = K1QK;
       unsigned int k1w   = neighborX[K1QK];
@@ -887,123 +909,62 @@ extern "C" __global__ void QPressDeviceNEQ27(real* rhoBC,
       unsigned int k1bne = k1b;
       unsigned int k1tne = K1QK;
       unsigned int k1bsw = neighborZ[k1sw];
-      ////////////////////////////////////////////////////////////////////////////////
-      Distributions27 D;
-      if (isEvenTimestep==true) //// ACHTUNG PREColl !!!!!!!!!!!!!!
-      {
-         D.f[dirE   ] = &DD[dirE   *size_Mat];
-         D.f[dirW   ] = &DD[dirW   *size_Mat];
-         D.f[dirN   ] = &DD[dirN   *size_Mat];
-         D.f[dirS   ] = &DD[dirS   *size_Mat];
-         D.f[dirT   ] = &DD[dirT   *size_Mat];
-         D.f[dirB   ] = &DD[dirB   *size_Mat];
-         D.f[dirNE  ] = &DD[dirNE  *size_Mat];
-         D.f[dirSW  ] = &DD[dirSW  *size_Mat];
-         D.f[dirSE  ] = &DD[dirSE  *size_Mat];
-         D.f[dirNW  ] = &DD[dirNW  *size_Mat];
-         D.f[dirTE  ] = &DD[dirTE  *size_Mat];
-         D.f[dirBW  ] = &DD[dirBW  *size_Mat];
-         D.f[dirBE  ] = &DD[dirBE  *size_Mat];
-         D.f[dirTW  ] = &DD[dirTW  *size_Mat];
-         D.f[dirTN  ] = &DD[dirTN  *size_Mat];
-         D.f[dirBS  ] = &DD[dirBS  *size_Mat];
-         D.f[dirBN  ] = &DD[dirBN  *size_Mat];
-         D.f[dirTS  ] = &DD[dirTS  *size_Mat];
-         D.f[dirZERO] = &DD[dirZERO*size_Mat];
-         D.f[dirTNE ] = &DD[dirTNE *size_Mat];
-         D.f[dirTSW ] = &DD[dirTSW *size_Mat];
-         D.f[dirTSE ] = &DD[dirTSE *size_Mat];
-         D.f[dirTNW ] = &DD[dirTNW *size_Mat];
-         D.f[dirBNE ] = &DD[dirBNE *size_Mat];
-         D.f[dirBSW ] = &DD[dirBSW *size_Mat];
-         D.f[dirBSE ] = &DD[dirBSE *size_Mat];
-         D.f[dirBNW ] = &DD[dirBNW *size_Mat];
-      } 
-      else
-      {
-         D.f[dirW   ] = &DD[dirE   *size_Mat];
-         D.f[dirE   ] = &DD[dirW   *size_Mat];
-         D.f[dirS   ] = &DD[dirN   *size_Mat];
-         D.f[dirN   ] = &DD[dirS   *size_Mat];
-         D.f[dirB   ] = &DD[dirT   *size_Mat];
-         D.f[dirT   ] = &DD[dirB   *size_Mat];
-         D.f[dirSW  ] = &DD[dirNE  *size_Mat];
-         D.f[dirNE  ] = &DD[dirSW  *size_Mat];
-         D.f[dirNW  ] = &DD[dirSE  *size_Mat];
-         D.f[dirSE  ] = &DD[dirNW  *size_Mat];
-         D.f[dirBW  ] = &DD[dirTE  *size_Mat];
-         D.f[dirTE  ] = &DD[dirBW  *size_Mat];
-         D.f[dirTW  ] = &DD[dirBE  *size_Mat];
-         D.f[dirBE  ] = &DD[dirTW  *size_Mat];
-         D.f[dirBS  ] = &DD[dirTN  *size_Mat];
-         D.f[dirTN  ] = &DD[dirBS  *size_Mat];
-         D.f[dirTS  ] = &DD[dirBN  *size_Mat];
-         D.f[dirBN  ] = &DD[dirTS  *size_Mat];
-         D.f[dirZERO] = &DD[dirZERO*size_Mat];
-         D.f[dirTNE ] = &DD[dirBSW *size_Mat];
-         D.f[dirTSW ] = &DD[dirBNE *size_Mat];
-         D.f[dirTSE ] = &DD[dirBNW *size_Mat];
-         D.f[dirTNW ] = &DD[dirBSE *size_Mat];
-         D.f[dirBNE ] = &DD[dirTSW *size_Mat];
-         D.f[dirBSW ] = &DD[dirTNE *size_Mat];
-         D.f[dirBSE ] = &DD[dirTNW *size_Mat];
-         D.f[dirBNW ] = &DD[dirTSE *size_Mat];
-      }
-      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
-                     f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
-
-      f1_W    = (D.f[dirE   ])[k1e   ];
-      f1_E    = (D.f[dirW   ])[k1w   ];
-      f1_S    = (D.f[dirN   ])[k1n   ];
-      f1_N    = (D.f[dirS   ])[k1s   ];
-      f1_B    = (D.f[dirT   ])[k1t   ];
-      f1_T    = (D.f[dirB   ])[k1b   ];
-      f1_SW   = (D.f[dirNE  ])[k1ne  ];
-      f1_NE   = (D.f[dirSW  ])[k1sw  ];
-      f1_NW   = (D.f[dirSE  ])[k1se  ];
-      f1_SE   = (D.f[dirNW  ])[k1nw  ];
-      f1_BW   = (D.f[dirTE  ])[k1te  ];
-      f1_TE   = (D.f[dirBW  ])[k1bw  ];
-      f1_TW   = (D.f[dirBE  ])[k1be  ];
-      f1_BE   = (D.f[dirTW  ])[k1tw  ];
-      f1_BS   = (D.f[dirTN  ])[k1tn  ];
-      f1_TN   = (D.f[dirBS  ])[k1bs  ];
-      f1_TS   = (D.f[dirBN  ])[k1bn  ];
-      f1_BN   = (D.f[dirTS  ])[k1ts  ];
-      f1_ZERO = (D.f[dirZERO])[k1zero];
-      f1_BSW  = (D.f[dirTNE ])[k1tne ];
-      f1_BNE  = (D.f[dirTSW ])[k1tsw ];
-      f1_BNW  = (D.f[dirTSE ])[k1tse ];
-      f1_BSE  = (D.f[dirTNW ])[k1tnw ];
-      f1_TSW  = (D.f[dirBNE ])[k1bne ];
-      f1_TNE  = (D.f[dirBSW ])[k1bsw ];
-      f1_TNW  = (D.f[dirBSE ])[k1bse ];
-      f1_TSE  = (D.f[dirBNW ])[k1bnw ];
 
-      //////////////////////////////////////////////////////////////////////////
-      real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+
-                          f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW;
-
-      real vx1      =  ((f1_TSE - f1_BNW) - (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) +
-						  ((f1_BE - f1_TW)   + (f1_TE - f1_BW))   + ((f1_SE - f1_NW)   + (f1_NE - f1_SW)) +
-						  (f1_E - f1_W); 
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Set local distributions for neighboring node
+      //!
+      real f1_W    = (dist.f[dirE   ])[k1e   ];
+      real f1_E    = (dist.f[dirW   ])[k1w   ];
+      real f1_S    = (dist.f[dirN   ])[k1n   ];
+      real f1_N    = (dist.f[dirS   ])[k1s   ];
+      real f1_B    = (dist.f[dirT   ])[k1t   ];
+      real f1_T    = (dist.f[dirB   ])[k1b   ];
+      real f1_SW   = (dist.f[dirNE  ])[k1ne  ];
+      real f1_NE   = (dist.f[dirSW  ])[k1sw  ];
+      real f1_NW   = (dist.f[dirSE  ])[k1se  ];
+      real f1_SE   = (dist.f[dirNW  ])[k1nw  ];
+      real f1_BW   = (dist.f[dirTE  ])[k1te  ];
+      real f1_TE   = (dist.f[dirBW  ])[k1bw  ];
+      real f1_TW   = (dist.f[dirBE  ])[k1be  ];
+      real f1_BE   = (dist.f[dirTW  ])[k1tw  ];
+      real f1_BS   = (dist.f[dirTN  ])[k1tn  ];
+      real f1_TN   = (dist.f[dirBS  ])[k1bs  ];
+      real f1_TS   = (dist.f[dirBN  ])[k1bn  ];
+      real f1_BN   = (dist.f[dirTS  ])[k1ts  ];
+      real f1_ZERO = (dist.f[dirZERO])[k1zero];
+      real f1_BSW  = (dist.f[dirTNE ])[k1tne ];
+      real f1_BNE  = (dist.f[dirTSW ])[k1tsw ];
+      real f1_BNW  = (dist.f[dirTSE ])[k1tse ];
+      real f1_BSE  = (dist.f[dirTNW ])[k1tnw ];
+      real f1_TSW  = (dist.f[dirBNE ])[k1bne ];
+      real f1_TNE  = (dist.f[dirBSW ])[k1bsw ];
+      real f1_TNW  = (dist.f[dirBSE ])[k1bse ];
+      real f1_TSE  = (dist.f[dirBNW ])[k1bnw ];
 
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Calculate macroscopic quantities (for neighboring node)
+      //!
+      real drho1 = f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW +
+                   f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + 
+                   f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + f1_ZERO; // f1_ZERO was read at k1zero (neighbor node); do not mix in the BC node's rest population (kzero)
 
-      real vx2    =   (-(f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) +
-						 ((f1_BN - f1_TS)   + (f1_TN - f1_BS))    + (-(f1_SE - f1_NW)  + (f1_NE - f1_SW)) +
-						 (f1_N - f1_S); 
+      real vx1  = (((f1_TSE - f1_BNW) - (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) +
+                   ((f1_BE - f1_TW)   + (f1_TE - f1_BW))   + ((f1_SE - f1_NW)   + (f1_NE - f1_SW)) +
+                   (f1_E - f1_W)) / (c1o1 + drho1);          
 
-      real vx3    =   ((f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) + (f1_TSW - f1_BNE)) +
-						 (-(f1_BN - f1_TS)  + (f1_TN - f1_BS))   + ((f1_TE - f1_BW)   - (f1_BE - f1_TW)) +
-						 (f1_T - f1_B); 
+      real vx2  = ((-(f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) +
+                   ((f1_BN - f1_TS)   + (f1_TN - f1_BS))    + (-(f1_SE - f1_NW)  + (f1_NE - f1_SW)) +
+                   (f1_N - f1_S)) / (c1o1 + drho1); 
 
-	  vx1 /= (drho1+c1o1);
-	  vx2 /= (drho1+c1o1);
-	  vx3 /= (drho1+c1o1);
+      real vx3  = (((f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) + (f1_TSW - f1_BNE)) +
+                   (-(f1_BN - f1_TS)  + (f1_TN - f1_BS))   + ((f1_TE - f1_BW)   - (f1_BE - f1_TW)) +
+                   (f1_T - f1_B)) / (c1o1 + drho1); 
 
-      real cusq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
+      real cusq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
 
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Subtract the equilibrium (eq) to obtain the non-equilibrium (neq) (for neighboring node)
+      //!
       f1_ZERO  -= c8o27*  (drho1-(drho1+c1o1)*cusq);
       f1_E     -= c2o27*  (drho1+(drho1+c1o1)*(c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cusq));
       f1_W     -= c2o27*  (drho1+(drho1+c1o1)*(c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cusq));
@@ -1031,9 +992,15 @@ extern "C" __global__ void QPressDeviceNEQ27(real* rhoBC,
       f1_BNW   -=  c1o216*(drho1+(drho1+c1o1)*(c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cusq));
       f1_BSE   -=  c1o216*(drho1+(drho1+c1o1)*(c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq));
       f1_TNW   -=  c1o216*(drho1+(drho1+c1o1)*(c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq));
-	   
-	  drho1 = rhoBC[k];
 
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Redefine drho1 with rhoBClocal
+      //!
+      drho1 = rhoBClocal;
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Add the equilibrium (eq), which is calculated with rhoBClocal (for neighboring node)
+      //!
       f1_ZERO  += c8o27*  (drho1-(drho1+c1o1)*cusq);
       f1_E     += c2o27*  (drho1+(drho1+c1o1)*(c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cusq));
       f1_W     += c2o27*  (drho1+(drho1+c1o1)*(c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cusq));
@@ -1062,39 +1029,40 @@ extern "C" __global__ void QPressDeviceNEQ27(real* rhoBC,
       f1_BSE   +=  c1o216*(drho1+(drho1+c1o1)*(c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq));
       f1_TNW   +=  c1o216*(drho1+(drho1+c1o1)*(c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq));
 
-	  //drho1 = (drho1 + rhoBC[k])/2.f;
-	  //drho1 = drho1 - rhoBC[k];
       //////////////////////////////////////////////////////////////////////////
 
       __syncthreads();
 
-      (D.f[dirE   ])[ke   ] = f1_W   ;  
-      (D.f[dirW   ])[kw   ] = f1_E   ;	
-      (D.f[dirN   ])[kn   ] = f1_S   ;	
-      (D.f[dirS   ])[ks   ] = f1_N   ;	
-      (D.f[dirT   ])[kt   ] = f1_B   ;	
-      (D.f[dirB   ])[kb   ] = f1_T   ;	
-      (D.f[dirNE  ])[kne  ] = f1_SW  ;	
-      (D.f[dirSW  ])[ksw  ] = f1_NE  ;	
-      (D.f[dirSE  ])[kse  ] = f1_NW  ;	
-      (D.f[dirNW  ])[knw  ] = f1_SE  ;	
-      (D.f[dirTE  ])[kte  ] = f1_BW  ;	
-      (D.f[dirBW  ])[kbw  ] = f1_TE  ;	
-      (D.f[dirBE  ])[kbe  ] = f1_TW  ;	
-      (D.f[dirTW  ])[ktw  ] = f1_BE  ;	
-      (D.f[dirTN  ])[ktn  ] = f1_BS  ;	
-      (D.f[dirBS  ])[kbs  ] = f1_TN  ;	
-      (D.f[dirBN  ])[kbn  ] = f1_TS  ;	
-      (D.f[dirTS  ])[kts  ] = f1_BN  ;	
-      (D.f[dirZERO])[kzero] = f1_ZERO;	
-      (D.f[dirTNE ])[ktne ] = f1_BSW ;	
-      (D.f[dirTSW ])[ktsw ] = f1_BNE ;	
-      (D.f[dirTSE ])[ktse ] = f1_BNW ;	
-      (D.f[dirTNW ])[ktnw ] = f1_BSE ;	
-      (D.f[dirBNE ])[kbne ] = f1_TSW ;	
-      (D.f[dirBSW ])[kbsw ] = f1_TNE ;	
-      (D.f[dirBSE ])[kbse ] = f1_TNW ;	
-      (D.f[dirBNW ])[kbnw ] = f1_TSE ;       
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Write the new distributions to the bc nodes
+      //!
+      (dist.f[dirE   ])[ke   ] = f1_W   ;
+      (dist.f[dirW   ])[kw   ] = f1_E   ;
+      (dist.f[dirN   ])[kn   ] = f1_S   ;
+      (dist.f[dirS   ])[ks   ] = f1_N   ;
+      (dist.f[dirT   ])[kt   ] = f1_B   ;
+      (dist.f[dirB   ])[kb   ] = f1_T   ;
+      (dist.f[dirNE  ])[kne  ] = f1_SW  ;
+      (dist.f[dirSW  ])[ksw  ] = f1_NE  ;
+      (dist.f[dirSE  ])[kse  ] = f1_NW  ;
+      (dist.f[dirNW  ])[knw  ] = f1_SE  ;
+      (dist.f[dirTE  ])[kte  ] = f1_BW  ;
+      (dist.f[dirBW  ])[kbw  ] = f1_TE  ;
+      (dist.f[dirBE  ])[kbe  ] = f1_TW  ;
+      (dist.f[dirTW  ])[ktw  ] = f1_BE  ;
+      (dist.f[dirTN  ])[ktn  ] = f1_BS  ;
+      (dist.f[dirBS  ])[kbs  ] = f1_TN  ;
+      (dist.f[dirBN  ])[kbn  ] = f1_TS  ;
+      (dist.f[dirTS  ])[kts  ] = f1_BN  ;
+      (dist.f[dirZERO])[kzero] = f1_ZERO;
+      (dist.f[dirTNE ])[ktne ] = f1_BSW ;
+      (dist.f[dirTSW ])[ktsw ] = f1_BNE ;
+      (dist.f[dirTSE ])[ktse ] = f1_BNW ;
+      (dist.f[dirTNW ])[ktnw ] = f1_BSE ;
+      (dist.f[dirBNE ])[kbne ] = f1_TSW ;
+      (dist.f[dirBSW ])[kbsw ] = f1_TNE ;
+      (dist.f[dirBSE ])[kbse ] = f1_TNW ;
+      (dist.f[dirBNW ])[kbnw ] = f1_TSE ;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -1447,14 +1415,11 @@ extern "C" __global__ void LB_BC_Press_East27( int nx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDevice27(int inx,
-                                           int iny,
-                                           real* rhoBC,
+extern "C" __global__ void QPressDevice27(real* rhoBC,
                                            real* DD, 
                                            int* k_Q, 
                                            real* QQ,
-                                           unsigned int sizeQ,
-                                           int numberOfBCnodes, 
+                                           unsigned int numberOfBCnodes, 
                                            real om1, 
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
@@ -1541,32 +1506,32 @@ extern "C" __global__ void QPressDevice27(int inx,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -4853,8 +4818,7 @@ extern "C" __global__ void QPressDevice27_IntBB(real* rho,
 												real* DD, 
 												int* k_Q, 
 												real* QQ,
-												unsigned int sizeQ,
-												int numberOfBCnodes, 
+												unsigned int numberOfBCnodes, 
 												real om1, 
 												unsigned int* neighborX,
 												unsigned int* neighborY,
@@ -4934,7 +4898,7 @@ extern "C" __global__ void QPressDevice27_IntBB(real* rho,
 	const unsigned k = nx*(ny*z + y) + x;
 	//////////////////////////////////////////////////////////////////////////
 
-	if(k<numberOfBCnodes)
+	if(k < numberOfBCnodes)
 	{
 		////////////////////////////////////////////////////////////////////////////////
 		//real VeloX = vx[k];
@@ -4946,32 +4910,32 @@ extern "C" __global__ void QPressDevice27_IntBB(real* rho,
 			*q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW; 
-		q_dirE   = &QQ[dirE   *sizeQ];
-		q_dirW   = &QQ[dirW   *sizeQ];
-		q_dirN   = &QQ[dirN   *sizeQ];
-		q_dirS   = &QQ[dirS   *sizeQ];
-		q_dirT   = &QQ[dirT   *sizeQ];
-		q_dirB   = &QQ[dirB   *sizeQ];
-		q_dirNE  = &QQ[dirNE  *sizeQ];
-		q_dirSW  = &QQ[dirSW  *sizeQ];
-		q_dirSE  = &QQ[dirSE  *sizeQ];
-		q_dirNW  = &QQ[dirNW  *sizeQ];
-		q_dirTE  = &QQ[dirTE  *sizeQ];
-		q_dirBW  = &QQ[dirBW  *sizeQ];
-		q_dirBE  = &QQ[dirBE  *sizeQ];
-		q_dirTW  = &QQ[dirTW  *sizeQ];
-		q_dirTN  = &QQ[dirTN  *sizeQ];
-		q_dirBS  = &QQ[dirBS  *sizeQ];
-		q_dirBN  = &QQ[dirBN  *sizeQ];
-		q_dirTS  = &QQ[dirTS  *sizeQ];
-		q_dirTNE = &QQ[dirTNE *sizeQ];
-		q_dirTSW = &QQ[dirTSW *sizeQ];
-		q_dirTSE = &QQ[dirTSE *sizeQ];
-		q_dirTNW = &QQ[dirTNW *sizeQ];
-		q_dirBNE = &QQ[dirBNE *sizeQ];
-		q_dirBSW = &QQ[dirBSW *sizeQ];
-		q_dirBSE = &QQ[dirBSE *sizeQ];
-		q_dirBNW = &QQ[dirBNW *sizeQ];
+		q_dirE   = &QQ[dirE   * numberOfBCnodes];
+		q_dirW   = &QQ[dirW   * numberOfBCnodes];
+		q_dirN   = &QQ[dirN   * numberOfBCnodes];
+		q_dirS   = &QQ[dirS   * numberOfBCnodes];
+		q_dirT   = &QQ[dirT   * numberOfBCnodes];
+		q_dirB   = &QQ[dirB   * numberOfBCnodes];
+		q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+		q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+		q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+		q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+		q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+		q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+		q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+		q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+		q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+		q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+		q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+		q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+		q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+		q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+		q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+		q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+		q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+		q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+		q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+		q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
 		////////////////////////////////////////////////////////////////////////////////
 		//index
 		unsigned int KQK  = k_Q[k];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
index bae041bcdbaf56c86c0f16a3f622784f870fab87..804aa496bdeac7cdf0ea4b0ad4ef155483b5912d 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
@@ -9,7 +9,7 @@ using namespace vf::lbm::constant;
 extern "C" __global__ void QSlipDevice27(real* DD, 
                                          int* k_Q, 
                                          real* QQ,
-                                         unsigned int sizeQ,
+                                         unsigned int numberOfBCnodes,
                                          real om1, 
                                          unsigned int* neighborX,
                                          unsigned int* neighborY,
@@ -89,7 +89,7 @@ extern "C" __global__ void QSlipDevice27(real* DD,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<sizeQ)
+   if(k<numberOfBCnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
@@ -97,32 +97,32 @@ extern "C" __global__ void QSlipDevice27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -659,7 +659,7 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 extern "C" __global__ void QSlipDeviceComp27(real* DD, 
 											 int* k_Q, 
 											 real* QQ,
-											 unsigned int sizeQ,
+											 unsigned int numberOfBCnodes,
 											 real om1, 
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
@@ -739,7 +739,7 @@ extern "C" __global__ void QSlipDeviceComp27(real* DD,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<sizeQ)
+   if(k < numberOfBCnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
@@ -747,32 +747,32 @@ extern "C" __global__ void QSlipDeviceComp27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -1324,7 +1324,7 @@ extern "C" __global__ void QSlipDeviceComp27(real* DD,
 extern "C" __global__ void QSlipDeviceComp27TurbViscosity(real* DD, 
 											 int* k_Q, 
 											 real* QQ,
-											 unsigned int sizeQ,
+											 unsigned int numberOfBCnodes,
 											 real om1, 
 											 unsigned int* neighborX,
 											 unsigned int* neighborY,
@@ -1405,7 +1405,7 @@ extern "C" __global__ void QSlipDeviceComp27TurbViscosity(real* DD,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<sizeQ)
+   if(k<numberOfBCnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
@@ -1413,32 +1413,32 @@ extern "C" __global__ void QSlipDeviceComp27TurbViscosity(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -2029,7 +2029,7 @@ extern "C" __global__ void QSlipDeviceComp27TurbViscosity(real* DD,
 extern "C" __global__ void QSlipGeomDeviceComp27(real* DD, 
 												 int* k_Q, 
 												 real* QQ,
-												 unsigned int sizeQ,
+												 unsigned int  numberOfBCnodes,
 												 real om1, 
 												 real* NormalX,
 												 real* NormalY,
@@ -2112,7 +2112,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<sizeQ)
+   if(k< numberOfBCnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
@@ -2120,128 +2120,128 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       real *nx_dirE,   *nx_dirW,   *nx_dirN,   *nx_dirS,   *nx_dirT,   *nx_dirB, 
               *nx_dirNE,  *nx_dirSW,  *nx_dirSE,  *nx_dirNW,  *nx_dirTE,  *nx_dirBW,
               *nx_dirBE,  *nx_dirTW,  *nx_dirTN,  *nx_dirBS,  *nx_dirBN,  *nx_dirTS,
               *nx_dirTNE, *nx_dirTSW, *nx_dirTSE, *nx_dirTNW, *nx_dirBNE, *nx_dirBSW,
               *nx_dirBSE, *nx_dirBNW; 
-      nx_dirE   = &NormalX[dirE   *sizeQ];
-      nx_dirW   = &NormalX[dirW   *sizeQ];
-      nx_dirN   = &NormalX[dirN   *sizeQ];
-      nx_dirS   = &NormalX[dirS   *sizeQ];
-      nx_dirT   = &NormalX[dirT   *sizeQ];
-      nx_dirB   = &NormalX[dirB   *sizeQ];
-      nx_dirNE  = &NormalX[dirNE  *sizeQ];
-      nx_dirSW  = &NormalX[dirSW  *sizeQ];
-      nx_dirSE  = &NormalX[dirSE  *sizeQ];
-      nx_dirNW  = &NormalX[dirNW  *sizeQ];
-      nx_dirTE  = &NormalX[dirTE  *sizeQ];
-      nx_dirBW  = &NormalX[dirBW  *sizeQ];
-      nx_dirBE  = &NormalX[dirBE  *sizeQ];
-      nx_dirTW  = &NormalX[dirTW  *sizeQ];
-      nx_dirTN  = &NormalX[dirTN  *sizeQ];
-      nx_dirBS  = &NormalX[dirBS  *sizeQ];
-      nx_dirBN  = &NormalX[dirBN  *sizeQ];
-      nx_dirTS  = &NormalX[dirTS  *sizeQ];
-      nx_dirTNE = &NormalX[dirTNE *sizeQ];
-      nx_dirTSW = &NormalX[dirTSW *sizeQ];
-      nx_dirTSE = &NormalX[dirTSE *sizeQ];
-      nx_dirTNW = &NormalX[dirTNW *sizeQ];
-      nx_dirBNE = &NormalX[dirBNE *sizeQ];
-      nx_dirBSW = &NormalX[dirBSW *sizeQ];
-      nx_dirBSE = &NormalX[dirBSE *sizeQ];
-      nx_dirBNW = &NormalX[dirBNW *sizeQ];
+      nx_dirE   = &NormalX[dirE   * numberOfBCnodes];
+      nx_dirW   = &NormalX[dirW   * numberOfBCnodes];
+      nx_dirN   = &NormalX[dirN   * numberOfBCnodes];
+      nx_dirS   = &NormalX[dirS   * numberOfBCnodes];
+      nx_dirT   = &NormalX[dirT   * numberOfBCnodes];
+      nx_dirB   = &NormalX[dirB   * numberOfBCnodes];
+      nx_dirNE  = &NormalX[dirNE  * numberOfBCnodes];
+      nx_dirSW  = &NormalX[dirSW  * numberOfBCnodes];
+      nx_dirSE  = &NormalX[dirSE  * numberOfBCnodes];
+      nx_dirNW  = &NormalX[dirNW  * numberOfBCnodes];
+      nx_dirTE  = &NormalX[dirTE  * numberOfBCnodes];
+      nx_dirBW  = &NormalX[dirBW  * numberOfBCnodes];
+      nx_dirBE  = &NormalX[dirBE  * numberOfBCnodes];
+      nx_dirTW  = &NormalX[dirTW  * numberOfBCnodes];
+      nx_dirTN  = &NormalX[dirTN  * numberOfBCnodes];
+      nx_dirBS  = &NormalX[dirBS  * numberOfBCnodes];
+      nx_dirBN  = &NormalX[dirBN  * numberOfBCnodes];
+      nx_dirTS  = &NormalX[dirTS  * numberOfBCnodes];
+      nx_dirTNE = &NormalX[dirTNE * numberOfBCnodes];
+      nx_dirTSW = &NormalX[dirTSW * numberOfBCnodes];
+      nx_dirTSE = &NormalX[dirTSE * numberOfBCnodes];
+      nx_dirTNW = &NormalX[dirTNW * numberOfBCnodes];
+      nx_dirBNE = &NormalX[dirBNE * numberOfBCnodes];
+      nx_dirBSW = &NormalX[dirBSW * numberOfBCnodes];
+      nx_dirBSE = &NormalX[dirBSE * numberOfBCnodes];
+      nx_dirBNW = &NormalX[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       real *ny_dirE,   *ny_dirW,   *ny_dirN,   *ny_dirS,   *ny_dirT,   *ny_dirB, 
               *ny_dirNE,  *ny_dirSW,  *ny_dirSE,  *ny_dirNW,  *ny_dirTE,  *ny_dirBW,
               *ny_dirBE,  *ny_dirTW,  *ny_dirTN,  *ny_dirBS,  *ny_dirBN,  *ny_dirTS,
               *ny_dirTNE, *ny_dirTSW, *ny_dirTSE, *ny_dirTNW, *ny_dirBNE, *ny_dirBSW,
               *ny_dirBSE, *ny_dirBNW; 
-      ny_dirE   = &NormalY[dirE   *sizeQ];
-      ny_dirW   = &NormalY[dirW   *sizeQ];
-      ny_dirN   = &NormalY[dirN   *sizeQ];
-      ny_dirS   = &NormalY[dirS   *sizeQ];
-      ny_dirT   = &NormalY[dirT   *sizeQ];
-      ny_dirB   = &NormalY[dirB   *sizeQ];
-      ny_dirNE  = &NormalY[dirNE  *sizeQ];
-      ny_dirSW  = &NormalY[dirSW  *sizeQ];
-      ny_dirSE  = &NormalY[dirSE  *sizeQ];
-      ny_dirNW  = &NormalY[dirNW  *sizeQ];
-      ny_dirTE  = &NormalY[dirTE  *sizeQ];
-      ny_dirBW  = &NormalY[dirBW  *sizeQ];
-      ny_dirBE  = &NormalY[dirBE  *sizeQ];
-      ny_dirTW  = &NormalY[dirTW  *sizeQ];
-      ny_dirTN  = &NormalY[dirTN  *sizeQ];
-      ny_dirBS  = &NormalY[dirBS  *sizeQ];
-      ny_dirBN  = &NormalY[dirBN  *sizeQ];
-      ny_dirTS  = &NormalY[dirTS  *sizeQ];
-      ny_dirTNE = &NormalY[dirTNE *sizeQ];
-      ny_dirTSW = &NormalY[dirTSW *sizeQ];
-      ny_dirTSE = &NormalY[dirTSE *sizeQ];
-      ny_dirTNW = &NormalY[dirTNW *sizeQ];
-      ny_dirBNE = &NormalY[dirBNE *sizeQ];
-      ny_dirBSW = &NormalY[dirBSW *sizeQ];
-      ny_dirBSE = &NormalY[dirBSE *sizeQ];
-      ny_dirBNW = &NormalY[dirBNW *sizeQ];
+      ny_dirE   = &NormalY[dirE   * numberOfBCnodes];
+      ny_dirW   = &NormalY[dirW   * numberOfBCnodes];
+      ny_dirN   = &NormalY[dirN   * numberOfBCnodes];
+      ny_dirS   = &NormalY[dirS   * numberOfBCnodes];
+      ny_dirT   = &NormalY[dirT   * numberOfBCnodes];
+      ny_dirB   = &NormalY[dirB   * numberOfBCnodes];
+      ny_dirNE  = &NormalY[dirNE  * numberOfBCnodes];
+      ny_dirSW  = &NormalY[dirSW  * numberOfBCnodes];
+      ny_dirSE  = &NormalY[dirSE  * numberOfBCnodes];
+      ny_dirNW  = &NormalY[dirNW  * numberOfBCnodes];
+      ny_dirTE  = &NormalY[dirTE  * numberOfBCnodes];
+      ny_dirBW  = &NormalY[dirBW  * numberOfBCnodes];
+      ny_dirBE  = &NormalY[dirBE  * numberOfBCnodes];
+      ny_dirTW  = &NormalY[dirTW  * numberOfBCnodes];
+      ny_dirTN  = &NormalY[dirTN  * numberOfBCnodes];
+      ny_dirBS  = &NormalY[dirBS  * numberOfBCnodes];
+      ny_dirBN  = &NormalY[dirBN  * numberOfBCnodes];
+      ny_dirTS  = &NormalY[dirTS  * numberOfBCnodes];
+      ny_dirTNE = &NormalY[dirTNE * numberOfBCnodes];
+      ny_dirTSW = &NormalY[dirTSW * numberOfBCnodes];
+      ny_dirTSE = &NormalY[dirTSE * numberOfBCnodes];
+      ny_dirTNW = &NormalY[dirTNW * numberOfBCnodes];
+      ny_dirBNE = &NormalY[dirBNE * numberOfBCnodes];
+      ny_dirBSW = &NormalY[dirBSW * numberOfBCnodes];
+      ny_dirBSE = &NormalY[dirBSE * numberOfBCnodes];
+      ny_dirBNW = &NormalY[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       real *nz_dirE,   *nz_dirW,   *nz_dirN,   *nz_dirS,   *nz_dirT,   *nz_dirB, 
               *nz_dirNE,  *nz_dirSW,  *nz_dirSE,  *nz_dirNW,  *nz_dirTE,  *nz_dirBW,
               *nz_dirBE,  *nz_dirTW,  *nz_dirTN,  *nz_dirBS,  *nz_dirBN,  *nz_dirTS,
               *nz_dirTNE, *nz_dirTSW, *nz_dirTSE, *nz_dirTNW, *nz_dirBNE, *nz_dirBSW,
               *nz_dirBSE, *nz_dirBNW; 
-      nz_dirE   = &NormalZ[dirE   *sizeQ];
-      nz_dirW   = &NormalZ[dirW   *sizeQ];
-      nz_dirN   = &NormalZ[dirN   *sizeQ];
-      nz_dirS   = &NormalZ[dirS   *sizeQ];
-      nz_dirT   = &NormalZ[dirT   *sizeQ];
-      nz_dirB   = &NormalZ[dirB   *sizeQ];
-      nz_dirNE  = &NormalZ[dirNE  *sizeQ];
-      nz_dirSW  = &NormalZ[dirSW  *sizeQ];
-      nz_dirSE  = &NormalZ[dirSE  *sizeQ];
-      nz_dirNW  = &NormalZ[dirNW  *sizeQ];
-      nz_dirTE  = &NormalZ[dirTE  *sizeQ];
-      nz_dirBW  = &NormalZ[dirBW  *sizeQ];
-      nz_dirBE  = &NormalZ[dirBE  *sizeQ];
-      nz_dirTW  = &NormalZ[dirTW  *sizeQ];
-      nz_dirTN  = &NormalZ[dirTN  *sizeQ];
-      nz_dirBS  = &NormalZ[dirBS  *sizeQ];
-      nz_dirBN  = &NormalZ[dirBN  *sizeQ];
-      nz_dirTS  = &NormalZ[dirTS  *sizeQ];
-      nz_dirTNE = &NormalZ[dirTNE *sizeQ];
-      nz_dirTSW = &NormalZ[dirTSW *sizeQ];
-      nz_dirTSE = &NormalZ[dirTSE *sizeQ];
-      nz_dirTNW = &NormalZ[dirTNW *sizeQ];
-      nz_dirBNE = &NormalZ[dirBNE *sizeQ];
-      nz_dirBSW = &NormalZ[dirBSW *sizeQ];
-      nz_dirBSE = &NormalZ[dirBSE *sizeQ];
-      nz_dirBNW = &NormalZ[dirBNW *sizeQ];
+      nz_dirE   = &NormalZ[dirE   * numberOfBCnodes];
+      nz_dirW   = &NormalZ[dirW   * numberOfBCnodes];
+      nz_dirN   = &NormalZ[dirN   * numberOfBCnodes];
+      nz_dirS   = &NormalZ[dirS   * numberOfBCnodes];
+      nz_dirT   = &NormalZ[dirT   * numberOfBCnodes];
+      nz_dirB   = &NormalZ[dirB   * numberOfBCnodes];
+      nz_dirNE  = &NormalZ[dirNE  * numberOfBCnodes];
+      nz_dirSW  = &NormalZ[dirSW  * numberOfBCnodes];
+      nz_dirSE  = &NormalZ[dirSE  * numberOfBCnodes];
+      nz_dirNW  = &NormalZ[dirNW  * numberOfBCnodes];
+      nz_dirTE  = &NormalZ[dirTE  * numberOfBCnodes];
+      nz_dirBW  = &NormalZ[dirBW  * numberOfBCnodes];
+      nz_dirBE  = &NormalZ[dirBE  * numberOfBCnodes];
+      nz_dirTW  = &NormalZ[dirTW  * numberOfBCnodes];
+      nz_dirTN  = &NormalZ[dirTN  * numberOfBCnodes];
+      nz_dirBS  = &NormalZ[dirBS  * numberOfBCnodes];
+      nz_dirBN  = &NormalZ[dirBN  * numberOfBCnodes];
+      nz_dirTS  = &NormalZ[dirTS  * numberOfBCnodes];
+      nz_dirTNE = &NormalZ[dirTNE * numberOfBCnodes];
+      nz_dirTSW = &NormalZ[dirTSW * numberOfBCnodes];
+      nz_dirTSE = &NormalZ[dirTSE * numberOfBCnodes];
+      nz_dirTNW = &NormalZ[dirTNW * numberOfBCnodes];
+      nz_dirBNE = &NormalZ[dirBNE * numberOfBCnodes];
+      nz_dirBSW = &NormalZ[dirBSW * numberOfBCnodes];
+      nz_dirBSE = &NormalZ[dirBSE * numberOfBCnodes];
+      nz_dirBNW = &NormalZ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -2401,11 +2401,11 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 
 	  //fac = fac * magS / (c1o3 * (one / om1 - c1o2));
    //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  //real *facAst = &QQ[dirZERO *sizeQ];
+	  //real *facAst = &QQ[dirZERO * numberOfBCnodes];
 
 	  //fac = fac * alpha + facAst[k] * (one - alpha);
 	  //facAst[k] = fac;
-	  //(&QQ[dirZERO *sizeQ])[KQK] = fac;
+	  //(&QQ[dirZERO * numberOfBCnodes])[KQK] = fac;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  ////real uk = sqrtf(vx1*vx1 + vx2*vx2 + vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -2915,7 +2915,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 extern "C" __global__ void QSlipNormDeviceComp27(real* DD, 
 												 int* k_Q, 
 												 real* QQ,
-												 unsigned int sizeQ,
+												 unsigned int  numberOfBCnodes,
 												 real om1, 
 												 real* NormalX,
 												 real* NormalY,
@@ -2998,7 +2998,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<sizeQ)
+   if(k< numberOfBCnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
@@ -3006,128 +3006,128 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       real *nx_dirE,   *nx_dirW,   *nx_dirN,   *nx_dirS,   *nx_dirT,   *nx_dirB, 
               *nx_dirNE,  *nx_dirSW,  *nx_dirSE,  *nx_dirNW,  *nx_dirTE,  *nx_dirBW,
               *nx_dirBE,  *nx_dirTW,  *nx_dirTN,  *nx_dirBS,  *nx_dirBN,  *nx_dirTS,
               *nx_dirTNE, *nx_dirTSW, *nx_dirTSE, *nx_dirTNW, *nx_dirBNE, *nx_dirBSW,
               *nx_dirBSE, *nx_dirBNW; 
-      nx_dirE   = &NormalX[dirE   *sizeQ];
-      nx_dirW   = &NormalX[dirW   *sizeQ];
-      nx_dirN   = &NormalX[dirN   *sizeQ];
-      nx_dirS   = &NormalX[dirS   *sizeQ];
-      nx_dirT   = &NormalX[dirT   *sizeQ];
-      nx_dirB   = &NormalX[dirB   *sizeQ];
-      nx_dirNE  = &NormalX[dirNE  *sizeQ];
-      nx_dirSW  = &NormalX[dirSW  *sizeQ];
-      nx_dirSE  = &NormalX[dirSE  *sizeQ];
-      nx_dirNW  = &NormalX[dirNW  *sizeQ];
-      nx_dirTE  = &NormalX[dirTE  *sizeQ];
-      nx_dirBW  = &NormalX[dirBW  *sizeQ];
-      nx_dirBE  = &NormalX[dirBE  *sizeQ];
-      nx_dirTW  = &NormalX[dirTW  *sizeQ];
-      nx_dirTN  = &NormalX[dirTN  *sizeQ];
-      nx_dirBS  = &NormalX[dirBS  *sizeQ];
-      nx_dirBN  = &NormalX[dirBN  *sizeQ];
-      nx_dirTS  = &NormalX[dirTS  *sizeQ];
-      nx_dirTNE = &NormalX[dirTNE *sizeQ];
-      nx_dirTSW = &NormalX[dirTSW *sizeQ];
-      nx_dirTSE = &NormalX[dirTSE *sizeQ];
-      nx_dirTNW = &NormalX[dirTNW *sizeQ];
-      nx_dirBNE = &NormalX[dirBNE *sizeQ];
-      nx_dirBSW = &NormalX[dirBSW *sizeQ];
-      nx_dirBSE = &NormalX[dirBSE *sizeQ];
-      nx_dirBNW = &NormalX[dirBNW *sizeQ];
+      nx_dirE   = &NormalX[dirE   * numberOfBCnodes];
+      nx_dirW   = &NormalX[dirW   * numberOfBCnodes];
+      nx_dirN   = &NormalX[dirN   * numberOfBCnodes];
+      nx_dirS   = &NormalX[dirS   * numberOfBCnodes];
+      nx_dirT   = &NormalX[dirT   * numberOfBCnodes];
+      nx_dirB   = &NormalX[dirB   * numberOfBCnodes];
+      nx_dirNE  = &NormalX[dirNE  * numberOfBCnodes];
+      nx_dirSW  = &NormalX[dirSW  * numberOfBCnodes];
+      nx_dirSE  = &NormalX[dirSE  * numberOfBCnodes];
+      nx_dirNW  = &NormalX[dirNW  * numberOfBCnodes];
+      nx_dirTE  = &NormalX[dirTE  * numberOfBCnodes];
+      nx_dirBW  = &NormalX[dirBW  * numberOfBCnodes];
+      nx_dirBE  = &NormalX[dirBE  * numberOfBCnodes];
+      nx_dirTW  = &NormalX[dirTW  * numberOfBCnodes];
+      nx_dirTN  = &NormalX[dirTN  * numberOfBCnodes];
+      nx_dirBS  = &NormalX[dirBS  * numberOfBCnodes];
+      nx_dirBN  = &NormalX[dirBN  * numberOfBCnodes];
+      nx_dirTS  = &NormalX[dirTS  * numberOfBCnodes];
+      nx_dirTNE = &NormalX[dirTNE * numberOfBCnodes];
+      nx_dirTSW = &NormalX[dirTSW * numberOfBCnodes];
+      nx_dirTSE = &NormalX[dirTSE * numberOfBCnodes];
+      nx_dirTNW = &NormalX[dirTNW * numberOfBCnodes];
+      nx_dirBNE = &NormalX[dirBNE * numberOfBCnodes];
+      nx_dirBSW = &NormalX[dirBSW * numberOfBCnodes];
+      nx_dirBSE = &NormalX[dirBSE * numberOfBCnodes];
+      nx_dirBNW = &NormalX[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       real *ny_dirE,   *ny_dirW,   *ny_dirN,   *ny_dirS,   *ny_dirT,   *ny_dirB, 
               *ny_dirNE,  *ny_dirSW,  *ny_dirSE,  *ny_dirNW,  *ny_dirTE,  *ny_dirBW,
               *ny_dirBE,  *ny_dirTW,  *ny_dirTN,  *ny_dirBS,  *ny_dirBN,  *ny_dirTS,
               *ny_dirTNE, *ny_dirTSW, *ny_dirTSE, *ny_dirTNW, *ny_dirBNE, *ny_dirBSW,
               *ny_dirBSE, *ny_dirBNW; 
-      ny_dirE   = &NormalY[dirE   *sizeQ];
-      ny_dirW   = &NormalY[dirW   *sizeQ];
-      ny_dirN   = &NormalY[dirN   *sizeQ];
-      ny_dirS   = &NormalY[dirS   *sizeQ];
-      ny_dirT   = &NormalY[dirT   *sizeQ];
-      ny_dirB   = &NormalY[dirB   *sizeQ];
-      ny_dirNE  = &NormalY[dirNE  *sizeQ];
-      ny_dirSW  = &NormalY[dirSW  *sizeQ];
-      ny_dirSE  = &NormalY[dirSE  *sizeQ];
-      ny_dirNW  = &NormalY[dirNW  *sizeQ];
-      ny_dirTE  = &NormalY[dirTE  *sizeQ];
-      ny_dirBW  = &NormalY[dirBW  *sizeQ];
-      ny_dirBE  = &NormalY[dirBE  *sizeQ];
-      ny_dirTW  = &NormalY[dirTW  *sizeQ];
-      ny_dirTN  = &NormalY[dirTN  *sizeQ];
-      ny_dirBS  = &NormalY[dirBS  *sizeQ];
-      ny_dirBN  = &NormalY[dirBN  *sizeQ];
-      ny_dirTS  = &NormalY[dirTS  *sizeQ];
-      ny_dirTNE = &NormalY[dirTNE *sizeQ];
-      ny_dirTSW = &NormalY[dirTSW *sizeQ];
-      ny_dirTSE = &NormalY[dirTSE *sizeQ];
-      ny_dirTNW = &NormalY[dirTNW *sizeQ];
-      ny_dirBNE = &NormalY[dirBNE *sizeQ];
-      ny_dirBSW = &NormalY[dirBSW *sizeQ];
-      ny_dirBSE = &NormalY[dirBSE *sizeQ];
-      ny_dirBNW = &NormalY[dirBNW *sizeQ];
+      ny_dirE   = &NormalY[dirE   * numberOfBCnodes];
+      ny_dirW   = &NormalY[dirW   * numberOfBCnodes];
+      ny_dirN   = &NormalY[dirN   * numberOfBCnodes];
+      ny_dirS   = &NormalY[dirS   * numberOfBCnodes];
+      ny_dirT   = &NormalY[dirT   * numberOfBCnodes];
+      ny_dirB   = &NormalY[dirB   * numberOfBCnodes];
+      ny_dirNE  = &NormalY[dirNE  * numberOfBCnodes];
+      ny_dirSW  = &NormalY[dirSW  * numberOfBCnodes];
+      ny_dirSE  = &NormalY[dirSE  * numberOfBCnodes];
+      ny_dirNW  = &NormalY[dirNW  * numberOfBCnodes];
+      ny_dirTE  = &NormalY[dirTE  * numberOfBCnodes];
+      ny_dirBW  = &NormalY[dirBW  * numberOfBCnodes];
+      ny_dirBE  = &NormalY[dirBE  * numberOfBCnodes];
+      ny_dirTW  = &NormalY[dirTW  * numberOfBCnodes];
+      ny_dirTN  = &NormalY[dirTN  * numberOfBCnodes];
+      ny_dirBS  = &NormalY[dirBS  * numberOfBCnodes];
+      ny_dirBN  = &NormalY[dirBN  * numberOfBCnodes];
+      ny_dirTS  = &NormalY[dirTS  * numberOfBCnodes];
+      ny_dirTNE = &NormalY[dirTNE * numberOfBCnodes];
+      ny_dirTSW = &NormalY[dirTSW * numberOfBCnodes];
+      ny_dirTSE = &NormalY[dirTSE * numberOfBCnodes];
+      ny_dirTNW = &NormalY[dirTNW * numberOfBCnodes];
+      ny_dirBNE = &NormalY[dirBNE * numberOfBCnodes];
+      ny_dirBSW = &NormalY[dirBSW * numberOfBCnodes];
+      ny_dirBSE = &NormalY[dirBSE * numberOfBCnodes];
+      ny_dirBNW = &NormalY[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       real *nz_dirE,   *nz_dirW,   *nz_dirN,   *nz_dirS,   *nz_dirT,   *nz_dirB, 
               *nz_dirNE,  *nz_dirSW,  *nz_dirSE,  *nz_dirNW,  *nz_dirTE,  *nz_dirBW,
               *nz_dirBE,  *nz_dirTW,  *nz_dirTN,  *nz_dirBS,  *nz_dirBN,  *nz_dirTS,
               *nz_dirTNE, *nz_dirTSW, *nz_dirTSE, *nz_dirTNW, *nz_dirBNE, *nz_dirBSW,
               *nz_dirBSE, *nz_dirBNW; 
-      nz_dirE   = &NormalZ[dirE   *sizeQ];
-      nz_dirW   = &NormalZ[dirW   *sizeQ];
-      nz_dirN   = &NormalZ[dirN   *sizeQ];
-      nz_dirS   = &NormalZ[dirS   *sizeQ];
-      nz_dirT   = &NormalZ[dirT   *sizeQ];
-      nz_dirB   = &NormalZ[dirB   *sizeQ];
-      nz_dirNE  = &NormalZ[dirNE  *sizeQ];
-      nz_dirSW  = &NormalZ[dirSW  *sizeQ];
-      nz_dirSE  = &NormalZ[dirSE  *sizeQ];
-      nz_dirNW  = &NormalZ[dirNW  *sizeQ];
-      nz_dirTE  = &NormalZ[dirTE  *sizeQ];
-      nz_dirBW  = &NormalZ[dirBW  *sizeQ];
-      nz_dirBE  = &NormalZ[dirBE  *sizeQ];
-      nz_dirTW  = &NormalZ[dirTW  *sizeQ];
-      nz_dirTN  = &NormalZ[dirTN  *sizeQ];
-      nz_dirBS  = &NormalZ[dirBS  *sizeQ];
-      nz_dirBN  = &NormalZ[dirBN  *sizeQ];
-      nz_dirTS  = &NormalZ[dirTS  *sizeQ];
-      nz_dirTNE = &NormalZ[dirTNE *sizeQ];
-      nz_dirTSW = &NormalZ[dirTSW *sizeQ];
-      nz_dirTSE = &NormalZ[dirTSE *sizeQ];
-      nz_dirTNW = &NormalZ[dirTNW *sizeQ];
-      nz_dirBNE = &NormalZ[dirBNE *sizeQ];
-      nz_dirBSW = &NormalZ[dirBSW *sizeQ];
-      nz_dirBSE = &NormalZ[dirBSE *sizeQ];
-      nz_dirBNW = &NormalZ[dirBNW *sizeQ];
+      nz_dirE   = &NormalZ[dirE   * numberOfBCnodes];
+      nz_dirW   = &NormalZ[dirW   * numberOfBCnodes];
+      nz_dirN   = &NormalZ[dirN   * numberOfBCnodes];
+      nz_dirS   = &NormalZ[dirS   * numberOfBCnodes];
+      nz_dirT   = &NormalZ[dirT   * numberOfBCnodes];
+      nz_dirB   = &NormalZ[dirB   * numberOfBCnodes];
+      nz_dirNE  = &NormalZ[dirNE  * numberOfBCnodes];
+      nz_dirSW  = &NormalZ[dirSW  * numberOfBCnodes];
+      nz_dirSE  = &NormalZ[dirSE  * numberOfBCnodes];
+      nz_dirNW  = &NormalZ[dirNW  * numberOfBCnodes];
+      nz_dirTE  = &NormalZ[dirTE  * numberOfBCnodes];
+      nz_dirBW  = &NormalZ[dirBW  * numberOfBCnodes];
+      nz_dirBE  = &NormalZ[dirBE  * numberOfBCnodes];
+      nz_dirTW  = &NormalZ[dirTW  * numberOfBCnodes];
+      nz_dirTN  = &NormalZ[dirTN  * numberOfBCnodes];
+      nz_dirBS  = &NormalZ[dirBS  * numberOfBCnodes];
+      nz_dirBN  = &NormalZ[dirBN  * numberOfBCnodes];
+      nz_dirTS  = &NormalZ[dirTS  * numberOfBCnodes];
+      nz_dirTNE = &NormalZ[dirTNE * numberOfBCnodes];
+      nz_dirTSW = &NormalZ[dirTSW * numberOfBCnodes];
+      nz_dirTSE = &NormalZ[dirTSE * numberOfBCnodes];
+      nz_dirTNW = &NormalZ[dirTNW * numberOfBCnodes];
+      nz_dirBNE = &NormalZ[dirBNE * numberOfBCnodes];
+      nz_dirBSW = &NormalZ[dirBSW * numberOfBCnodes];
+      nz_dirBSE = &NormalZ[dirBSE * numberOfBCnodes];
+      nz_dirBNW = &NormalZ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -3287,11 +3287,11 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 
 	  fac = fac * magS / (c1o3 * (c1o1 / om1 - c1o2));
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  real *facAst = &QQ[dirZERO *sizeQ];
+	  real *facAst = &QQ[dirZERO * numberOfBCnodes];
 
 	  fac = fac * alpha + facAst[k] * (c1o1 - alpha);
 	  facAst[k] = fac;
-	  //(&QQ[dirZERO *sizeQ])[KQK] = fac;
+	  //(&QQ[dirZERO * numberOfBCnodes])[KQK] = fac;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  ////real uk = sqrtf(vx1*vx1 + vx2*vx2 + vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
index 129011139e14f3b49af7254253648b011b3c2ae4..822e5c4889d2c767f5cec85c6d7a7e4a62ab212c 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
@@ -138,7 +138,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
 											   int* k_Q,
                                     int* k_N,
 											   real* QQ,
-                                    unsigned int sizeQ,
+                                    unsigned int numberOfBCnodes,
                                     real om1,
                                     real* turbViscosity,
                                     real* vx,
@@ -239,7 +239,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<sizeQ/*numberOfBCnodes*/)
+   if(k< numberOfBCnodes/*numberOfBCnodes*/)
    {
       ////////////////////////////////////////////////////////////////////////////////
       real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB,
@@ -247,32 +247,32 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW;
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -916,7 +916,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
 											            int* k_Q,
                                              int* k_N,
                                              real* QQ,
-                                             unsigned int sizeQ,
+                                             unsigned int  numberOfBCnodes,
                                              real* vx,
                                              real* vy,
                                              real* vz,
@@ -1014,7 +1014,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<sizeQ)
+   if(k< numberOfBCnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB,
@@ -1022,32 +1022,32 @@ extern "C" __global__ void BBStressDevice27( real* DD,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW;
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu
index 97d1afe99b39c3530fad5185c8ea4b98b2e647bf..a82d36323354daeb7d9bdc98510cc1af112b78a5 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu
@@ -21,7 +21,6 @@ extern "C" __global__ void QVelDeviceCompThinWallsPartOne27(
 	real* DD, 
 	int* k_Q, 
 	real* QQ,
-	uint sizeQ,
 	int numberOfBCnodes, 
 	real om1, 
 	uint* neighborX,
@@ -114,32 +113,32 @@ extern "C" __global__ void QVelDeviceCompThinWallsPartOne27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       uint KQK  = k_Q[k];
@@ -451,8 +450,7 @@ extern "C" __global__ void QDeviceCompThinWallsPartOne27(
 	real* DD,
 	int* k_Q,
 	real* QQ,
-	unsigned int sizeQ,
-	int numberOfBCnodes,
+	unsigned int numberOfBCnodes,
 	real om1,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -540,32 +538,32 @@ extern "C" __global__ void QDeviceCompThinWallsPartOne27(
 			*q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW;
-		q_dirE = &QQ[dirE   *sizeQ];
-		q_dirW = &QQ[dirW   *sizeQ];
-		q_dirN = &QQ[dirN   *sizeQ];
-		q_dirS = &QQ[dirS   *sizeQ];
-		q_dirT = &QQ[dirT   *sizeQ];
-		q_dirB = &QQ[dirB   *sizeQ];
-		q_dirNE = &QQ[dirNE  *sizeQ];
-		q_dirSW = &QQ[dirSW  *sizeQ];
-		q_dirSE = &QQ[dirSE  *sizeQ];
-		q_dirNW = &QQ[dirNW  *sizeQ];
-		q_dirTE = &QQ[dirTE  *sizeQ];
-		q_dirBW = &QQ[dirBW  *sizeQ];
-		q_dirBE = &QQ[dirBE  *sizeQ];
-		q_dirTW = &QQ[dirTW  *sizeQ];
-		q_dirTN = &QQ[dirTN  *sizeQ];
-		q_dirBS = &QQ[dirBS  *sizeQ];
-		q_dirBN = &QQ[dirBN  *sizeQ];
-		q_dirTS = &QQ[dirTS  *sizeQ];
-		q_dirTNE = &QQ[dirTNE *sizeQ];
-		q_dirTSW = &QQ[dirTSW *sizeQ];
-		q_dirTSE = &QQ[dirTSE *sizeQ];
-		q_dirTNW = &QQ[dirTNW *sizeQ];
-		q_dirBNE = &QQ[dirBNE *sizeQ];
-		q_dirBSW = &QQ[dirBSW *sizeQ];
-		q_dirBSE = &QQ[dirBSE *sizeQ];
-		q_dirBNW = &QQ[dirBNW *sizeQ];
+		q_dirE = &QQ[dirE   * numberOfBCnodes];
+		q_dirW = &QQ[dirW   * numberOfBCnodes];
+		q_dirN = &QQ[dirN   * numberOfBCnodes];
+		q_dirS = &QQ[dirS   * numberOfBCnodes];
+		q_dirT = &QQ[dirT   * numberOfBCnodes];
+		q_dirB = &QQ[dirB   * numberOfBCnodes];
+		q_dirNE = &QQ[dirNE  * numberOfBCnodes];
+		q_dirSW = &QQ[dirSW  * numberOfBCnodes];
+		q_dirSE = &QQ[dirSE  * numberOfBCnodes];
+		q_dirNW = &QQ[dirNW  * numberOfBCnodes];
+		q_dirTE = &QQ[dirTE  * numberOfBCnodes];
+		q_dirBW = &QQ[dirBW  * numberOfBCnodes];
+		q_dirBE = &QQ[dirBE  * numberOfBCnodes];
+		q_dirTW = &QQ[dirTW  * numberOfBCnodes];
+		q_dirTN = &QQ[dirTN  * numberOfBCnodes];
+		q_dirBS = &QQ[dirBS  * numberOfBCnodes];
+		q_dirBN = &QQ[dirBN  * numberOfBCnodes];
+		q_dirTS = &QQ[dirTS  * numberOfBCnodes];
+		q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+		q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+		q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+		q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+		q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+		q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+		q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+		q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
 		////////////////////////////////////////////////////////////////////////////////
 		//index
 		unsigned int KQK = k_Q[k];
@@ -877,8 +875,7 @@ extern "C" __global__ void QThinWallsPartTwo27(
 	real* DD, 
 	int* k_Q, 
 	real* QQ,
-	uint sizeQ,
-	int numberOfBCnodes, 
+	uint numberOfBCnodes, 
 	uint* geom,
 	uint* neighborX,
 	uint* neighborY,
@@ -906,32 +903,32 @@ extern "C" __global__ void QThinWallsPartTwo27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       uint KQK  = k_Q[k];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu
index 523a33578c770de6c5f2e9d4c91bbbf2cdc49077..c73b82a24a03b878f40240bc6a829abddf43cf17 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu
@@ -15,16 +15,14 @@
 using namespace vf::lbm::constant;
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDeviceCompPlusSlip27(int inx,
-													int iny,
+extern "C" __global__ void QVelDeviceCompPlusSlip27(
 													real* vx,
 													real* vy,
 													real* vz,
 													real* DD, 
 													int* k_Q, 
 													real* QQ,
-													unsigned int sizeQ,
-													int numberOfBCnodes, 
+													unsigned int numberOfBCnodes, 
 													real om1, 
 													unsigned int* neighborX,
 													unsigned int* neighborY,
@@ -116,32 +114,32 @@ extern "C" __global__ void QVelDeviceCompPlusSlip27(int inx,
 		   *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS,
 		   *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 		   *q_dirBSE, *q_dirBNW;
-	   q_dirE = &QQ[dirE   *sizeQ];
-	   q_dirW = &QQ[dirW   *sizeQ];
-	   q_dirN = &QQ[dirN   *sizeQ];
-	   q_dirS = &QQ[dirS   *sizeQ];
-	   q_dirT = &QQ[dirT   *sizeQ];
-	   q_dirB = &QQ[dirB   *sizeQ];
-	   q_dirNE = &QQ[dirNE  *sizeQ];
-	   q_dirSW = &QQ[dirSW  *sizeQ];
-	   q_dirSE = &QQ[dirSE  *sizeQ];
-	   q_dirNW = &QQ[dirNW  *sizeQ];
-	   q_dirTE = &QQ[dirTE  *sizeQ];
-	   q_dirBW = &QQ[dirBW  *sizeQ];
-	   q_dirBE = &QQ[dirBE  *sizeQ];
-	   q_dirTW = &QQ[dirTW  *sizeQ];
-	   q_dirTN = &QQ[dirTN  *sizeQ];
-	   q_dirBS = &QQ[dirBS  *sizeQ];
-	   q_dirBN = &QQ[dirBN  *sizeQ];
-	   q_dirTS = &QQ[dirTS  *sizeQ];
-	   q_dirTNE = &QQ[dirTNE *sizeQ];
-	   q_dirTSW = &QQ[dirTSW *sizeQ];
-	   q_dirTSE = &QQ[dirTSE *sizeQ];
-	   q_dirTNW = &QQ[dirTNW *sizeQ];
-	   q_dirBNE = &QQ[dirBNE *sizeQ];
-	   q_dirBSW = &QQ[dirBSW *sizeQ];
-	   q_dirBSE = &QQ[dirBSE *sizeQ];
-	   q_dirBNW = &QQ[dirBNW *sizeQ];
+	   q_dirE = &QQ[dirE   * numberOfBCnodes];
+	   q_dirW = &QQ[dirW   * numberOfBCnodes];
+	   q_dirN = &QQ[dirN   * numberOfBCnodes];
+	   q_dirS = &QQ[dirS   * numberOfBCnodes];
+	   q_dirT = &QQ[dirT   * numberOfBCnodes];
+	   q_dirB = &QQ[dirB   * numberOfBCnodes];
+	   q_dirNE = &QQ[dirNE  * numberOfBCnodes];
+	   q_dirSW = &QQ[dirSW  * numberOfBCnodes];
+	   q_dirSE = &QQ[dirSE  * numberOfBCnodes];
+	   q_dirNW = &QQ[dirNW  * numberOfBCnodes];
+	   q_dirTE = &QQ[dirTE  * numberOfBCnodes];
+	   q_dirBW = &QQ[dirBW  * numberOfBCnodes];
+	   q_dirBE = &QQ[dirBE  * numberOfBCnodes];
+	   q_dirTW = &QQ[dirTW  * numberOfBCnodes];
+	   q_dirTN = &QQ[dirTN  * numberOfBCnodes];
+	   q_dirBS = &QQ[dirBS  * numberOfBCnodes];
+	   q_dirBN = &QQ[dirBN  * numberOfBCnodes];
+	   q_dirTS = &QQ[dirTS  * numberOfBCnodes];
+	   q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+	   q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+	   q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+	   q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+	   q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+	   q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+	   q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+	   q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
 	   ////////////////////////////////////////////////////////////////////////////////
 	   //index
 	   unsigned int KQK = k_Q[k];
@@ -1128,8 +1126,7 @@ extern "C" __global__ void QVelDeviceIncompHighNu27(int inx,
 													real* DD, 
 													int* k_Q, 
 													real* QQ,
-													unsigned int sizeQ,
-													int numberOfBCnodes, 
+													unsigned int numberOfBCnodes, 
 													real om1, 
 													unsigned int* neighborX,
 													unsigned int* neighborY,
@@ -1221,32 +1218,32 @@ extern "C" __global__ void QVelDeviceIncompHighNu27(int inx,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -1620,17 +1617,15 @@ extern "C" __global__ void QVelDeviceIncompHighNu27(int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDeviceCompHighNu27(  int inx,
-													int iny,
+extern "C" __global__ void QVelDeviceCompHighNu27(
 													real* vx,
 													real* vy,
 													real* vz,
-													real* DD, 
-													int* k_Q, 
+													real* DD,
+													int* k_Q,
 													real* QQ,
-													unsigned int sizeQ,
-													int numberOfBCnodes, 
-													real om1, 
+													unsigned int numberOfBCnodes, 
+													real om1,
 													unsigned int* neighborX,
 													unsigned int* neighborY,
 													unsigned int* neighborZ,
@@ -1721,32 +1716,32 @@ extern "C" __global__ void QVelDeviceCompHighNu27(  int inx,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -2198,136 +2193,67 @@ extern "C" __global__ void QVelDeviceCompHighNu27(  int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDeviceCompZeroPress27(   int inx,
-														int iny,
-														real* vx,
-														real* vy,
-														real* vz,
-														real* DD, 
-														int* k_Q, 
-														real* QQ,
-														unsigned int sizeQ,
-														//int numberOfBCnodes, 
-														real om1, 
+extern "C" __global__ void QVelDeviceCompZeroPress27(
+														real* velocityX,
+														real* velocityY,
+														real* velocityZ,
+														real* distribution, 
+														int* subgridDistanceIndices, 
+														real* subgridDistances,
+														unsigned int numberOfBCnodes, 
+														real omega, 
 														unsigned int* neighborX,
 														unsigned int* neighborY,
 														unsigned int* neighborZ,
-														unsigned int size_Mat, 
+														unsigned int numberOfLBnodes, 
 														bool isEvenTimestep)
 {
-   Distributions27 D;
-   if (isEvenTimestep==true)
-   {
-      D.f[dirE   ] = &DD[dirE   *size_Mat];
-      D.f[dirW   ] = &DD[dirW   *size_Mat];
-      D.f[dirN   ] = &DD[dirN   *size_Mat];
-      D.f[dirS   ] = &DD[dirS   *size_Mat];
-      D.f[dirT   ] = &DD[dirT   *size_Mat];
-      D.f[dirB   ] = &DD[dirB   *size_Mat];
-      D.f[dirNE  ] = &DD[dirNE  *size_Mat];
-      D.f[dirSW  ] = &DD[dirSW  *size_Mat];
-      D.f[dirSE  ] = &DD[dirSE  *size_Mat];
-      D.f[dirNW  ] = &DD[dirNW  *size_Mat];
-      D.f[dirTE  ] = &DD[dirTE  *size_Mat];
-      D.f[dirBW  ] = &DD[dirBW  *size_Mat];
-      D.f[dirBE  ] = &DD[dirBE  *size_Mat];
-      D.f[dirTW  ] = &DD[dirTW  *size_Mat];
-      D.f[dirTN  ] = &DD[dirTN  *size_Mat];
-      D.f[dirBS  ] = &DD[dirBS  *size_Mat];
-      D.f[dirBN  ] = &DD[dirBN  *size_Mat];
-      D.f[dirTS  ] = &DD[dirTS  *size_Mat];
-      D.f[dirZERO] = &DD[dirZERO*size_Mat];
-      D.f[dirTNE ] = &DD[dirTNE *size_Mat];
-      D.f[dirTSW ] = &DD[dirTSW *size_Mat];
-      D.f[dirTSE ] = &DD[dirTSE *size_Mat];
-      D.f[dirTNW ] = &DD[dirTNW *size_Mat];
-      D.f[dirBNE ] = &DD[dirBNE *size_Mat];
-      D.f[dirBSW ] = &DD[dirBSW *size_Mat];
-      D.f[dirBSE ] = &DD[dirBSE *size_Mat];
-      D.f[dirBNW ] = &DD[dirBNW *size_Mat];
-   } 
-   else
-   {
-      D.f[dirW   ] = &DD[dirE   *size_Mat];
-      D.f[dirE   ] = &DD[dirW   *size_Mat];
-      D.f[dirS   ] = &DD[dirN   *size_Mat];
-      D.f[dirN   ] = &DD[dirS   *size_Mat];
-      D.f[dirB   ] = &DD[dirT   *size_Mat];
-      D.f[dirT   ] = &DD[dirB   *size_Mat];
-      D.f[dirSW  ] = &DD[dirNE  *size_Mat];
-      D.f[dirNE  ] = &DD[dirSW  *size_Mat];
-      D.f[dirNW  ] = &DD[dirSE  *size_Mat];
-      D.f[dirSE  ] = &DD[dirNW  *size_Mat];
-      D.f[dirBW  ] = &DD[dirTE  *size_Mat];
-      D.f[dirTE  ] = &DD[dirBW  *size_Mat];
-      D.f[dirTW  ] = &DD[dirBE  *size_Mat];
-      D.f[dirBE  ] = &DD[dirTW  *size_Mat];
-      D.f[dirBS  ] = &DD[dirTN  *size_Mat];
-      D.f[dirTN  ] = &DD[dirBS  *size_Mat];
-      D.f[dirTS  ] = &DD[dirBN  *size_Mat];
-      D.f[dirBN  ] = &DD[dirTS  *size_Mat];
-      D.f[dirZERO] = &DD[dirZERO*size_Mat];
-      D.f[dirTNE ] = &DD[dirBSW *size_Mat];
-      D.f[dirTSW ] = &DD[dirBNE *size_Mat];
-      D.f[dirTSE ] = &DD[dirBNW *size_Mat];
-      D.f[dirTNW ] = &DD[dirBSE *size_Mat];
-      D.f[dirBNE ] = &DD[dirTSW *size_Mat];
-      D.f[dirBSW ] = &DD[dirTNE *size_Mat];
-      D.f[dirBSE ] = &DD[dirTNW *size_Mat];
-      D.f[dirBNW ] = &DD[dirTSE *size_Mat];
-   }
-   ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   //////////////////////////////////////////////////////////////////////////
+	//! The velocity boundary condition is executed in the following steps
+	//!
+	////////////////////////////////////////////////////////////////////////////////
+	//! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+	//!
+   const unsigned  x = threadIdx.x;  // global x-index 
+   const unsigned  y = blockIdx.x;   // global y-index 
+   const unsigned  z = blockIdx.y;   // global z-index 
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
 
    const unsigned k = nx*(ny*z + y) + x;
-   //////////////////////////////////////////////////////////////////////////
 
-   if(k<sizeQ/*numberOfBCnodes*/)
+   //////////////////////////////////////////////////////////////////////////
+   //! - Run for all indices in size of boundary condition (numberOfBCnodes)
+   //!
+   if(k < numberOfBCnodes)
    {
+
+      //////////////////////////////////////////////////////////////////////////
+      //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
+      //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+      //!
+      Distributions27 dist;
+      getPointersToDistributions(dist, distribution, numberOfLBnodes, isEvenTimestep);
+
       ////////////////////////////////////////////////////////////////////////////////
-      real VeloX = vx[k];
-      real VeloY = vy[k];
-      real VeloZ = vz[k]; //(16.0*(u0*2.0)*bbx*bby*(grid_nx-bbx)*(grid_ny-bby))/(grid_nx*grid_nx*grid_ny*grid_ny)
+      //! - Set local velocities
+      //!
+      real VeloX = velocityX[k];
+      real VeloY = velocityY[k];
+      real VeloZ = velocityZ[k];
+
+
       ////////////////////////////////////////////////////////////////////////////////
-      real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
-            *q_dirNE,  *q_dirSW,  *q_dirSE,  *q_dirNW,  *q_dirTE,  *q_dirBW,
-            *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
-            *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
-            *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      //! - Set local subgrid distances (q's)
+      //!
+      SubgridDistances27 subgridD;
+      getPointersToSubgridDistances(subgridD, subgridDistances, numberOfBCnodes);
+     
       ////////////////////////////////////////////////////////////////////////////////
-      //index
-      unsigned int KQK  = k_Q[k];
+      //! - Set neighbor indices (necessary for indirect addressing)
+      //!
+      unsigned int KQK  = subgridDistanceIndices[k];
       unsigned int kzero= KQK;
       unsigned int ke   = KQK;
       unsigned int kw   = neighborX[KQK];
@@ -2355,331 +2281,299 @@ extern "C" __global__ void QVelDeviceCompZeroPress27(   int inx,
       unsigned int kbne = kb;
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
+
       ////////////////////////////////////////////////////////////////////////////////
-      real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
-         f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
+      //! - Set local distributions
+      //!
+      real f_W    = (dist.f[dirE   ])[ke   ];
+      real f_E    = (dist.f[dirW   ])[kw   ];
+      real f_S    = (dist.f[dirN   ])[kn   ];
+      real f_N    = (dist.f[dirS   ])[ks   ];
+      real f_B    = (dist.f[dirT   ])[kt   ];
+      real f_T    = (dist.f[dirB   ])[kb   ];
+      real f_SW   = (dist.f[dirNE  ])[kne  ];
+      real f_NE   = (dist.f[dirSW  ])[ksw  ];
+      real f_NW   = (dist.f[dirSE  ])[kse  ];
+      real f_SE   = (dist.f[dirNW  ])[knw  ];
+      real f_BW   = (dist.f[dirTE  ])[kte  ];
+      real f_TE   = (dist.f[dirBW  ])[kbw  ];
+      real f_TW   = (dist.f[dirBE  ])[kbe  ];
+      real f_BE   = (dist.f[dirTW  ])[ktw  ];
+      real f_BS   = (dist.f[dirTN  ])[ktn  ];
+      real f_TN   = (dist.f[dirBS  ])[kbs  ];
+      real f_TS   = (dist.f[dirBN  ])[kbn  ];
+      real f_BN   = (dist.f[dirTS  ])[kts  ];
+      real f_BSW  = (dist.f[dirTNE ])[ktne ];
+      real f_BNE  = (dist.f[dirTSW ])[ktsw ];
+      real f_BNW  = (dist.f[dirTSE ])[ktse ];
+      real f_BSE  = (dist.f[dirTNW ])[ktnw ];
+      real f_TSW  = (dist.f[dirBNE ])[kbne ];
+      real f_TNE  = (dist.f[dirBSW ])[kbsw ];
+      real f_TNW  = (dist.f[dirBSE ])[kbse ];
+      real f_TSE  = (dist.f[dirBNW ])[kbnw ];
 
-      f_W    = (D.f[dirE   ])[ke   ];
-      f_E    = (D.f[dirW   ])[kw   ];
-      f_S    = (D.f[dirN   ])[kn   ];
-      f_N    = (D.f[dirS   ])[ks   ];
-      f_B    = (D.f[dirT   ])[kt   ];
-      f_T    = (D.f[dirB   ])[kb   ];
-      f_SW   = (D.f[dirNE  ])[kne  ];
-      f_NE   = (D.f[dirSW  ])[ksw  ];
-      f_NW   = (D.f[dirSE  ])[kse  ];
-      f_SE   = (D.f[dirNW  ])[knw  ];
-      f_BW   = (D.f[dirTE  ])[kte  ];
-      f_TE   = (D.f[dirBW  ])[kbw  ];
-      f_TW   = (D.f[dirBE  ])[kbe  ];
-      f_BE   = (D.f[dirTW  ])[ktw  ];
-      f_BS   = (D.f[dirTN  ])[ktn  ];
-      f_TN   = (D.f[dirBS  ])[kbs  ];
-      f_TS   = (D.f[dirBN  ])[kbn  ];
-      f_BN   = (D.f[dirTS  ])[kts  ];
-      f_BSW  = (D.f[dirTNE ])[ktne ];
-      f_BNE  = (D.f[dirTSW ])[ktsw ];
-      f_BNW  = (D.f[dirTSE ])[ktse ];
-      f_BSE  = (D.f[dirTNW ])[ktnw ];
-      f_TSW  = (D.f[dirBNE ])[kbne ];
-      f_TNE  = (D.f[dirBSW ])[kbsw ];
-      f_TNW  = (D.f[dirBSE ])[kbse ];
-      f_TSE  = (D.f[dirBNW ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
-      real vx1, vx2, vx3, drho, feq, q;
-      drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
-                f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[dirZERO])[kzero]); 
+      //! - Calculate macroscopic quantities
+      //!
+      real drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
+                     f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
+                     f_T + f_B + f_N + f_S + f_E + f_W + ((dist.f[dirZERO])[kzero]); 
 
-      vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
-                ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
-                (f_E - f_W)) / (c1o1 + drho); 
+      real vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+                      ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
+                      (f_E - f_W)) / (c1o1 + drho); 
          
 
-      vx2    =   ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
-                 ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
-                 (f_N - f_S)) / (c1o1 + drho); 
-
-      vx3    =   (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
-                 (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
-                 (f_T - f_B)) / (c1o1 + drho); 
+      real vx2    =   ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+                       ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
+                       (f_N - f_S)) / (c1o1 + drho); 
 
+      real vx3    =   (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
+                       (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
+                       (f_T - f_B)) / (c1o1 + drho); 
+    
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3) * (c1o1 + drho);
 
-      //////////////////////////////////////////////////////////////////////////
-      if (isEvenTimestep==false)
-      {
-         D.f[dirE   ] = &DD[dirE   *size_Mat];
-         D.f[dirW   ] = &DD[dirW   *size_Mat];
-         D.f[dirN   ] = &DD[dirN   *size_Mat];
-         D.f[dirS   ] = &DD[dirS   *size_Mat];
-         D.f[dirT   ] = &DD[dirT   *size_Mat];
-         D.f[dirB   ] = &DD[dirB   *size_Mat];
-         D.f[dirNE  ] = &DD[dirNE  *size_Mat];
-         D.f[dirSW  ] = &DD[dirSW  *size_Mat];
-         D.f[dirSE  ] = &DD[dirSE  *size_Mat];
-         D.f[dirNW  ] = &DD[dirNW  *size_Mat];
-         D.f[dirTE  ] = &DD[dirTE  *size_Mat];
-         D.f[dirBW  ] = &DD[dirBW  *size_Mat];
-         D.f[dirBE  ] = &DD[dirBE  *size_Mat];
-         D.f[dirTW  ] = &DD[dirTW  *size_Mat];
-         D.f[dirTN  ] = &DD[dirTN  *size_Mat];
-         D.f[dirBS  ] = &DD[dirBS  *size_Mat];
-         D.f[dirBN  ] = &DD[dirBN  *size_Mat];
-         D.f[dirTS  ] = &DD[dirTS  *size_Mat];
-         D.f[dirZERO] = &DD[dirZERO*size_Mat];
-         D.f[dirTNE ] = &DD[dirTNE *size_Mat];
-         D.f[dirTSW ] = &DD[dirTSW *size_Mat];
-         D.f[dirTSE ] = &DD[dirTSE *size_Mat];
-         D.f[dirTNW ] = &DD[dirTNW *size_Mat];
-         D.f[dirBNE ] = &DD[dirBNE *size_Mat];
-         D.f[dirBSW ] = &DD[dirBSW *size_Mat];
-         D.f[dirBSE ] = &DD[dirBSE *size_Mat];
-         D.f[dirBNW ] = &DD[dirBNW *size_Mat];
-      } 
-      else
-      {
-         D.f[dirW   ] = &DD[dirE   *size_Mat];
-         D.f[dirE   ] = &DD[dirW   *size_Mat];
-         D.f[dirS   ] = &DD[dirN   *size_Mat];
-         D.f[dirN   ] = &DD[dirS   *size_Mat];
-         D.f[dirB   ] = &DD[dirT   *size_Mat];
-         D.f[dirT   ] = &DD[dirB   *size_Mat];
-         D.f[dirSW  ] = &DD[dirNE  *size_Mat];
-         D.f[dirNE  ] = &DD[dirSW  *size_Mat];
-         D.f[dirNW  ] = &DD[dirSE  *size_Mat];
-         D.f[dirSE  ] = &DD[dirNW  *size_Mat];
-         D.f[dirBW  ] = &DD[dirTE  *size_Mat];
-         D.f[dirTE  ] = &DD[dirBW  *size_Mat];
-         D.f[dirTW  ] = &DD[dirBE  *size_Mat];
-         D.f[dirBE  ] = &DD[dirTW  *size_Mat];
-         D.f[dirBS  ] = &DD[dirTN  *size_Mat];
-         D.f[dirTN  ] = &DD[dirBS  *size_Mat];
-         D.f[dirTS  ] = &DD[dirBN  *size_Mat];
-         D.f[dirBN  ] = &DD[dirTS  *size_Mat];
-         D.f[dirZERO] = &DD[dirZERO*size_Mat];
-         D.f[dirTNE ] = &DD[dirBSW *size_Mat];
-         D.f[dirTSW ] = &DD[dirBNE *size_Mat];
-         D.f[dirTSE ] = &DD[dirBNW *size_Mat];
-         D.f[dirTNW ] = &DD[dirBSE *size_Mat];
-         D.f[dirBNE ] = &DD[dirTSW *size_Mat];
-         D.f[dirBSW ] = &DD[dirTNE *size_Mat];
-         D.f[dirBSE ] = &DD[dirTNW *size_Mat];
-         D.f[dirBNW ] = &DD[dirTSE *size_Mat];
-      }
-      ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      //Test
-      //(D.f[dirZERO])[k]=c1o10;
-      ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-	  //ToDo anders Klammern
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Change the pointers to write the results into the correct array
+      //!
+      getPointersToDistributions(dist, distribution, numberOfLBnodes, !isEvenTimestep);
 
-      q = q_dirE[k];
-      if (q>=c0o1 && q<=c1o1)
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Update distributions with subgrid distance (q) between zero and one
+      real feq, q, velocityLB, velocityBC;
+      q = (subgridD.q[dirE])[k];
+      if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one
       {
-         feq=c2o27* (drho/*+three*( vx1        )*/+c9o2*( vx1        )*( vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirW])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W)-c6o1*c2o27*( VeloX     ))/(c1o1+q) - c2o27 * drho;
-         //(D.f[dirW])[kw]=zero;
+         velocityLB = vx1;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = VeloX;
+         (dist.f[dirW])[kw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_E, f_W, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = q_dirW[k];
+      q = (subgridD.q[dirW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c2o27* (drho/*+three*(-vx1        )*/+c9o2*(-vx1        )*(-vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirE])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E)-c6o1*c2o27*(-VeloX     ))/(c1o1+q) - c2o27 * drho;
-         //(D.f[dirE])[ke]=zero;
+         velocityLB = -vx1;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = -VeloX;
+         (dist.f[dirE])[ke] = getInterpolatedDistributionForVeloWithPressureBC(q, f_W, f_E, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = q_dirN[k];
+      q = (subgridD.q[dirN])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c2o27* (drho/*+three*(    vx2     )*/+c9o2*(     vx2    )*(     vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirS])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S)-c6o1*c2o27*( VeloY     ))/(c1o1+q) - c2o27 * drho;
-         //(D.f[dirS])[ks]=zero;
+         velocityLB = vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = VeloY;
+         (dist.f[dirS])[ks] = getInterpolatedDistributionForVeloWithPressureBC(q, f_N, f_S, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = q_dirS[k];
+      q = (subgridD.q[dirS])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c2o27* (drho/*+three*(   -vx2     )*/+c9o2*(    -vx2    )*(    -vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirN])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N)-c6o1*c2o27*(-VeloY     ))/(c1o1+q) - c2o27 * drho;
-         //(D.f[dirN])[kn]=zero;
+         velocityLB = -vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = -VeloY;
+         (dist.f[dirN])[kn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_S, f_N, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = q_dirT[k];
+      q = (subgridD.q[dirT])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c2o27* (drho/*+three*(         vx3)*/+c9o2*(         vx3)*(         vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirB])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B)-c6o1*c2o27*( VeloZ     ))/(c1o1+q) - c2o27 * drho;
-         //(D.f[dirB])[kb]=one;
+         velocityLB = vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = VeloZ;
+         (dist.f[dirB])[kb] = getInterpolatedDistributionForVeloWithPressureBC(q, f_T, f_B, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = q_dirB[k];
+      q = (subgridD.q[dirB])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c2o27* (drho/*+three*(        -vx3)*/+c9o2*(        -vx3)*(        -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirT])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T)-c6o1*c2o27*(-VeloZ     ))/(c1o1+q) - c2o27 * drho;
-         //(D.f[dirT])[kt]=zero;
+         velocityLB = -vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = -VeloZ;
+         (dist.f[dirT])[kt] = getInterpolatedDistributionForVeloWithPressureBC(q, f_B, f_T, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = q_dirNE[k];
+      q = (subgridD.q[dirNE])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*( vx1+vx2    )*/+c9o2*( vx1+vx2    )*( vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirSW])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c6o1*c1o54*(VeloX+VeloY))/(c1o1+q) - c1o54 * drho;
-         //(D.f[dirSW])[ksw]=zero;
+         velocityLB = vx1 + vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloX + VeloY;
+         (dist.f[dirSW])[ksw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NE, f_SW, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = q_dirSW[k];
+      q = (subgridD.q[dirSW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(-vx1-vx2    )*/+c9o2*(-vx1-vx2    )*(-vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirNE])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q) - c1o54 * drho;
-         //(D.f[dirNE])[kne]=zero;
+         velocityLB = -vx1 - vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloX - VeloY;
+         (dist.f[dirNE])[kne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SW, f_NE, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = q_dirSE[k];
+      q = (subgridD.q[dirSE])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*( vx1-vx2    )*/+c9o2*( vx1-vx2    )*( vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirNW])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c6o1*c1o54*( VeloX-VeloY))/(c1o1+q) - c1o54 * drho;
-         //(D.f[dirNW])[knw]=zero;
+         velocityLB = vx1 - vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloX - VeloY;
+         (dist.f[dirNW])[knw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SE, f_NW, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = q_dirNW[k];
+      q = (subgridD.q[dirNW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(-vx1+vx2    )*/+c9o2*(-vx1+vx2    )*(-vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirSE])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q) - c1o54 * drho;
-         //(D.f[dirSE])[kse]=zero;
+         velocityLB = -vx1 + vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloX + VeloY;
+         (dist.f[dirSE])[kse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NW, f_SE, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = q_dirTE[k];
+      q = (subgridD.q[dirTE])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*( vx1    +vx3)*/+c9o2*( vx1    +vx3)*( vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirBW])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q) - c1o54 * drho;
-         //(D.f[dirBW])[kbw]=zero;
+         velocityLB = vx1 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloX + VeloZ;
+         (dist.f[dirBW])[kbw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TE, f_BW, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = q_dirBW[k];
+      q = (subgridD.q[dirBW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(-vx1    -vx3)*/+c9o2*(-vx1    -vx3)*(-vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirTE])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q) - c1o54 * drho;
-         //(D.f[dirTE])[kte]=zero;
+         velocityLB = -vx1 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloX - VeloZ;
+         (dist.f[dirTE])[kte] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BW, f_TE, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = q_dirBE[k];
+      q = (subgridD.q[dirBE])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*( vx1    -vx3)*/+c9o2*( vx1    -vx3)*( vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirTW])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q) - c1o54 * drho;
-         //(D.f[dirTW])[ktw]=zero;
+         velocityLB = vx1 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloX - VeloZ;
+         (dist.f[dirTW])[ktw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BE, f_TW, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = q_dirTW[k];
+      q = (subgridD.q[dirTW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(-vx1    +vx3)*/+c9o2*(-vx1    +vx3)*(-vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirBE])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q) - c1o54 * drho;
-         //(D.f[dirBE])[kbe]=zero;
+         velocityLB = -vx1 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloX + VeloZ;
+         (dist.f[dirBE])[kbe] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TW, f_BE, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = q_dirTN[k];
+      q = (subgridD.q[dirTN])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(     vx2+vx3)*/+c9o2*(     vx2+vx3)*(     vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirBS])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q) - c1o54 * drho;
-         //(D.f[dirBS])[kbs]=zero;
+         velocityLB = vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloY + VeloZ;
+         (dist.f[dirBS])[kbs] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TN, f_BS, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = q_dirBS[k];
+      q = (subgridD.q[dirBS])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(    -vx2-vx3)*/+c9o2*(    -vx2-vx3)*(    -vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirTN])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q) - c1o54 * drho;
-         //(D.f[dirTN])[ktn]=zero;
+         velocityLB = -vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloY - VeloZ;
+         (dist.f[dirTN])[ktn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BS, f_TN, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = q_dirBN[k];
+      q = (subgridD.q[dirBN])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(     vx2-vx3)*/+c9o2*(     vx2-vx3)*(     vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirTS])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q) - c1o54 * drho;
-         //(D.f[dirTS])[kts]=zero;
+         velocityLB = vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloY - VeloZ;
+         (dist.f[dirTS])[kts] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BN, f_TS, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = q_dirTS[k];
+      q = (subgridD.q[dirTS])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(    -vx2+vx3)*/+c9o2*(    -vx2+vx3)*(    -vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirBN])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q) - c1o54 * drho;
-         //(D.f[dirBN])[kbn]=zero;
+         velocityLB = -vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloY + VeloZ;
+         (dist.f[dirBN])[kbn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TS, f_BN, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = q_dirTNE[k];
+      q = (subgridD.q[dirTNE])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*( vx1+vx2+vx3)*/+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirBSW])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
-         //(D.f[dirBSW])[kbsw]=zero;
+         velocityLB = vx1 + vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = VeloX + VeloY + VeloZ;
+         (dist.f[dirBSW])[kbsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNE, f_BSW, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = q_dirBSW[k];
+      q = (subgridD.q[dirBSW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*(-vx1-vx2-vx3)*/+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirTNE])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
-         //(D.f[dirTNE])[ktne]=zero;
+         velocityLB = -vx1 - vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = -VeloX - VeloY - VeloZ;
+         (dist.f[dirTNE])[ktne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSW, f_TNE, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = q_dirBNE[k];
+      q = (subgridD.q[dirBNE])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*( vx1+vx2-vx3)*/+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirTSW])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
-         //(D.f[dirTSW])[ktsw]=zero;
+         velocityLB = vx1 + vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = VeloX + VeloY - VeloZ;
+         (dist.f[dirTSW])[ktsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNE, f_TSW, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = q_dirTSW[k];
+      q = (subgridD.q[dirTSW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*(-vx1-vx2+vx3)*/+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirBNE])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
-         //(D.f[dirBNE])[kbne]=zero;
+         velocityLB = -vx1 - vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = -VeloX - VeloY + VeloZ;
+         (dist.f[dirBNE])[kbne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSW, f_BNE, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = q_dirTSE[k];
+      q = (subgridD.q[dirTSE])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*( vx1-vx2+vx3)*/+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirBNW])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
-         //(D.f[dirBNW])[kbnw]=zero;
+         velocityLB = vx1 - vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = VeloX - VeloY + VeloZ;
+         (dist.f[dirBNW])[kbnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSE, f_BNW, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = q_dirBNW[k];
+      q = (subgridD.q[dirBNW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*(-vx1+vx2-vx3)*/+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirTSE])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
-         //(D.f[dirTSE])[ktse]=zero;
+         velocityLB = -vx1 + vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = -VeloX + VeloY - VeloZ;
+         (dist.f[dirTSE])[ktse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNW, f_TSE, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = q_dirBSE[k];
+      q = (subgridD.q[dirBSE])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*( vx1-vx2-vx3)*/+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirTNW])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
-         //(D.f[dirTNW])[ktnw]=zero;
+         velocityLB = vx1 - vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = VeloX - VeloY - VeloZ;
+         (dist.f[dirTNW])[ktnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSE, f_TNW, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = q_dirTNW[k];
+      q = (subgridD.q[dirTNW])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*(-vx1+vx2+vx3)*/+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[dirBSE])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
-         //(D.f[dirBSE])[kbse]=zero;
+         velocityLB = -vx1 + vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = -VeloX + VeloY + VeloZ;
+         (dist.f[dirBSE])[kbse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNW, f_BSE, feq, omega, drho, velocityBC, c1o216);
       }
    }
 }
@@ -2721,6 +2615,7 @@ extern "C" __global__ void QVelDeviceCompZeroPress27(   int inx,
 
 
 
+
 
 
 //////////////////////////////////////////////////////////////////////////////
@@ -2732,8 +2627,7 @@ extern "C" __global__ void QVelDeviceCompZeroPress1h27( int inx,
 														real* DD, 
 														int* k_Q, 
 														real* QQ,
-														unsigned int sizeQ,
-														int numberOfBCnodes, 
+														unsigned int numberOfBCnodes,
 														real om1, 
 														real Phi,
 														real angularVelocity,
@@ -2844,32 +2738,32 @@ extern "C" __global__ void QVelDeviceCompZeroPress1h27( int inx,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -3525,8 +3419,7 @@ extern "C" __global__ void QVelDevPlainBB27(real* vx,
 											real* DD,
 											int* k_Q, 
 											real* QQ,
-											unsigned int sizeQ,
-											int numberOfBCnodes, 
+											unsigned int numberOfBCnodes, 
 											real om1, 
 											unsigned int* neighborX,
 											unsigned int* neighborY,
@@ -3618,32 +3511,32 @@ extern "C" __global__ void QVelDevPlainBB27(real* vx,
 			 *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			 *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			 *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -3838,14 +3731,13 @@ extern "C" __global__ void QVelDevPlainBB27(real* vx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDevCouhette27(real* vx,
+extern "C" __global__ void QVelDevCouette27(real* vx,
 											real* vy,
 	 										real* vz,
 											real* DD,
 											int* k_Q, 
 											real* QQ,
-											unsigned int sizeQ,
-											int numberOfBCnodes, 
+											unsigned int numberOfBCnodes, 
 											real om1, 
 											unsigned int* neighborX,
 											unsigned int* neighborY,
@@ -3937,32 +3829,32 @@ extern "C" __global__ void QVelDevCouhette27(real* vx,
 			 *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			 *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			 *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -4207,8 +4099,7 @@ extern "C" __global__ void QVelDev1h27( int inx,
 										real* DD, 
 										int* k_Q, 
 										real* QQ,
-										unsigned int sizeQ,
-										int numberOfBCnodes, 
+										unsigned int numberOfBCnodes, 
 										real om1,
 										real Phi,
 										real angularVelocity,
@@ -4315,32 +4206,32 @@ extern "C" __global__ void QVelDev1h27( int inx,
 			*q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW; 
-		q_dirE   = &QQ[dirE   *sizeQ];
-		q_dirW   = &QQ[dirW   *sizeQ];
-		q_dirN   = &QQ[dirN   *sizeQ];
-		q_dirS   = &QQ[dirS   *sizeQ];
-		q_dirT   = &QQ[dirT   *sizeQ];
-		q_dirB   = &QQ[dirB   *sizeQ];
-		q_dirNE  = &QQ[dirNE  *sizeQ];
-		q_dirSW  = &QQ[dirSW  *sizeQ];
-		q_dirSE  = &QQ[dirSE  *sizeQ];
-		q_dirNW  = &QQ[dirNW  *sizeQ];
-		q_dirTE  = &QQ[dirTE  *sizeQ];
-		q_dirBW  = &QQ[dirBW  *sizeQ];
-		q_dirBE  = &QQ[dirBE  *sizeQ];
-		q_dirTW  = &QQ[dirTW  *sizeQ];
-		q_dirTN  = &QQ[dirTN  *sizeQ];
-		q_dirBS  = &QQ[dirBS  *sizeQ];
-		q_dirBN  = &QQ[dirBN  *sizeQ];
-		q_dirTS  = &QQ[dirTS  *sizeQ];
-		q_dirTNE = &QQ[dirTNE *sizeQ];
-		q_dirTSW = &QQ[dirTSW *sizeQ];
-		q_dirTSE = &QQ[dirTSE *sizeQ];
-		q_dirTNW = &QQ[dirTNW *sizeQ];
-		q_dirBNE = &QQ[dirBNE *sizeQ];
-		q_dirBSW = &QQ[dirBSW *sizeQ];
-		q_dirBSE = &QQ[dirBSE *sizeQ];
-		q_dirBNW = &QQ[dirBNW *sizeQ];
+		q_dirE   = &QQ[dirE   * numberOfBCnodes];
+		q_dirW   = &QQ[dirW   * numberOfBCnodes];
+		q_dirN   = &QQ[dirN   * numberOfBCnodes];
+		q_dirS   = &QQ[dirS   * numberOfBCnodes];
+		q_dirT   = &QQ[dirT   * numberOfBCnodes];
+		q_dirB   = &QQ[dirB   * numberOfBCnodes];
+		q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+		q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+		q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+		q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+		q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+		q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+		q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+		q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+		q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+		q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+		q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+		q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+		q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+		q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+		q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+		q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+		q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+		q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+		q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+		q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
 		////////////////////////////////////////////////////////////////////////////////
 		//index
 		unsigned int KQK  = k_Q[k];
@@ -4990,8 +4881,7 @@ extern "C" __global__ void QVelDeviceComp27(
 											real* distribution,
 											int* subgridDistanceIndices,
 											real* subgridDistances,
-											unsigned int numberOfSubgridIndices,
-											int numberOfBCnodes,
+											unsigned int numberOfBCnodes,
 											real omega,
 											unsigned int* neighborX,
 											unsigned int* neighborY,
@@ -5000,11 +4890,11 @@ extern "C" __global__ void QVelDeviceComp27(
 											bool isEvenTimestep)
 {
    //////////////////////////////////////////////////////////////////////////
-	//! The velocity boundary condition is executed in the following steps
-	//!
-	////////////////////////////////////////////////////////////////////////////////
-	//! - Get node index coordinates from thredIdx, blockIdx, blockDim and gridDim.
-	//!
+   //! The velocity boundary condition is executed in the following steps
+   //!
+   ////////////////////////////////////////////////////////////////////////////////
+   //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+   //!
    const unsigned  x = threadIdx.x;  // global x-index 
    const unsigned  y = blockIdx.x;   // global y-index 
    const unsigned  z = blockIdx.y;   // global z-index 
@@ -5037,29 +4927,29 @@ extern "C" __global__ void QVelDeviceComp27(
       //! - Set local subgrid distances (q's)
       //!
       SubgridDistances27 subgridD;
-      getPointersToSubgridDistances(subgridD, subgridDistances, numberOfSubgridIndices);
+      getPointersToSubgridDistances(subgridD, subgridDistances, numberOfBCnodes);
       
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set neighbor indices (necessary for indirect addressing)
       //!
-      unsigned int KQK  = subgridDistanceIndices[k];
-      unsigned int kzero= KQK;
-      unsigned int ke   = KQK;
-      unsigned int kw   = neighborX[KQK];
-      unsigned int kn   = KQK;
-      unsigned int ks   = neighborY[KQK];
-      unsigned int kt   = KQK;
-      unsigned int kb   = neighborZ[KQK];
+      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int kzero= indexOfBCnode;
+      unsigned int ke   = indexOfBCnode;
+      unsigned int kw   = neighborX[indexOfBCnode];
+      unsigned int kn   = indexOfBCnode;
+      unsigned int ks   = neighborY[indexOfBCnode];
+      unsigned int kt   = indexOfBCnode;
+      unsigned int kb   = neighborZ[indexOfBCnode];
       unsigned int ksw  = neighborY[kw];
-      unsigned int kne  = KQK;
+      unsigned int kne  = indexOfBCnode;
       unsigned int kse  = ks;
       unsigned int knw  = kw;
       unsigned int kbw  = neighborZ[kw];
-      unsigned int kte  = KQK;
+      unsigned int kte  = indexOfBCnode;
       unsigned int kbe  = kb;
       unsigned int ktw  = kw;
       unsigned int kbs  = neighborZ[ks];
-      unsigned int ktn  = KQK;
+      unsigned int ktn  = indexOfBCnode;
       unsigned int kbn  = kb;
       unsigned int kts  = ks;
       unsigned int ktse = ks;
@@ -5068,7 +4958,7 @@ extern "C" __global__ void QVelDeviceComp27(
       unsigned int kbse = kbs;
       unsigned int ktsw = ksw;
       unsigned int kbne = kb;
-      unsigned int ktne = KQK;
+      unsigned int ktne = indexOfBCnode;
       unsigned int kbsw = neighborZ[ksw];
 
       ////////////////////////////////////////////////////////////////////////////////
@@ -5414,8 +5304,7 @@ extern "C" __global__ void QVelDevice27(int inx,
                                         real* DD, 
                                         int* k_Q, 
                                         real* QQ,
-                                        unsigned int sizeQ,
-                                        int numberOfBCnodes, 
+                                        unsigned int numberOfBCnodes, 
                                         real om1, 
                                         unsigned int* neighborX,
                                         unsigned int* neighborY,
@@ -5507,32 +5396,32 @@ extern "C" __global__ void QVelDevice27(int inx,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[dirE   *sizeQ];
-      q_dirW   = &QQ[dirW   *sizeQ];
-      q_dirN   = &QQ[dirN   *sizeQ];
-      q_dirS   = &QQ[dirS   *sizeQ];
-      q_dirT   = &QQ[dirT   *sizeQ];
-      q_dirB   = &QQ[dirB   *sizeQ];
-      q_dirNE  = &QQ[dirNE  *sizeQ];
-      q_dirSW  = &QQ[dirSW  *sizeQ];
-      q_dirSE  = &QQ[dirSE  *sizeQ];
-      q_dirNW  = &QQ[dirNW  *sizeQ];
-      q_dirTE  = &QQ[dirTE  *sizeQ];
-      q_dirBW  = &QQ[dirBW  *sizeQ];
-      q_dirBE  = &QQ[dirBE  *sizeQ];
-      q_dirTW  = &QQ[dirTW  *sizeQ];
-      q_dirTN  = &QQ[dirTN  *sizeQ];
-      q_dirBS  = &QQ[dirBS  *sizeQ];
-      q_dirBN  = &QQ[dirBN  *sizeQ];
-      q_dirTS  = &QQ[dirTS  *sizeQ];
-      q_dirTNE = &QQ[dirTNE *sizeQ];
-      q_dirTSW = &QQ[dirTSW *sizeQ];
-      q_dirTSE = &QQ[dirTSE *sizeQ];
-      q_dirTNW = &QQ[dirTNW *sizeQ];
-      q_dirBNE = &QQ[dirBNE *sizeQ];
-      q_dirBSW = &QQ[dirBSW *sizeQ];
-      q_dirBSE = &QQ[dirBSE *sizeQ];
-      q_dirBNW = &QQ[dirBNW *sizeQ];
+      q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu b/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu
index 50e3df869f96794e9d8240361b1d828365d61470..df5fdc67507457dc00264fae1ee5d55d4ecb7da5 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu
@@ -7,16 +7,14 @@ using namespace vf::lbm::constant;
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void WallFunction27(int inx,
-										  int iny,
+extern "C" __global__ void WallFunction27(
 										  real* vx,
 										  real* vy,
 										  real* vz,
 										  real* DD, 
 										  int* k_Q, 
 										  real* QQ,
-										  unsigned int sizeQ,
-									      int numberOfBCnodes, 
+										  unsigned int numberOfBCnodes, 
 										  real om1, 
 										  unsigned int* neighborX,
 										  unsigned int* neighborY,
@@ -108,32 +106,32 @@ extern "C" __global__ void WallFunction27(int inx,
       //      *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
       //      *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
       //      *q_dirBSE, *q_dirBNW; 
-      //q_dirE   = &QQ[dirE   *sizeQ];
-      //q_dirW   = &QQ[dirW   *sizeQ];
-      //q_dirN   = &QQ[dirN   *sizeQ];
-      //q_dirS   = &QQ[dirS   *sizeQ];
-      //q_dirT   = &QQ[dirT   *sizeQ];
-      //q_dirB   = &QQ[dirB   *sizeQ];
-      //q_dirNE  = &QQ[dirNE  *sizeQ];
-      //q_dirSW  = &QQ[dirSW  *sizeQ];
-      //q_dirSE  = &QQ[dirSE  *sizeQ];
-      //q_dirNW  = &QQ[dirNW  *sizeQ];
-      //q_dirTE  = &QQ[dirTE  *sizeQ];
-      //q_dirBW  = &QQ[dirBW  *sizeQ];
-      //q_dirBE  = &QQ[dirBE  *sizeQ];
-      //q_dirTW  = &QQ[dirTW  *sizeQ];
-      //q_dirTN  = &QQ[dirTN  *sizeQ];
-      //q_dirBS  = &QQ[dirBS  *sizeQ];
-      //q_dirBN  = &QQ[dirBN  *sizeQ];
-      //q_dirTS  = &QQ[dirTS  *sizeQ];
-      //q_dirTNE = &QQ[dirTNE *sizeQ];
-      //q_dirTSW = &QQ[dirTSW *sizeQ];
-      //q_dirTSE = &QQ[dirTSE *sizeQ];
-      //q_dirTNW = &QQ[dirTNW *sizeQ];
-      //q_dirBNE = &QQ[dirBNE *sizeQ];
-      //q_dirBSW = &QQ[dirBSW *sizeQ];
-      //q_dirBSE = &QQ[dirBSE *sizeQ];
-      //q_dirBNW = &QQ[dirBNW *sizeQ];
+      //q_dirE   = &QQ[dirE   * numberOfBCnodes];
+      //q_dirW   = &QQ[dirW   * numberOfBCnodes];
+      //q_dirN   = &QQ[dirN   * numberOfBCnodes];
+      //q_dirS   = &QQ[dirS   * numberOfBCnodes];
+      //q_dirT   = &QQ[dirT   * numberOfBCnodes];
+      //q_dirB   = &QQ[dirB   * numberOfBCnodes];
+      //q_dirNE  = &QQ[dirNE  * numberOfBCnodes];
+      //q_dirSW  = &QQ[dirSW  * numberOfBCnodes];
+      //q_dirSE  = &QQ[dirSE  * numberOfBCnodes];
+      //q_dirNW  = &QQ[dirNW  * numberOfBCnodes];
+      //q_dirTE  = &QQ[dirTE  * numberOfBCnodes];
+      //q_dirBW  = &QQ[dirBW  * numberOfBCnodes];
+      //q_dirBE  = &QQ[dirBE  * numberOfBCnodes];
+      //q_dirTW  = &QQ[dirTW  * numberOfBCnodes];
+      //q_dirTN  = &QQ[dirTN  * numberOfBCnodes];
+      //q_dirBS  = &QQ[dirBS  * numberOfBCnodes];
+      //q_dirBN  = &QQ[dirBN  * numberOfBCnodes];
+      //q_dirTS  = &QQ[dirTS  * numberOfBCnodes];
+      //q_dirTNE = &QQ[dirTNE * numberOfBCnodes];
+      //q_dirTSW = &QQ[dirTSW * numberOfBCnodes];
+      //q_dirTSE = &QQ[dirTSE * numberOfBCnodes];
+      //q_dirTNW = &QQ[dirTNW * numberOfBCnodes];
+      //q_dirBNE = &QQ[dirBNE * numberOfBCnodes];
+      //q_dirBSW = &QQ[dirBSW * numberOfBCnodes];
+      //q_dirBSE = &QQ[dirBSE * numberOfBCnodes];
+      //q_dirBNW = &QQ[dirBNW * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
diff --git a/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp b/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp
index 3ddc257812c556ef6b7d8103cf42e69c8195d044..e6ee3eb1ca50511d7872ef6151eaecdb46bb9b41 100644
--- a/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp
+++ b/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp
@@ -20,7 +20,7 @@ void PositionReader::readFilePropellerCylinderForAlloc(Parameter* para)
 		in.readLine();
 		if (level == para->getFine())
 		{
-			for(int u=0; u<para->getParH(level)->propellerBC.numberOfBCnodes; u++)
+			for(uint u=0; u<para->getParH(level)->propellerBC.numberOfBCnodes; u++)
 			{
 				test = in.readInteger();
 				if (para->getParH(level)->typeOfGridNode[test] == GEO_FLUID)
@@ -55,7 +55,7 @@ void PositionReader::readFilePropellerCylinderForAlloc(Parameter* para)
 		}
 		else
 		{
-			for(int u=0; u<para->getParH(level)->propellerBC.numberOfBCnodes; u++)
+			for(uint u=0; u<para->getParH(level)->propellerBC.numberOfBCnodes; u++)
 			{
 				in.readInteger();
 				in.readDouble();
@@ -167,7 +167,7 @@ void PositionReader::definePropellerQs(Parameter* para)
 	Q.q27[dirBSE ] = &QQ[dirBSE *sizeQ];
 	Q.q27[dirBNW ] = &QQ[dirBNW *sizeQ];
 	//////////////////////////////////////////////////////////////////
-	for(int u=0; u<para->getParH(para->getFine())->propellerBC.numberOfBCnodes; u++)
+	for(uint u=0; u<para->getParH(para->getFine())->propellerBC.numberOfBCnodes; u++)
 	{
 		for (int dir = dirE; dir<=dirBSW; dir++)
 		{
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.cpp
index 24ac3655525e463b9cc54d78182ea2af23e071c2..53d38e96e0e365dc933ebfbbf2f82b8f82306bc9 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/ADKernelManager.cpp
@@ -180,7 +180,7 @@ void ADKernelManager::runADcollisionKernel(int level)
 }
 
 void ADKernelManager::runADslipBCKernel(int level){
-    if (para->getParD(level)->numberOfSlipBCnodes > 1) {
+    if (para->getParD(level)->slipBC.numberOfBCnodes > 1) {
         ADSlipVelDevComp(
             para->getParD(level)->numberofthreads,
             para->getParD(level)->slipBC.normalX,
@@ -190,7 +190,7 @@ void ADKernelManager::runADslipBCKernel(int level){
             para->getParD(level)->distributionsAD27.f[0],
             para->getParD(level)->slipBC.k,
             para->getParD(level)->slipBC.q27[0],
-            para->getParD(level)->numberOfSlipBCnodes,
+            para->getParD(level)->slipBC.numberOfBCnodes,
             para->getParD(level)->omegaDiffusivity,
             para->getParD(level)->neighborX,
             para->getParD(level)->neighborY,
@@ -205,8 +205,6 @@ void ADKernelManager::runADpressureBCKernel(int level){
         if (para->getDiffMod() == 7) {
             // QADPressIncompDev7( 
             //     para->getParD(level)->numberofthreads,
-            //     para->getParD(level)->nx,
-            //     para->getParD(level)->ny,
             //     para->getParD(level)->distributions.f[0],
             //     para->getParD(level)->distributionsAD7.f[0],
             //     para->getParD(level)->TempPress.temp,
@@ -215,7 +213,6 @@ void ADKernelManager::runADpressureBCKernel(int level){
             //     para->getParD(level)->TempPress.k,
             //     para->getParD(level)->pressureBC.q27[0],
             //     para->getParD(level)->TempPress.kTemp,
-            //     para->getParD(level)->TempPress.kTemp,
             //     para->getParD(level)->omega,
             //     para->getParD(level)->neighborX,
             //     para->getParD(level)->neighborY,
@@ -228,8 +225,6 @@ void ADKernelManager::runADpressureBCKernel(int level){
              //////////////////////////////////////////////////////////////////////////
             QADPressDev7(
                 para->getParD(level)->numberofthreads,
-                para->getParD(level)->nx,
-                para->getParD(level)->ny,
                 para->getParD(level)->distributions.f[0],
                 para->getParD(level)->distributionsAD7.f[0],
                 para->getParD(level)->TempPress.temp,
@@ -238,7 +233,6 @@ void ADKernelManager::runADpressureBCKernel(int level){
                 para->getParD(level)->TempPress.k,
                 para->getParD(level)->pressureBC.q27[0],
                 para->getParD(level)->TempPress.kTemp,
-                para->getParD(level)->TempPress.kTemp,
                 para->getParD(level)->omega,
                 para->getParD(level)->neighborX,
                 para->getParD(level)->neighborY,
@@ -249,8 +243,6 @@ void ADKernelManager::runADpressureBCKernel(int level){
         } else if (para->getDiffMod() == 27) {
             // QADPressIncompDev27(
             //     para->getParD(level)->numberofthreads,
-            //     para->getParD(level)->nx,
-            //     para->getParD(level)->ny,
             //     para->getParD(level)->distributions.f[0],
             //     para->getParD(level)->distributionsAD27.f[0],
             //     para->getParD(level)->TempPress.temp,
@@ -259,7 +251,6 @@ void ADKernelManager::runADpressureBCKernel(int level){
             //     para->getParD(level)->TempPress.k,
             //     para->getParD(level)->pressureBC.q27[0],
             //     para->getParD(level)->TempPress.kTemp,
-            //     para->getParD(level)->TempPress.kTemp,
             //     para->getParD(level)->omega,
             //     para->getParD(level)->neighborX,
             //     para->getParD(level)->neighborY,
@@ -272,8 +263,6 @@ void ADKernelManager::runADpressureBCKernel(int level){
             //////////////////////////////////////////////////////////////////////////
             QADPressDev27(
                 para->getParD(level)->numberofthreads,
-                para->getParD(level)->nx,
-                para->getParD(level)->ny,
                 para->getParD(level)->distributions.f[0],
                 para->getParD(level)->distributionsAD27.f[0],
                 para->getParD(level)->TempPress.temp,
@@ -282,7 +271,6 @@ void ADKernelManager::runADpressureBCKernel(int level){
                 para->getParD(level)->TempPress.k,
                 para->getParD(level)->pressureBC.q27[0],
                 para->getParD(level)->TempPress.kTemp,
-                para->getParD(level)->TempPress.kTemp,
                 para->getParD(level)->omega,
                 para->getParD(level)->neighborX,
                 para->getParD(level)->neighborY,
@@ -299,8 +287,6 @@ void ADKernelManager::runADgeometryBCKernel(int level)
         if (para->getDiffMod() == 7) {
             // QNoSlipADincompDev7(
             //     para->getParD(level)->numberofthreads,
-            //     para->getParD(level)->nx,
-            //     para->getParD(level)->ny,
             //     para->getParD(level)->distributions.f[0],
             //     para->getParD(level)->distributionsAD7.f[0],
             //     para->getParD(level)->Temp.temp,
@@ -308,7 +294,6 @@ void ADKernelManager::runADgeometryBCKernel(int level)
             //     para->getParD(level)->Temp.k,
             //     para->getParD(level)->geometryBC.q27[0],
             //     para->getParD(level)->Temp.kTemp,
-            //     para->getParD(level)->Temp.kTemp,
             //     para->getParD(level)->omega,
             //     para->getParD(level)->neighborX,
             //     para->getParD(level)->neighborY,
@@ -322,8 +307,6 @@ void ADKernelManager::runADgeometryBCKernel(int level)
 
             QADDev7(
                 para->getParD(level)->numberofthreads,
-                para->getParD(level)->nx,
-                para->getParD(level)->ny,
                 para->getParD(level)->distributions.f[0],
                 para->getParD(level)->distributionsAD7.f[0],
                 para->getParD(level)->Temp.temp,
@@ -331,7 +314,6 @@ void ADKernelManager::runADgeometryBCKernel(int level)
                 para->getParD(level)->Temp.k,
                 para->getParD(level)->geometryBC.q27[0],
                 para->getParD(level)->Temp.kTemp,
-                para->getParD(level)->Temp.kTemp,
                 para->getParD(level)->omega,
                 para->getParD(level)->neighborX,
                 para->getParD(level)->neighborY,
@@ -342,8 +324,6 @@ void ADKernelManager::runADgeometryBCKernel(int level)
         } else if (para->getDiffMod() == 27) {
             // QNoSlipADincompDev27(
             //     para->getParD(level)->numberofthreads,
-            //     para->getParD(level)->nx,
-            //     para->getParD(level)->ny,
             //     para->getParD(level)->distributions.f[0],
             //     para->getParD(level)->distributionsAD27.f[0],
             //     para->getParD(level)->Temp.temp,
@@ -351,7 +331,6 @@ void ADKernelManager::runADgeometryBCKernel(int level)
             //     para->getParD(level)->Temp.k,
             //     para->getParD(level)->geometryBC.q27[0],
             //     para->getParD(level)->Temp.kTemp,
-            //     para->getParD(level)->Temp.kTemp,
             //     para->getParD(level)->omega,
             //     para->getParD(level)->neighborX,
             //     para->getParD(level)->neighborY,
@@ -365,8 +344,6 @@ void ADKernelManager::runADgeometryBCKernel(int level)
 
             QADBBDev27(
                 para->getParD(level)->numberofthreads,
-                para->getParD(level)->nx,
-                para->getParD(level)->ny,
                 para->getParD(level)->distributions.f[0],
                 para->getParD(level)->distributionsAD27.f[0],
                 para->getParD(level)->Temp.temp,
@@ -374,7 +351,6 @@ void ADKernelManager::runADgeometryBCKernel(int level)
                 para->getParD(level)->Temp.k,
                 para->getParD(level)->geometryBC.q27[0],
                 para->getParD(level)->Temp.kTemp,
-                para->getParD(level)->Temp.kTemp,
                 para->getParD(level)->omega,
                 para->getParD(level)->neighborX,
                 para->getParD(level)->neighborY,
@@ -391,8 +367,6 @@ void ADKernelManager::runADveloBCKernel(int level){
         {
             // QADVeloIncompDev7(
             //     para->getParD(level)->numberofthreads,
-            //     para->getParD(level)->nx,
-            //     para->getParD(level)->ny,
             //     para->getParD(level)->distributions.f[0],
             //     para->getParD(level)->distributionsAD7.f[0],
             //     para->getParD(level)->TempVel.tempPulse,
@@ -401,7 +375,6 @@ void ADKernelManager::runADveloBCKernel(int level){
             //     para->getParD(level)->TempVel.k,
             //     para->getParD(level)->velocityBC.q27[0],
             //     para->getParD(level)->TempVel.kTemp,
-            //     para->getParD(level)->TempVel.kTemp,
             //     para->getParD(level)->omega,
             //     para->getParD(level)->neighborX,
             //     para->getParD(level)->neighborY,
@@ -415,8 +388,6 @@ void ADKernelManager::runADveloBCKernel(int level){
 
             QADVelDev7(
                 para->getParD(level)->numberofthreads,
-                para->getParD(level)->nx,
-                para->getParD(level)->ny,
                 para->getParD(level)->distributions.f[0],
                 para->getParD(level)->distributionsAD7.f[0],
                 para->getParD(level)->TempVel.temp,
@@ -425,7 +396,6 @@ void ADKernelManager::runADveloBCKernel(int level){
                 para->getParD(level)->TempVel.k,
                 para->getParD(level)->velocityBC.q27[0],
                 para->getParD(level)->TempVel.kTemp,
-                para->getParD(level)->TempVel.kTemp,
                 para->getParD(level)->omega,
                 para->getParD(level)->neighborX,
                 para->getParD(level)->neighborY,
@@ -436,8 +406,6 @@ void ADKernelManager::runADveloBCKernel(int level){
         } else if (para->getDiffMod() == 27) {
             // QADVeloIncompDev27(
             //     para->getParD(level)->numberofthreads,
-            //     para->getParD(level)->nx,
-            //     para->getParD(level)->ny,
             //     para->getParD(level)->distributions.f[0],
             //     para->getParD(level)->distributionsAD27.f[0],
             //     para->getParD(level)->TempVel.temp,
@@ -446,7 +414,6 @@ void ADKernelManager::runADveloBCKernel(int level){
             //     para->getParD(level)->TempVel.k,
             //     para->getParD(level)->velocityBC.q27[0],
             //     para->getParD(level)->TempVel.kTemp,
-            //     para->getParD(level)->TempVel.kTemp,
             //     para->getParD(level)->omega,
             //     para->getParD(level)->neighborX,
             //     para->getParD(level)->neighborY,
@@ -459,8 +426,6 @@ void ADKernelManager::runADveloBCKernel(int level){
             //////////////////////////////////////////////////////////////////////////
             QADVelDev27(
                 para->getParD(level)->numberofthreads,
-                para->getParD(level)->nx,
-                para->getParD(level)->ny,
                 para->getParD(level)->distributions.f[0],
                 para->getParD(level)->distributionsAD27.f[0],
                 para->getParD(level)->TempVel.tempPulse,
@@ -468,8 +433,7 @@ void ADKernelManager::runADveloBCKernel(int level){
                 para->getParD(level)->diffusivity,
                 para->getParD(level)->velocityBC.k,
                 para->getParD(level)->velocityBC.q27[0],
-                para->getParD(level)->numberOfVeloBCnodes,
-                para->getParD(level)->numberOfVeloBCnodes,
+                para->getParD(level)->velocityBC.numberOfBCnodes,
                 para->getParD(level)->omega,
                 para->getParD(level)->neighborX,
                 para->getParD(level)->neighborY,
@@ -484,8 +448,6 @@ void ADKernelManager::runADveloBCKernel(int level){
             // {
             //   QADVelDev27(
             //     para->getParD(level)->numberofthreads,
-            //     para->getParD(level)->nx,
-            //     para->getParD(level)->ny,
             //     para->getParD(level)->distributions.f[0],
             //     para->getParD(level)->distributionsAD27.f[0],
             //     para->getParD(level)->TempVel.tempPulse,
@@ -493,8 +455,7 @@ void ADKernelManager::runADveloBCKernel(int level){
             //     para->getParD(level)->diffusivity,
             //     para->getParD(level)->velocityBC.k,
             //     para->getParD(level)->velocityBC.q27[0],
-            //     para->getParD(level)->numberOfVeloBCnodes,
-            //     para->getParD(level)->numberOfVeloBCnodes,
+            //     para->getParD(level)->velocityBC.numberOfBCnodes,
             //     para->getParD(level)->omega,
             //     para->getParD(level)->neighborX,
             //     para->getParD(level)->neighborY,
@@ -506,8 +467,6 @@ void ADKernelManager::runADveloBCKernel(int level){
             // {
             //   QADVelDev27(
             //     para->getParD(level)->numberofthreads,
-            //     para->getParD(level)->nx,
-            //     para->getParD(level)->ny,
             //     para->getParD(level)->distributions.f[0],
             //     para->getParD(level)->distributionsAD27.f[0],
             //     para->getParD(level)->TempVel.temp,
@@ -515,8 +474,7 @@ void ADKernelManager::runADveloBCKernel(int level){
             //     para->getParD(level)->diffusivity,
             //     para->getParD(level)->velocityBC.k,
             //     para->getParD(level)->velocityBC.q27[0],
-            //     para->getParD(level)->numberOfVeloBCnodes,
-            //     para->getParD(level)->numberOfVeloBCnodes,
+            //     para->getParD(level)->velocityBC.numberOfBCnodes,
             //     para->getParD(level)->omega,
             //     para->getParD(level)->neighborX,
             //     para->getParD(level)->neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/LBKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/LBKernelManager.cpp
index 3d49a956fd92e884651eb9653587e9b054afde9b..0c7a88e3c351f11754ae9632ec4766b53190179a 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/LBKernelManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/LBKernelManager.cpp
@@ -72,7 +72,7 @@ void LBKernelManager::runLBMKernel(int level)
 
 void LBKernelManager::runVelocityBCKernelPre(int level)
 {
-    if (para->getParD(level)->numberOfVeloBCnodes > 0)
+    if (para->getParD(level)->velocityBC.numberOfBCnodes > 0)
     {
         // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
         // if ( myid == 0)
@@ -80,7 +80,7 @@ void LBKernelManager::runVelocityBCKernelPre(int level)
         //    VelSchlaffer27(para->getParD(level)->numberofthreads, t,
         //                   para->getParD(level)->distributions.f[0],       para->getParD(level)->velocityBC.Vz,
         //                   para->getParD(level)->velocityBC.deltaVz, para->getParD(level)->velocityBC.k,
-        //                   para->getParD(level)->velocityBC.kN,      para->getParD(level)->numberOfVeloBCnodes,
+        //                   para->getParD(level)->velocityBC.kN,      para->getParD(level)->velocityBC.numberOfBCnodes,
         //                   para->getParD(level)->omega,           para->getParD(level)->neighborX,
         //                   para->getParD(level)->neighborY,    para->getParD(level)->neighborZ,
         //                   para->getParD(level)->numberOfNodes,     para->getParD(level)->isEvenTimestep);
@@ -90,16 +90,13 @@ void LBKernelManager::runVelocityBCKernelPre(int level)
         // high viscosity incompressible
         // QVelDevIncompHighNu27(
         //     para->getParD(level)->numberofthreads,
-        //     para->getParD(level)->nx,
-        //     para->getParD(level)->ny,
         //     para->getParD(level)->velocityBC.Vx,
         //     para->getParD(level)->velocityBC.Vy,
         //     para->getParD(level)->velocityBC.Vz,
         //     para->getParD(level)->distributions.f[0],
         //     para->getParD(level)->velocityBC.k,
         //     para->getParD(level)->velocityBC.q27[0],
-        //     para->getParD(level)->numberOfVeloBCnodes,
-        //     para->getParD(level)->numberOfVeloBCnodes,
+        //     para->getParD(level)->velocityBC.numberOfBCnodes,
         //     para->getParD(level)->omega,
         //     para->getParD(level)->neighborX,
         //     para->getParD(level)->neighborY,
@@ -111,16 +108,13 @@ void LBKernelManager::runVelocityBCKernelPre(int level)
         // high viscosity compressible
         // QVelDevCompHighNu27(
         //     para->getParD(level)->numberofthreads,
-        //     para->getParD(level)->nx,
-        //     para->getParD(level)->ny,
         //     para->getParD(level)->velocityBC.Vx,
         //     para->getParD(level)->velocityBC.Vy,
         //     para->getParD(level)->velocityBC.Vz,
         //     para->getParD(level)->distributions.f[0],
         //     para->getParD(level)->velocityBC.k,
         //     para->getParD(level)->velocityBC.q27[0],
-        //     para->getParD(level)->numberOfVeloBCnodes,
-        //     para->getParD(level)->numberOfVeloBCnodes,
+        //     para->getParD(level)->velocityBC.numberOfBCnodes,
         //     para->getParD(level)->omega,
         //     para->getParD(level)->neighborX,
         //     para->getParD(level)->neighborY,
@@ -132,23 +126,23 @@ void LBKernelManager::runVelocityBCKernelPre(int level)
 
 void LBKernelManager::runVelocityBCKernelPost(int level)
 {
-     if (para->getParD(level)->numberOfVeloBCnodes > 0)
+     if (para->getParD(level)->velocityBC.numberOfBCnodes > 0)
      {
         //   QVelDevicePlainBB27(
-        //     para->getParD()->numberofthreads,
-        //     para->getParD()->velocityBC.Vx,
-        //     para->getParD()->velocityBC.Vy,
-        //     para->getParD()->velocityBC.Vz,
-        //     para->getParD()->distributions.f[0],
-        //     para->getParD()->velocityBC.k,
-        //     para->getParD()->velocityBC.q27[0],
-        //     para->getParD()->numberOfVeloBCnodes,
-        //     para->getParD()->velocityBC.kArray,
-        //     para->getParD()->neighborX,
-        //     para->getParD()->neighborY,
-        //     para->getParD()->neighborZ,
-        //     para->getParD()->numberOfNodes,
-        //     para->getParD()->isEvenTimestep);
+        //     para->getParD(level)->numberofthreads,
+        //     para->getParD(level)->velocityBC.Vx,
+        //     para->getParD(level)->velocityBC.Vy,
+        //     para->getParD(level)->velocityBC.Vz,
+        //     para->getParD(level)->distributions.f[0],
+        //     para->getParD(level)->velocityBC.k,
+        //     para->getParD(level)->velocityBC.q27[0],
+        //     para->getParD(level)->velocityBC.numberOfBCnodes,
+        //     para->getParD(level)->velocityBC.kArray,
+        //     para->getParD(level)->neighborX,
+        //     para->getParD(level)->neighborY,
+        //     para->getParD(level)->neighborZ,
+        //     para->getParD(level)->numberOfNodes,
+        //     para->getParD(level)->isEvenTimestep);
 
         // QVelDev27(
         //     para->getParD(level)->numberofthreads,
@@ -160,45 +154,39 @@ void LBKernelManager::runVelocityBCKernelPost(int level)
         //     para->getParD(level)->distributions.f[0],
         //     para->getParD(level)->velocityBC.k,
         //     para->getParD(level)->velocityBC.q27[0],
-        //     para->getParD(level)->numberOfVeloBCnodes,
-        //     para->getParD(level)->numberOfVeloBCnodes,
+        //     para->getParD(level)->velocityBC.numberOfBCnodes,
         //     para->getParD(level)->omega,
         //     para->getParD(level)->neighborX,
         //     para->getParD(level)->neighborY,
         //     para->getParD(level)->neighborZ,
-        //     para->getParD(level)->size_Mat_SP,
+        //     para->getParD(level)->size_Mat,
         //     para->getParD(level)->isEvenTimestep);
 
         // QVelDevComp27(
-        //     para->getParD(level)->numberofthreads, para->getParD(level)->nx,
-        //     para->getParD(level)->ny,
+        //     para->getParD(level)->numberofthreads,
         //     para->getParD(level)->velocityBC.Vx,
         //     para->getParD(level)->velocityBC.Vy,
         //     para->getParD(level)->velocityBC.Vz,
         //     para->getParD(level)->distributions.f[0],
         //     para->getParD(level)->velocityBC.k,
         //     para->getParD(level)->velocityBC.q27[0],
-        //     para->getParD(level)->numberOfVeloBCnodes,
-        //     para->getParD(level)->numberOfVeloBCnodes,
+        //     para->getParD(level)->velocityBC.numberOfBCnodes,
         //     para->getParD(level)->omega,
         //     para->getParD(level)->neighborX,
         //     para->getParD(level)->neighborY,
         //     para->getParD(level)->neighborZ,
-        //     para->getParD(level)->size_Mat_SP,
+        //     para->getParD(level)->size_Mat,
         //     para->getParD(level)->isEvenTimestep);
 
         QVelDevCompZeroPress27(
             para->getParD(level)->numberofthreads,
-            para->getParD(level)->nx,
-            para->getParD(level)->ny,
             para->getParD(level)->velocityBC.Vx,
             para->getParD(level)->velocityBC.Vy,
             para->getParD(level)->velocityBC.Vz,
             para->getParD(level)->distributions.f[0],
             para->getParD(level)->velocityBC.k,
             para->getParD(level)->velocityBC.q27[0],
-            para->getParD(level)->numberOfVeloBCnodes,
-            para->getParD(level)->velocityBC.kArray,
+            para->getParD(level)->velocityBC.numberOfBCnodes,
             para->getParD(level)->omega,
             para->getParD(level)->neighborX,
             para->getParD(level)->neighborY,
@@ -210,15 +198,15 @@ void LBKernelManager::runVelocityBCKernelPost(int level)
         // D E P R E C A T E D
         //////////////////////////////////////////////////////////////////////////
 
-        //QVelDevice1h27( para->getParD(level)->numberofthreads, para->getParD(level)->nx,           para->getParD(level)->ny,
+        // QVelDevice1h27( para->getParD(level)->numberofthreads, para->getParD(level)->nx,           para->getParD(level)->ny,
         //                para->getParD(level)->velocityBC.Vx,      para->getParD(level)->velocityBC.Vy,   para->getParD(level)->velocityBC.Vz,
         //                para->getParD(level)->distributions.f[0],       para->getParD(level)->velocityBC.k,    para->getParD(level)->velocityBC.q27[0],
-        //                para->getParD(level)->numberOfVeloBCnodes,        para->getParD(level)->numberOfVeloBCnodes,     para->getParD(level)->omega,
+        //                para->getParD(level)->velocityBC.numberOfBCnodes,      para->getParD(level)->omega,
         //                para->getPhi(),                        para->getAngularVelocity(),
         //                para->getParD(level)->neighborX,    para->getParD(level)->neighborY, para->getParD(level)->neighborZ,
         //                para->getParD(level)->coordinateX,       para->getParD(level)->coordinateY,    para->getParD(level)->coordinateZ,
-        //                para->getParD(level)->size_Mat_SP,     para->getParD(level)->isEvenTimestep);
-        //getLastCudaError("QVelDev27 execution failed");
+        //                para->getParD(level)->size_Mat,     para->getParD(level)->isEvenTimestep);
+        // getLastCudaError("QVelDev27 execution failed");
      }
 }
 
@@ -301,13 +289,10 @@ void LBKernelManager::runGeoBCKernelPre(int level, unsigned int t, CudaMemoryMan
         // high viscosity incompressible
         // QDevIncompHighNu27(
         //     para->getParD(level)->numberofthreads,
-        //     para->getParD(level)->nx,
-        //     para->getParD(level)->ny,
         //     para->getParD(level)->distributions.f[0],
         //     para->getParD(level)->geometryBC.k,
         //     para->getParD(level)->geometryBC.q27[0],
         //     para->getParD(level)->geometryBC.numberOfBCnodes,
-        //     para->getParD(level)->geometryBC.numberOfBCnodes,
         //     para->getParD(level)->omega,
         //     para->getParD(level)->neighborX,
         //     para->getParD(level)->neighborY,
@@ -319,13 +304,10 @@ void LBKernelManager::runGeoBCKernelPre(int level, unsigned int t, CudaMemoryMan
         // high viscosity compressible
         // QDevCompHighNu27(
         //     para->getParD(level)->numberofthreads,
-        //     para->getParD(level)->nx,
-        //     para->getParD(level)->ny,
         //     para->getParD(level)->distributions.f[0],
         //     para->getParD(level)->geometryBC.k,
         //     para->getParD(level)->geometryBC.q27[0],
         //     para->getParD(level)->geometryBC.numberOfBCnodes,
-        //     para->getParD(level)->geometryBC.numberOfBCnodes,
         //     para->getParD(level)->omega,
         //     para->getParD(level)->neighborX,
         //     para->getParD(level)->neighborY,
@@ -367,12 +349,11 @@ void LBKernelManager::runGeoBCKernelPost(int level)
         //     para->getParD(level)->geometryBC.k,
         //     para->getParD(level)->geometryBC.q27[0],
         //     para->getParD(level)->geometryBC.numberOfBCnodes,
-        //     para->getParD(level)->geometryBC.numberOfBCnodes,
         //     para->getParD(level)->omega,
         //     para->getParD(level)->neighborX,
         //     para->getParD(level)->neighborY,
         //     para->getParD(level)->neighborZ,
-        //     para->getParD(level)->size_Mat_SP,
+        //     para->getParD(level)->size_Mat,
         //     para->getParD(level)->isEvenTimestep);
 
         // QDev27(
@@ -383,12 +364,11 @@ void LBKernelManager::runGeoBCKernelPost(int level)
         //     para->getParD(level)->geometryBC.k,
         //     para->getParD(level)->geometryBC.q27[0],
         //     para->getParD(level)->geometryBC.numberOfBCnodes,
-        //     para->getParD(level)->geometryBC.numberOfBCnodes,
         //     para->getParD(level)->omega,
         //     para->getParD(level)->neighborX,
         //     para->getParD(level)->neighborY,
         //     para->getParD(level)->neighborZ,
-        //     para->getParD(level)->size_Mat_SP,
+        //     para->getParD(level)->size_Mat,
         //     para->getParD(level)->isEvenTimestep);
 
         // QVelDev27(
@@ -402,12 +382,11 @@ void LBKernelManager::runGeoBCKernelPost(int level)
         //     para->getParD(level)->geometryBC.k,
         //     para->getParD(level)->geometryBC.q27[0],
         //     para->getParD(level)->geometryBC.numberOfBCnodes,
-        //     para->getParD(level)->geometryBC.numberOfBCnodes,
         //     para->getParD(level)->omega,
         //     para->getParD(level)->neighborX,
         //     para->getParD(level)->neighborY,
         //     para->getParD(level)->neighborZ,
-        //     para->getParD(level)->size_Mat_SP,
+        //     para->getParD(level)->size_Mat,
         //     para->getParD(level)->isEvenTimestep);
 
         // QDevComp27(
@@ -418,12 +397,11 @@ void LBKernelManager::runGeoBCKernelPost(int level)
         //     para->getParD(level)->geometryBC.k,
         //     para->getParD(level)->geometryBC.q27[0],
         //     para->getParD(level)->geometryBC.numberOfBCnodes,
-        //     para->getParD(level)->geometryBC.numberOfBCnodes,
         //     para->getParD(level)->omega,
         //     para->getParD(level)->neighborX,
         //     para->getParD(level)->neighborY,
         //     para->getParD(level)->neighborZ,
-        //     para->getParD(level)->size_Mat_SP,
+        //     para->getParD(level)->size_Mat,
         //     para->getParD(level)->isEvenTimestep);
 
         QVelDevComp27(
@@ -435,7 +413,6 @@ void LBKernelManager::runGeoBCKernelPost(int level)
             para->getParD(level)->geometryBC.k,
             para->getParD(level)->geometryBC.q27[0],
             para->getParD(level)->geometryBC.numberOfBCnodes,
-            para->getParD(level)->geometryBC.numberOfBCnodes,
             para->getParD(level)->omega,
             para->getParD(level)->neighborX,
             para->getParD(level)->neighborY,
@@ -443,52 +420,49 @@ void LBKernelManager::runGeoBCKernelPost(int level)
             para->getParD(level)->numberOfNodes,
             para->getParD(level)->isEvenTimestep);
 
-    //     QVelDevCompZeroPress27(
-    //         para->getParD(0)->numberofthreads, para->getParD(0)->nx,
-    //         para->getParD(0)->ny,
-    //         para->getParD(0)->geometryBC.Vx,
-    //         para->getParD(0)->geometryBC.Vy,
-    //         para->getParD(0)->geometryBC.Vz,
-    //         para->getParD(0)->distributions.f[0],
-    //         para->getParD(0)->geometryBC.k,
-    //         para->getParD(0)->geometryBC.q27[0],
-    //         para->getParD(0)->geometryBC.numberOfBCnodes,
-    //         para->getParD(0)->geometryBC.numberOfBCnodes,
-    //         para->getParD(0)->omega,
-    //         para->getParD(0)->neighborX,
-    //         para->getParD(0)->neighborY,
-    //         para->getParD(0)->neighborZ,
-    //         para->getParD(0)->size_Mat_SP,
-    //         para->getParD(0)->isEvenTimestep);
-
-    //     QDev3rdMomentsComp27(
-    //         para->getParD(level)->numberofthreads,
-    //         para->getParD(level)->nx,
-    //         para->getParD(level)->ny,
-    //         para->getParD(level)->distributions.f[0],
-    //         para->getParD(level)->geometryBC.k,
-    //         para->getParD(level)->geometryBC.q27[0],
-    //         para->getParD(level)->geometryBC.numberOfBCnodes,
-    //         para->getParD(level)->geometryBC.numberOfBCnodes,
-    //         para->getParD(level)->omega,
-    //         para->getParD(level)->neighborX,
-    //         para->getParD(level)->neighborY,
-    //         para->getParD(level)->neighborZ,
-    //         para->getParD(level)->size_Mat_SP,
-    //         para->getParD(level)->isEvenTimestep);
+        // QVelDevCompZeroPress27(
+        //     para->getParD(0)->numberofthreads,
+        //     para->getParD(0)->geometryBC.Vx,
+        //     para->getParD(0)->geometryBC.Vy,
+        //     para->getParD(0)->geometryBC.Vz,
+        //     para->getParD(0)->distributions.f[0],
+        //     para->getParD(0)->geometryBC.k,
+        //     para->getParD(0)->geometryBC.q27[0],
+        //     para->getParD(0)->geometryBC.numberOfBCnodes,
+        //     para->getParD(0)->omega,
+        //     para->getParD(0)->neighborX,
+        //     para->getParD(0)->neighborY,
+        //     para->getParD(0)->neighborZ,
+        //     para->getParD(0)->size_Mat,
+        //     para->getParD(0)->isEvenTimestep);
 
-    //     QSlipDev27(
-    //         para->getParD(level)->numberofthreads,
-    //         para->getParD(level)->distributions.f[0],
-    //         para->getParD(level)->geometryBC.k,
-    //         para->getParD(level)->geometryBC.q27[0],
-    //         para->getParD(level)->geometryBC.numberOfBCnodes,
-    //         para->getParD(level)->omega,
-    //         para->getParD(level)->neighborX,
-    //         para->getParD(level)->neighborY,
-    //         para->getParD(level)->neighborZ,
-    //         para->getParD(level)->size_Mat_SP,
-    //         para->getParD(level)->isEvenTimestep);
+        // QDev3rdMomentsComp27(
+        //     para->getParD(level)->numberofthreads,
+        //     para->getParD(level)->nx,
+        //     para->getParD(level)->ny,
+        //     para->getParD(level)->distributions.f[0],
+        //     para->getParD(level)->geometryBC.k,
+        //     para->getParD(level)->geometryBC.q27[0],
+        //     para->getParD(level)->geometryBC.numberOfBCnodes,
+        //     para->getParD(level)->omega,
+        //     para->getParD(level)->neighborX,
+        //     para->getParD(level)->neighborY,
+        //     para->getParD(level)->neighborZ,
+        //     para->getParD(level)->size_Mat,
+        //     para->getParD(level)->isEvenTimestep);
+
+        // QSlipDev27(
+        //     para->getParD(level)->numberofthreads,
+        //     para->getParD(level)->distributions.f[0],
+        //     para->getParD(level)->geometryBC.k,
+        //     para->getParD(level)->geometryBC.q27[0],
+        //     para->getParD(level)->geometryBC.numberOfBCnodes,
+        //     para->getParD(level)->omega,
+        //     para->getParD(level)->neighborX,
+        //     para->getParD(level)->neighborY,
+        //     para->getParD(level)->neighborZ,
+        //     para->getParD(level)->size_Mat,
+        //     para->getParD(level)->isEvenTimestep);
 
     //////////////////////////////////////////////////////////////////////////
     // D E P R E C A T E D
@@ -530,7 +504,7 @@ void LBKernelManager::runGeoBCKernelPost(int level)
 }
 
 void LBKernelManager::runOutflowBCKernelPre(int level){
-    if (para->getParD(level)->numberOfOutflowBCnodes > 0)
+    if (para->getParD(level)->outflowBC.numberOfBCnodes > 0)
     {
         QPressNoRhoDev27(
             para->getParD(level)->numberofthreads,
@@ -558,7 +532,7 @@ void LBKernelManager::runOutflowBCKernelPre(int level){
         //     para->getParD(level)->outflowBC.deltaVz,
         //     para->getParD(level)->outflowBC.k,
         //     para->getParD(level)->outflowBC.kN,
-        //     para->getParD(level)->numberOfOutflowBCnodes,
+        //     para->getParD(level)->outflowBC.numberOfBCnodes,
         //     para->getParD(level)->omega,
         //     para->getParD(level)->neighborX,
         //     para->getParD(level)->neighborY,
@@ -614,8 +588,8 @@ void LBKernelManager::runPressureBCKernelPre(int level){
         //     para->getParD(level)->numberOfNodes,
         //     para->getParD(level)->isEvenTimestep);
 
-        ////////////////////////////////////////////////////////////////////////////////
-        //press NEQ incompressible
+        // //////////////////////////////////////////////////////////////////////////////
+        // // press NEQ incompressible
         // QPressDevIncompNEQ27(
         //     para->getParD(level)->numberofthreads,
         //     para->getParD(level)->pressureBC.RhoBC,
@@ -630,8 +604,8 @@ void LBKernelManager::runPressureBCKernelPre(int level){
         //     para->getParD(level)->numberOfNodes,
         //     para->getParD(level)->isEvenTimestep);
 
-        //////////////////////////////////////////////////////////////////////////////////
-        //press NEQ compressible
+        // ////////////////////////////////////////////////////////////////////////////////
+        // // press NEQ compressible
         // QPressDevNEQ27(
         //     para->getParD(level)->numberofthreads,
         //     para->getParD(level)->pressureBC.RhoBC,
@@ -649,72 +623,71 @@ void LBKernelManager::runPressureBCKernelPre(int level){
 }
 
 void LBKernelManager::runPressureBCKernelPost(int level){
-    if (para->getParD(level)->numberOfPressureBCnodes > 0)
+    if (para->getParD(level)->pressureBC.numberOfBCnodes > 0) // NOTE(review): the only kernel call below is commented out, so this branch currently does nothing - confirm this is intended
     {
-        QPressDev27_IntBB(
-            para->getParD(level)->numberofthreads, 
-            para->getParD(level)->pressureBC.RhoBC,
-            para->getParD(level)->distributions.f[0],
-            para->getParD(level)->pressureBC.k,
-            para->getParD(level)->pressureBC.q27[0],
-            para->getParD(level)->pressureBC.numberOfBCnodes,
-            para->getParD(level)->pressureBC.numberOfBCnodes,
-            para->getParD(level)->omega,
-            para->getParD(level)->neighborX,
-            para->getParD(level)->neighborY,
-            para->getParD(level)->neighborZ,
-            para->getParD(level)->numberOfNodes,
-            para->getParD(level)->isEvenTimestep);
+        // QPressDev27_IntBB(
+        //     para->getParD(level)->numberofthreads, 
+        //     para->getParD(level)->pressureBC.RhoBC,
+        //     para->getParD(level)->distributions.f[0],
+        //     para->getParD(level)->pressureBC.k,
+        //     para->getParD(level)->pressureBC.q27[0],
+        //     para->getParD(level)->pressureBC.numberOfBCnodes,
+        //     para->getParD(level)->omega,
+        //     para->getParD(level)->neighborX,
+        //     para->getParD(level)->neighborY,
+        //     para->getParD(level)->neighborZ,
+        //     para->getParD(level)->numberOfNodes,
+        //     para->getParD(level)->isEvenTimestep);
     }
 }
 
 void LBKernelManager::runStressWallModelKernel(int level){
-    if (para->getParD(level)->numberOfStressBCnodes > 0)
+    if (para->getParD(level)->stressBC.numberOfBCnodes > 0) // launch only when this level has stress BC nodes
     {
         // QStressDevComp27(para->getParD(level)->numberofthreads, para->getParD(level)->distributions.f[0],
         //                 para->getParD(level)->stressBC.k,       para->getParD(level)->stressBC.kN,
-        //                 para->getParD(level)->stressBC.q27[0],  para->getParD(level)->numberOfStressBCnodes,
-        //                 para->getParD(level)->omega,           para->getParD(level)->turbViscosity,
-        //                 para->getParD(level)->velocityX,           para->getParD(level)->velocityY,             para->getParD(level)->velocityY,
+        //                 para->getParD(level)->stressBC.q27[0],  para->getParD(level)->stressBC.numberOfBCnodes,
+        //                 para->getParD(level)->omega,            para->getParD(level)->turbViscosity,
+        //                 para->getParD(level)->velocityX,        para->getParD(level)->velocityY,          para->getParD(level)->velocityZ,
         //                 para->getParD(level)->stressBC.normalX, para->getParD(level)->stressBC.normalY,   para->getParD(level)->stressBC.normalZ,
         //                 para->getParD(level)->stressBC.Vx,      para->getParD(level)->stressBC.Vy,        para->getParD(level)->stressBC.Vz,
         //                 para->getParD(level)->stressBC.Vx1,     para->getParD(level)->stressBC.Vy1,       para->getParD(level)->stressBC.Vz1,
         //                 para->getParD(level)->wallModel.samplingOffset, para->getParD(level)->wallModel.z0,
         //                 para->getHasWallModelMonitor(),        para->getParD(level)->wallModel.u_star,
-        //                 para->getParD(level)->wallModel.Fx,    para->getParD(level)->wallModel.Fy,      para->getParD(level)->wallModel.Fz,
-        //                 para->getParD(level)->neighborX,    para->getParD(level)->neighborY,      para->getParD(level)->neighborZ,
-        //                 para->getParD(level)->size_Mat_SP,     para->getParD(level)->isEvenTimestep);
+        //                 para->getParD(level)->wallModel.Fx,    para->getParD(level)->wallModel.Fy,        para->getParD(level)->wallModel.Fz,
+        //                 para->getParD(level)->neighborX,       para->getParD(level)->neighborY,           para->getParD(level)->neighborZ,
+        //                 para->getParD(level)->size_Mat,        para->getParD(level)->isEvenTimestep);
 
-        BBStressDev27( para->getParD(level)->numberofthreads, para->getParD(level)->distributions.f[0],
+        BBStressDev27( para->getParD(level)->numberofthreads,   para->getParD(level)->distributions.f[0],
                         para->getParD(level)->stressBC.k,       para->getParD(level)->stressBC.kN,
-                        para->getParD(level)->stressBC.q27[0],  para->getParD(level)->numberOfStressBCnodes,
-                        para->getParD(level)->velocityX,           para->getParD(level)->velocityY,             para->getParD(level)->velocityY,
+                        para->getParD(level)->stressBC.q27[0],  para->getParD(level)->stressBC.numberOfBCnodes,
+                        para->getParD(level)->velocityX,        para->getParD(level)->velocityY,          para->getParD(level)->velocityZ, // x, y, z: the third slot must not repeat velocityY
                         para->getParD(level)->stressBC.normalX, para->getParD(level)->stressBC.normalY,   para->getParD(level)->stressBC.normalZ,
                         para->getParD(level)->stressBC.Vx,      para->getParD(level)->stressBC.Vy,        para->getParD(level)->stressBC.Vz,
                         para->getParD(level)->stressBC.Vx1,     para->getParD(level)->stressBC.Vy1,       para->getParD(level)->stressBC.Vz1,
                         para->getParD(level)->wallModel.samplingOffset, para->getParD(level)->wallModel.z0,
-                        para->getHasWallModelMonitor(),        para->getParD(level)->wallModel.u_star,
-                        para->getParD(level)->wallModel.Fx,    para->getParD(level)->wallModel.Fy,      para->getParD(level)->wallModel.Fz,
-                        para->getParD(level)->neighborX,    para->getParD(level)->neighborY,      para->getParD(level)->neighborZ,
-                        para->getParD(level)->numberOfNodes,     para->getParD(level)->isEvenTimestep);
+                        para->getHasWallModelMonitor(),         para->getParD(level)->wallModel.u_star,
+                        para->getParD(level)->wallModel.Fx,     para->getParD(level)->wallModel.Fy,      para->getParD(level)->wallModel.Fz,
+                        para->getParD(level)->neighborX,        para->getParD(level)->neighborY,         para->getParD(level)->neighborZ,
+                        para->getParD(level)->numberOfNodes,    para->getParD(level)->isEvenTimestep);
     }
 }
 
 
 void LBKernelManager::runSlipBCKernel(int level){
-    if (para->getParD(level)->numberOfSlipBCnodes > 0)
+    if (para->getParD(level)->slipBC.numberOfBCnodes > 0)
     {
         // QSlipDev27(
         //     para->getParD(level)->numberofthreads,
         //     para->getParD(level)->distributions.f[0],
         //     para->getParD(level)->slipBC.k,
         //     para->getParD(level)->slipBC.q27[0],
-        //     para->getParD(level)->numberOfSlipBCnodes,
+        //     para->getParD(level)->slipBC.numberOfBCnodes,
         //     para->getParD(level)->omega,
         //     para->getParD(level)->neighborX,
         //     para->getParD(level)->neighborY,
         //     para->getParD(level)->neighborZ,
-        //     para->getParD(level)->size_Mat_SP,
+        //     para->getParD(level)->size_Mat,
         //     para->getParD(level)->isEvenTimestep);
 
         QSlipDevComp27(
@@ -722,7 +695,7 @@ void LBKernelManager::runSlipBCKernel(int level){
             para->getParD(level)->distributions.f[0],
             para->getParD(level)->slipBC.k,
             para->getParD(level)->slipBC.q27[0],
-            para->getParD(level)->numberOfSlipBCnodes,
+            para->getParD(level)->slipBC.numberOfBCnodes,
             para->getParD(level)->omega,
             para->getParD(level)->neighborX,
             para->getParD(level)->neighborY,
@@ -735,7 +708,7 @@ void LBKernelManager::runSlipBCKernel(int level){
 }
 
 void LBKernelManager::runNoSlipBCKernel(int level){
-    if (para->getParD(level)->numberOfNoSlipBCnodes > 0)
+    if (para->getParD(level)->noSlipBC.numberOfBCnodes > 0)
     {
         // QDev27(
         //     para->getParD(level)->numberofthreads,
@@ -744,13 +717,12 @@ void LBKernelManager::runNoSlipBCKernel(int level){
         //     para->getParD(level)->distributions.f[0],
         //     para->getParD(level)->noSlipBC.k,
         //     para->getParD(level)->noSlipBC.q27[0],
-        //     para->getParD(level)->numberOfNoSlipBCnodes,
-        //     para->getParD(level)->numberOfNoSlipBCnodes,
+        //     para->getParD(level)->noSlipBC.numberOfBCnodes,
         //     para->getParD(level)->omega,
         //     para->getParD(level)->neighborX,
         //     para->getParD(level)->neighborY,
         //     para->getParD(level)->neighborZ,
-        //     para->getParD(level)->size_Mat_SP,
+        //     para->getParD(level)->size_Mat,
         //     para->getParD(level)->isEvenTimestep);
 
         // BBDev27(
@@ -760,40 +732,20 @@ void LBKernelManager::runNoSlipBCKernel(int level){
         //     para->getParD(level)->distributions.f[0],
         //     para->getParD(level)->noSlipBC.k,
         //     para->getParD(level)->noSlipBC.q27[0],
-        //     para->getParD(level)->numberOfNoSlipBCnodes,
-        //     para->getParD(level)->numberOfNoSlipBCnodes,
+        //     para->getParD(level)->noSlipBC.numberOfBCnodes,
         //     para->getParD(level)->omega,
         //     para->getParD(level)->neighborX,
         //     para->getParD(level)->neighborY,
         //     para->getParD(level)->neighborZ,
-        //     para->getParD(level)->size_Mat_SP,
-        //     para->getParD(level)->isEvenTimestep);
-
-        // QDev27(
-        //     para->getParD(level)->numberofthreads,
-        //     para->getParD(level)->nx,
-        //     para->getParD(level)->ny,
-        //     para->getParD(level)->distributions.f[0],
-        //     para->getParD(level)->noSlipBC.k,
-        //     para->getParD(level)->noSlipBC.q27[0],
-        //     para->getParD(level)->numberOfNoSlipBCnodes,
-        //     para->getParD(level)->numberOfNoSlipBCnodes,
-        //     para->getParD(level)->omega,
-        //     para->getParD(level)->neighborX,
-        //     para->getParD(level)->neighborY,
-        //     para->getParD(level)->neighborZ,
-        //     para->getParD(level)->size_Mat_SP,
+        //     para->getParD(level)->size_Mat,
         //     para->getParD(level)->isEvenTimestep);
 
         QDevComp27(
             para->getParD(level)->numberofthreads,
-            para->getParD(level)->nx,
-            para->getParD(level)->ny,
             para->getParD(level)->distributions.f[0],
             para->getParD(level)->noSlipBC.k,
             para->getParD(level)->noSlipBC.q27[0],
-            para->getParD(level)->numberOfNoSlipBCnodes,
-            para->getParD(level)->numberOfNoSlipBCnodes,
+            para->getParD(level)->noSlipBC.numberOfBCnodes,
             para->getParD(level)->omega,
             para->getParD(level)->neighborX,
             para->getParD(level)->neighborY,
@@ -803,13 +755,6 @@ void LBKernelManager::runNoSlipBCKernel(int level){
     }
 }
 
-// void LBKernelManager::runPressureBCKernelPre(int level){
-//     if (para->getParD()->numberOfPressureBCnodes > 0)
-//     {
-//         // ...
-//     }
-// }
-
 // void LBKernelManager::calculateMacroscopicValues(int level)
 // {
 //     if (para->getIsADcalculationOn()) {
diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h
index ea37fcc0725dd5c6a0d2d10cdb23a9ab767066a7..0cfab0031fef1aaaed421104e107bc0baac58f7d 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/LB.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h
@@ -191,7 +191,7 @@ typedef struct QforBC{
    real* qread;
    real* q27[27];
    real* q19[19];
-   int numberOfBCnodes=0;
+   unsigned int numberOfBCnodes=0;
    int kArray;
    real *Vx,      *Vy,      *Vz;
    real *Vx1,     *Vy1,     *Vz1;
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index 1ab8dcba564a0d742654cf17d893144410e3a125..29725c5a30737161aeefd8d3642febaf0385bf53 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -782,46 +782,46 @@ void Simulation::run()
 								   para->getParD(lev)->isEvenTimestep);
                    getLastCudaError("CalcMacSP27 execution failed");
 
-				   //�berschreiben mit Wandknoten
-				   //SetOutputWallVelocitySP27(  para->getParD(lev)->numberofthreads,
-							//				   para->getParD(lev)->vx_SP,
-							//				   para->getParD(lev)->vy_SP,
-							//				   para->getParD(lev)->vz_SP,
-							//				   para->getParD(lev)->geometryBC.Vx,
-							//				   para->getParD(lev)->geometryBC.Vy,
-							//				   para->getParD(lev)->geometryBC.Vz,
-							//				   para->getParD(lev)->geometryBC.kQ,
-							//				   para->getParD(lev)->geometryBC.k,
-							//				   para->getParD(lev)->rho,
-							//				   para->getParD(lev)->pressure,
-							//				   para->getParD(lev)->geoSP,
-							//				   para->getParD(lev)->neighborX_SP,
-							//				   para->getParD(lev)->neighborY_SP,
-							//				   para->getParD(lev)->neighborZ_SP,
-							//				   para->getParD(lev)->size_Mat_SP,
-							//				   para->getParD(lev)->d0SP.f[0],
-							//				   para->getParD(lev)->evenOrOdd);
-       //            getLastCudaError("SetOutputWallVelocitySP27 execution failed");
-
-   				   //SetOutputWallVelocitySP27(  para->getParD(lev)->numberofthreads,
-										//	   para->getParD(lev)->vx_SP,
-										//	   para->getParD(lev)->vy_SP,
-										//	   para->getParD(lev)->vz_SP,
-										//	   para->getParD(lev)->velocityBC.Vx,
-										//	   para->getParD(lev)->velocityBC.Vy,
-										//	   para->getParD(lev)->velocityBC.Vz,
-										//	   para->getParD(lev)->numberOfVeloBCnodes,
-										//	   para->getParD(lev)->velocityBC.k,
-										//	   para->getParD(lev)->rho,
-										//	   para->getParD(lev)->pressure,
-										//	   para->getParD(lev)->geoSP,
-										//	   para->getParD(lev)->neighborX_SP,
-										//	   para->getParD(lev)->neighborY_SP,
-										//	   para->getParD(lev)->neighborZ_SP,
-										//	   para->getParD(lev)->size_Mat_SP,
-										//	   para->getParD(lev)->d0SP.f[0],
-										//	   para->getParD(lev)->evenOrOdd);
-          //         getLastCudaError("SetOutputWallVelocitySP27 execution failed");
+				// // overwrite with wall nodes
+				//    SetOutputWallVelocitySP27(  para->getParD(lev)->numberofthreads,
+				// 							   para->getParD(lev)->velocityX,
+				// 							   para->getParD(lev)->velocityY,
+				// 							   para->getParD(lev)->velocityZ,
+				// 							   para->getParD(lev)->geometryBC.Vx,
+				// 							   para->getParD(lev)->geometryBC.Vy,
+				// 							   para->getParD(lev)->geometryBC.Vz,
+				// 							   para->getParD(lev)->geometryBC.numberOfBCnodes,
+				// 							   para->getParD(lev)->geometryBC.k,
+				// 							   para->getParD(lev)->rho,
+				// 							   para->getParD(lev)->pressure,
+				// 							   para->getParD(lev)->typeOfGridNode,
+				// 							   para->getParD(lev)->neighborX,
+				// 							   para->getParD(lev)->neighborY,
+				// 							   para->getParD(lev)->neighborZ,
+				// 							   para->getParD(lev)->size_Mat,
+				// 							   para->getParD(lev)->distributions.f[0],
+				// 							   para->getParD(lev)->isEvenTimestep);
+                //   getLastCudaError("SetOutputWallVelocitySP27 execution failed");
+
+   				//    SetOutputWallVelocitySP27(  para->getParD(lev)->numberofthreads,
+				// 							   para->getParD(lev)->velocityX,
+				// 							   para->getParD(lev)->velocityY,
+				// 							   para->getParD(lev)->velocityZ,
+				// 							   para->getParD(lev)->velocityBC.Vx,
+				// 							   para->getParD(lev)->velocityBC.Vy,
+				// 							   para->getParD(lev)->velocityBC.Vz,
+				// 							   para->getParD(lev)->velocityBC.numberOfBCnodes,
+				// 							   para->getParD(lev)->velocityBC.k,
+				// 							   para->getParD(lev)->rho,
+				// 							   para->getParD(lev)->pressure,
+				// 							   para->getParD(lev)->typeOfGridNode,
+				// 							   para->getParD(lev)->neighborX,
+				// 							   para->getParD(lev)->neighborY,
+				// 							   para->getParD(lev)->neighborZ,
+				// 							   para->getParD(lev)->size_Mat,
+				// 							   para->getParD(lev)->distributions.f[0],
+				// 							   para->getParD(lev)->isEvenTimestep);
+                //   getLastCudaError("SetOutputWallVelocitySP27 execution failed");
 
 				 //}
 
diff --git a/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp
index 8b11a94bb264a64e37267be0a15b205044719d31..bbdc0416c9a3174a966f7710eac9d460e2da9e29 100644
--- a/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp
@@ -210,7 +210,7 @@ void writeBcPointsDebug(Parameter *para)
     nodesVec2.resize(nodeNumberVec * 8);
     int nodeCount2 = 0;
     for (int level = 0; level <= para->getMaxLevel(); level++) {
-        for (int u = 0; u < para->getParH(level)->noSlipBC.numberOfBCnodes; u++) {
+        for (uint u = 0; u < para->getParH(level)->noSlipBC.numberOfBCnodes; u++) {
             int pos = para->getParH(level)->noSlipBC.k[u];
 
             double x1 = para->getParH(level)->coordinateX[pos];
@@ -238,7 +238,7 @@ void writePressPointsDebug(Parameter *para)
     nodesVec.resize(nodeNumberVec);
     int nodeCount2 = 0;
     for (int level = 0; level <= para->getMaxLevel(); level++) {
-        for (int u = 0; u < para->getParH(level)->pressureBC.numberOfBCnodes; u++) {
+        for (uint u = 0; u < para->getParH(level)->pressureBC.numberOfBCnodes; u++) {
             int pos = para->getParH(level)->pressureBC.k[u];
 
             double x1 = para->getParH(level)->coordinateX[pos];
@@ -265,7 +265,7 @@ void writePressNeighborPointsDebug(Parameter *para)
     nodesVec.resize(nodeNumberVec);
     int nodeCount2 = 0;
     for (int level = 0; level <= para->getMaxLevel(); level++) {
-        for (int u = 0; u < para->getParH(level)->pressureBC.numberOfBCnodes; u++) {
+        for (uint u = 0; u < para->getParH(level)->pressureBC.numberOfBCnodes; u++) {
             int pos = para->getParH(level)->pressureBC.kN[u];
 
             real x1 = para->getParH(level)->coordinateX[pos];
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
index 95f2e535e7e157e19e6573db9f2a6788618a01a3..d9c9529cfacaaee3d19547af72cc5b1e760db61e 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
@@ -996,14 +996,6 @@ void Parameter::setTempPressD(TempPressforBoundaryConditions *TempPressD)
 {
     this->TempPressD = TempPressD;
 }
-// void Parameter::setNumberOfVeloBCnodes(unsigned int numberOfVeloBCnodes)
-//{
-//   this->numberOfVeloBCnodes = numberOfVeloBCnodes;
-//}
-// void Parameter::setkOutflowQ(unsigned int numberOfOutflowBCnodes)
-//{
-//   this->numberOfOutflowBCnodes = numberOfOutflowBCnodes;
-//}
 // void Parameter::setQinflowH(QforBoundaryConditions* QinflowH)
 //{
 //   this->QinflowH = QinflowH;
@@ -1962,14 +1954,6 @@ TempPressforBoundaryConditions *Parameter::getTempPressD()
 {
     return this->TempPressD;
 }
-// unsigned int Parameter::getNumberOfVeloBCnodes()
-//{
-//   return this->numberOfVeloBCnodes;
-//}
-// unsigned int Parameter::getkOutflowQ()
-//{
-//   return this->numberOfOutflowBCnodes;
-//}
 // QforBoundaryConditions* Parameter::getQinflowH()
 //{
 //   return this->QinflowH;
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
index 6ce3b10935a4371b934f63336f18755ab12b7dd1..21b3643ba062ccb0ca168e25c285afcec55dfafe 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
@@ -221,12 +221,9 @@ struct LBMSimulationParameter {
 
     // BC's////////////////////
     //! \brief stores the boundary condition data
-    QforBoundaryConditions noSlipBC, velocityBC, outflowBC, slipBC, stressBC;
+    QforBoundaryConditions noSlipBC, velocityBC, outflowBC, slipBC, stressBC, pressureBC;
     //! \brief number of lattice nodes for the boundary conditions
-    unsigned int numberOfNoSlipBCnodes = 0, numberOfVeloBCnodes = 0, numberOfOutflowBCnodes = 0, numberOfSlipBCnodes = 0, numberOfStressBCnodes = 0;
-    unsigned int numberOfNoSlipBCnodesRead, numberOfVeloBCnodesRead, numberOfOutflowBCnodesRead, numberOfSlipBCnodesRead, numberOfStressBCnodesRead;
-    QforBoundaryConditions pressureBC;
-    unsigned int numberOfPressureBCnodes = 0, numberOfPressureBCnodesRead;
+    unsigned int numberOfNoSlipBCnodesRead, numberOfVeloBCnodesRead, numberOfOutflowBCnodesRead, numberOfSlipBCnodesRead, numberOfStressBCnodesRead, numberOfPressureBCnodesRead;
 
     QforBoundaryConditions QpressX0, QpressX1, QpressY0, QpressY1, QpressZ0, QpressZ1; // DEPRECATED
     QforBoundaryConditions propellerBC;
@@ -581,8 +578,6 @@ public:
     void setRecvProcessNeighborsAfterFtoCX(int numberOfNodes, int level, int arrayIndex);
     void setRecvProcessNeighborsAfterFtoCY(int numberOfNodes, int level, int arrayIndex);
     void setRecvProcessNeighborsAfterFtoCZ(int numberOfNodes, int level, int arrayIndex);
-    // void setNumberOfVeloBCnodes(unsigned int numberOfVeloBCnodes);
-    // void setkOutflowQ(unsigned int numberOfOutflowBCnodes);
     // void setQinflowH(QforBoundaryConditions* QinflowH);
     // void setQinflowD(QforBoundaryConditions* QinflowD);
     // void setQoutflowH(QforBoundaryConditions* QoutflowH);
diff --git a/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp b/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp
index 1b52dd8f89e176eadd3ce4103f6f2efacd8355a6..e9b3801e618771b2811d1fa46345349be2e651a9 100644
--- a/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp
+++ b/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp
@@ -74,7 +74,7 @@ void initParticles(Parameter* para)
 		}
 		//////////////////////////////////////////////////////////////////////////
 		//set bool "hot wall"
-		for (int h = 0; h < para->getParH(lev)->geometryBC.numberOfBCnodes; h++)
+		for (uint h = 0; h < para->getParH(lev)->geometryBC.numberOfBCnodes; h++)
 		{
 			if (para->getParH(lev)->coordinateX[para->getParH(lev)->geometryBC.k[h]] < para->getStartXHotWall() || 
 				para->getParH(lev)->coordinateX[para->getParH(lev)->geometryBC.k[h]] > para->getEndXHotWall())
@@ -468,7 +468,7 @@ void rearrangeGeometry(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 		printf("total number of nodes: %d \n", counter2);
 		//////////////////////////////////////////////////////////////////////////
 		//store the index information of the BC nodes in the geometry array 
-		for (int index = 0; index < para->getParH(lev)->geometryBC.numberOfBCnodes; index++)
+		for (uint index = 0; index < para->getParH(lev)->geometryBC.numberOfBCnodes; index++)
 		{
 			para->getParH(lev)->typeOfGridNode[para->getParH(lev)->geometryBC.k[index]] = index + OFFSET_BCsInGeo;
 		}
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
index 55c2aef655ea3bad156c16283159434faf9c53c4..4bcfce7363a7ddf1496d68c81b13c761e97b4e5f 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
@@ -156,7 +156,7 @@ void WallModelProbe::findPoints(Parameter* para, GridProvider* gridProvider, std
                             std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level,
                             int level)
 {
-    assert( para->getParD(level)->numberOfStressBCnodes > 0 && para->getHasWallModelMonitor() );
+    assert( para->getParD(level)->stressBC.numberOfBCnodes > 0 && para->getHasWallModelMonitor() );
 
     real dt = para->getTimeRatio();
     uint nt = uint((para->getTEnd()-this->tStartAvg)/this->tAvg);
@@ -213,7 +213,7 @@ void WallModelProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Paramete
     thrust::permutation_iterator<valIterator, indIterator> dpdz_iter_begin(dpdz_thrust, indices_thrust);
     thrust::permutation_iterator<valIterator, indIterator> dpdz_iter_end  (dpdz_thrust, indices_thrust+probeStruct->nIndices);
 
-    real N = para->getParD(level)->numberOfStressBCnodes;
+    real N = para->getParD(level)->stressBC.numberOfBCnodes;
     real n = (real)probeStruct->vals;
     int nPoints = probeStruct->nPoints;