From 4176ad9fb93cf20773427f8665e64a769b91422d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Sch=C3=B6nherr?= <schoen@irmb.tu-bs.de>
Date: Sat, 18 Nov 2017 14:49:11 +0100
Subject: [PATCH] AA2016 Kernel with wale + half forcing + new calculation an
 writing for the basel measurepoints

---
 .../Calculation/ForceCalculations.cpp         |    4 +-
 src/VirtualFluids_GPU/GPU/BGK27.cu            |   52 +-
 src/VirtualFluids_GPU/GPU/CalcMac27.cu        |   28 +-
 src/VirtualFluids_GPU/GPU/Cumulant27.cu       |   12 +-
 src/VirtualFluids_GPU/GPU/GPU_Interface.h     |   18 +
 src/VirtualFluids_GPU/GPU/GPU_Kernels.cuh     |   16 +
 src/VirtualFluids_GPU/GPU/LBMKernel.cu        |   55 +
 src/VirtualFluids_GPU/GPU/WaleCumulant27.cu   | 1064 ++++++++++++++++-
 .../Interface_OpenFOAM/Interface.cpp          |    9 +-
 src/VirtualFluids_GPU/LBM/Simulation.cpp      |   53 +-
 .../Output/MeasurePointWriter.hpp             |  121 ++
 src/VirtualFluids_GPU/Output/WriteData.cpp    |   12 +-
 src/VirtualFluids_GPU/Parameter/Parameter.cpp |    2 +-
 13 files changed, 1376 insertions(+), 70 deletions(-)

diff --git a/src/VirtualFluids_GPU/Calculation/ForceCalculations.cpp b/src/VirtualFluids_GPU/Calculation/ForceCalculations.cpp
index 2bc9b4a17..323bebcef 100644
--- a/src/VirtualFluids_GPU/Calculation/ForceCalculations.cpp
+++ b/src/VirtualFluids_GPU/Calculation/ForceCalculations.cpp
@@ -17,8 +17,8 @@ ForceCalculations::ForceCalculations(Parameter* para)
 
 	Kpcrit = 3.0 / Ta;// 0.3;
 	Tcrit = 3.0 * Ta; // 30.0;
-	Tn = 0.5 * Tcrit;
-	Tv = 0.12 * Tcrit;
+	Tn = 0.5 * Tcrit; //1.0 * Tcrit; //0.5 * Tcrit;
+	Tv = 0.12 * Tcrit; //0.24 * Tcrit; //0.12 * Tcrit;
 
 	Kp = 0.6 * Kpcrit;
 	Ki = Kp / Tn;
diff --git a/src/VirtualFluids_GPU/GPU/BGK27.cu b/src/VirtualFluids_GPU/GPU/BGK27.cu
index 93b50cb68..62a289952 100644
--- a/src/VirtualFluids_GPU/GPU/BGK27.cu
+++ b/src/VirtualFluids_GPU/GPU/BGK27.cu
@@ -1825,32 +1825,32 @@ extern "C" __global__ void LB_Kernel_BGK_Comp_SP_27(doubflo omega,
 			doubflo cusq = c3o2*(vx1*vx1 + vx2*vx2 + vx3*vx3);
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 			fZERO = fZERO *(one + (-omega)) - (-omega)*   c8over27*  (drho - rho * cusq);
-			fE = fE    *(one + (-omega)) - (-omega)*   c2over27*  (drho + rho * (three*(vx1)+c9over2*(vx1)*(vx1)-cusq));
-			fW = fW    *(one + (-omega)) - (-omega)*   c2over27*  (drho + rho * (three*(-vx1) + c9over2*(-vx1)*(-vx1) - cusq));
-			fN = fN    *(one + (-omega)) - (-omega)*   c2over27*  (drho + rho * (three*(vx2)+c9over2*(vx2)*(vx2)-cusq));
-			fS = fS    *(one + (-omega)) - (-omega)*   c2over27*  (drho + rho * (three*(-vx2) + c9over2*(-vx2)*(-vx2) - cusq));
-			fT = fT    *(one + (-omega)) - (-omega)*   c2over27*  (drho + rho * (three*(vx3)+c9over2*(vx3)*(vx3)-cusq));
-			fB = fB    *(one + (-omega)) - (-omega)*   c2over27*  (drho + rho * (three*(-vx3) + c9over2*(-vx3)*(-vx3) - cusq));
-			fNE = fNE   *(one + (-omega)) - (-omega)*   c1over54*  (drho + rho * (three*(vx1 + vx2) + c9over2*(vx1 + vx2)*(vx1 + vx2) - cusq));
-			fSW = fSW   *(one + (-omega)) - (-omega)*   c1over54*  (drho + rho * (three*(-vx1 - vx2) + c9over2*(-vx1 - vx2)*(-vx1 - vx2) - cusq));
-			fSE = fSE   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(vx1 - vx2) + c9over2*(vx1 - vx2)*(vx1 - vx2) - cusq));
-			fNW = fNW   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(-vx1 + vx2) + c9over2*(-vx1 + vx2)*(-vx1 + vx2) - cusq));
-			fTE = fTE   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(vx1 + vx3) + c9over2*(vx1 + vx3)*(vx1 + vx3) - cusq));
-			fBW = fBW   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(-vx1 - vx3) + c9over2*(-vx1 - vx3)*(-vx1 - vx3) - cusq));
-			fBE = fBE   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(vx1 - vx3) + c9over2*(vx1 - vx3)*(vx1 - vx3) - cusq));
-			fTW = fTW   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(-vx1 + vx3) + c9over2*(-vx1 + vx3)*(-vx1 + vx3) - cusq));
-			fTN = fTN   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(vx2 + vx3) + c9over2*(vx2 + vx3)*(vx2 + vx3) - cusq));
-			fBS = fBS   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(-vx2 - vx3) + c9over2*(-vx2 - vx3)*(-vx2 - vx3) - cusq));
-			fBN = fBN   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(vx2 - vx3) + c9over2*(vx2 - vx3)*(vx2 - vx3) - cusq));
-			fTS = fTS   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(-vx2 + vx3) + c9over2*(-vx2 + vx3)*(-vx2 + vx3) - cusq));
-			fTNE = fTNE  *(one + (-omega)) - (-omega)*    c1over216*(drho + rho * (three*(vx1 + vx2 + vx3) + c9over2*(vx1 + vx2 + vx3)*(vx1 + vx2 + vx3) - cusq));
-			fBSW = fBSW  *(one + (-omega)) - (-omega)*    c1over216*(drho + rho * (three*(-vx1 - vx2 - vx3) + c9over2*(-vx1 - vx2 - vx3)*(-vx1 - vx2 - vx3) - cusq));
-			fBNE = fBNE  *(one + (-omega)) - (-omega)*    c1over216*(drho + rho * (three*(vx1 + vx2 - vx3) + c9over2*(vx1 + vx2 - vx3)*(vx1 + vx2 - vx3) - cusq));
-			fTSW = fTSW  *(one + (-omega)) - (-omega)*    c1over216*(drho + rho * (three*(-vx1 - vx2 + vx3) + c9over2*(-vx1 - vx2 + vx3)*(-vx1 - vx2 + vx3) - cusq));
-			fTSE = fTSE  *(one + (-omega)) - (-omega)*    c1over216*(drho + rho * (three*(vx1 - vx2 + vx3) + c9over2*(vx1 - vx2 + vx3)*(vx1 - vx2 + vx3) - cusq));
-			fBNW = fBNW  *(one + (-omega)) - (-omega)*    c1over216*(drho + rho * (three*(-vx1 + vx2 - vx3) + c9over2*(-vx1 + vx2 - vx3)*(-vx1 + vx2 - vx3) - cusq));
-			fBSE = fBSE  *(one + (-omega)) - (-omega)*    c1over216*(drho + rho * (three*(vx1 - vx2 - vx3) + c9over2*(vx1 - vx2 - vx3)*(vx1 - vx2 - vx3) - cusq));
-			fTNW = fTNW  *(one + (-omega)) - (-omega)*    c1over216*(drho + rho * (three*(-vx1 + vx2 + vx3) + c9over2*(-vx1 + vx2 + vx3)*(-vx1 + vx2 + vx3) - cusq));
+			fE    = fE    *(one + (-omega)) - (-omega)*   c2over27*  (drho + rho * (three*(vx1)+c9over2*(vx1)*(vx1)-cusq));
+			fW    = fW    *(one + (-omega)) - (-omega)*   c2over27*  (drho + rho * (three*(-vx1) + c9over2*(-vx1)*(-vx1) - cusq));
+			fN    = fN    *(one + (-omega)) - (-omega)*   c2over27*  (drho + rho * (three*(vx2)+c9over2*(vx2)*(vx2)-cusq));
+			fS    = fS    *(one + (-omega)) - (-omega)*   c2over27*  (drho + rho * (three*(-vx2) + c9over2*(-vx2)*(-vx2) - cusq));
+			fT    = fT    *(one + (-omega)) - (-omega)*   c2over27*  (drho + rho * (three*(vx3)+c9over2*(vx3)*(vx3)-cusq));
+			fB    = fB    *(one + (-omega)) - (-omega)*   c2over27*  (drho + rho * (three*(-vx3) + c9over2*(-vx3)*(-vx3) - cusq));
+			fNE   = fNE   *(one + (-omega)) - (-omega)*   c1over54*  (drho + rho * (three*(vx1 + vx2) + c9over2*(vx1 + vx2)*(vx1 + vx2) - cusq));
+			fSW   = fSW   *(one + (-omega)) - (-omega)*   c1over54*  (drho + rho * (three*(-vx1 - vx2) + c9over2*(-vx1 - vx2)*(-vx1 - vx2) - cusq));
+			fSE   = fSE   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(vx1 - vx2) + c9over2*(vx1 - vx2)*(vx1 - vx2) - cusq));
+			fNW   = fNW   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(-vx1 + vx2) + c9over2*(-vx1 + vx2)*(-vx1 + vx2) - cusq));
+			fTE   = fTE   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(vx1 + vx3) + c9over2*(vx1 + vx3)*(vx1 + vx3) - cusq));
+			fBW   = fBW   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(-vx1 - vx3) + c9over2*(-vx1 - vx3)*(-vx1 - vx3) - cusq));
+			fBE   = fBE   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(vx1 - vx3) + c9over2*(vx1 - vx3)*(vx1 - vx3) - cusq));
+			fTW   = fTW   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(-vx1 + vx3) + c9over2*(-vx1 + vx3)*(-vx1 + vx3) - cusq));
+			fTN   = fTN   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(vx2 + vx3) + c9over2*(vx2 + vx3)*(vx2 + vx3) - cusq));
+			fBS   = fBS   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(-vx2 - vx3) + c9over2*(-vx2 - vx3)*(-vx2 - vx3) - cusq));
+			fBN   = fBN   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(vx2 - vx3) + c9over2*(vx2 - vx3)*(vx2 - vx3) - cusq));
+			fTS   = fTS   *(one + (-omega)) - (-omega)*    c1over54* (drho + rho * (three*(-vx2 + vx3) + c9over2*(-vx2 + vx3)*(-vx2 + vx3) - cusq));
+			fTNE  = fTNE  *(one + (-omega)) - (-omega)*    c1over216*(drho + rho * (three*(vx1 + vx2 + vx3) + c9over2*(vx1 + vx2 + vx3)*(vx1 + vx2 + vx3) - cusq));
+			fBSW  = fBSW  *(one + (-omega)) - (-omega)*    c1over216*(drho + rho * (three*(-vx1 - vx2 - vx3) + c9over2*(-vx1 - vx2 - vx3)*(-vx1 - vx2 - vx3) - cusq));
+			fBNE  = fBNE  *(one + (-omega)) - (-omega)*    c1over216*(drho + rho * (three*(vx1 + vx2 - vx3) + c9over2*(vx1 + vx2 - vx3)*(vx1 + vx2 - vx3) - cusq));
+			fTSW  = fTSW  *(one + (-omega)) - (-omega)*    c1over216*(drho + rho * (three*(-vx1 - vx2 + vx3) + c9over2*(-vx1 - vx2 + vx3)*(-vx1 - vx2 + vx3) - cusq));
+			fTSE  = fTSE  *(one + (-omega)) - (-omega)*    c1over216*(drho + rho * (three*(vx1 - vx2 + vx3) + c9over2*(vx1 - vx2 + vx3)*(vx1 - vx2 + vx3) - cusq));
+			fBNW  = fBNW  *(one + (-omega)) - (-omega)*    c1over216*(drho + rho * (three*(-vx1 + vx2 - vx3) + c9over2*(-vx1 + vx2 - vx3)*(-vx1 + vx2 - vx3) - cusq));
+			fBSE  = fBSE  *(one + (-omega)) - (-omega)*    c1over216*(drho + rho * (three*(vx1 - vx2 - vx3) + c9over2*(vx1 - vx2 - vx3)*(vx1 - vx2 - vx3) - cusq));
+			fTNW  = fTNW  *(one + (-omega)) - (-omega)*    c1over216*(drho + rho * (three*(-vx1 + vx2 + vx3) + c9over2*(-vx1 + vx2 + vx3)*(-vx1 + vx2 + vx3) - cusq));
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 
diff --git a/src/VirtualFluids_GPU/GPU/CalcMac27.cu b/src/VirtualFluids_GPU/GPU/CalcMac27.cu
index 1a31708f7..d70cc9724 100644
--- a/src/VirtualFluids_GPU/GPU/CalcMac27.cu
+++ b/src/VirtualFluids_GPU/GPU/CalcMac27.cu
@@ -1695,20 +1695,20 @@ extern "C" __global__ void LBCalcMacMedSP27( doubflo* vxD,
 
 ////////////////////////////////////////////////////////////////////////////////
 extern "C" __global__ void LBCalcMeasurePoints( doubflo* vxMP,
-												            doubflo* vyMP,
-												            doubflo* vzMP,
-												            doubflo* rhoMP,
-												            unsigned int* kMP,
-												            unsigned int numberOfPointskMP,
-												            unsigned int MPClockCycle,
-												            unsigned int t,
-												            unsigned int* geoD,
-												            unsigned int* neighborX,
-												            unsigned int* neighborY,
-												            unsigned int* neighborZ,
-												            unsigned int size_Mat,
-												            doubflo* DD,
-												            bool evenOrOdd)
+												doubflo* vyMP,
+												doubflo* vzMP,
+												doubflo* rhoMP,
+												unsigned int* kMP,
+												unsigned int numberOfPointskMP,
+												unsigned int MPClockCycle,
+												unsigned int t,
+												unsigned int* geoD,
+												unsigned int* neighborX,
+												unsigned int* neighborY,
+												unsigned int* neighborZ,
+												unsigned int size_Mat,
+												doubflo* DD,
+												bool evenOrOdd)
 {
 	Distributions27 D;
 	if (evenOrOdd==true)
diff --git a/src/VirtualFluids_GPU/GPU/Cumulant27.cu b/src/VirtualFluids_GPU/GPU/Cumulant27.cu
index 13792f3b4..9eae98577 100644
--- a/src/VirtualFluids_GPU/GPU/Cumulant27.cu
+++ b/src/VirtualFluids_GPU/GPU/Cumulant27.cu
@@ -3401,9 +3401,9 @@ extern "C" __global__ void LB_Kernel_Kum_AA2016_Comp_SP_27( doubflo omega,
 			doubflo fx = forces[0]/(pow(two,level)); //zero;//0.0032653/(pow(two,level)); //0.000000005;//(two/1600000.0) / 120.0; //
 			doubflo fy = forces[1]/(pow(two,level)); //zero;
 			doubflo fz = forces[2]/(pow(two,level)); //zero;
-			vvx += fx;
-			vvy += fy;
-			vvz += fz;
+			vvx += fx*c1o2;
+			vvy += fy*c1o2;
+			vvz += fz*c1o2;
 			////////////////////////////////////////////////////////////////////////////////////
 			//doubflo omega = omega_in;
 			////////////////////////////////////////////////////////////////////////////////////
@@ -9947,9 +9947,9 @@ extern "C" __global__ void LB_Kernel_Kum_New_Comp_SP_27(doubflo omega,
 			doubflo fx = forces[0]/(pow(two,level)); //zero;//0.0032653/(pow(two,level)); //0.000000005;//(two/1600000.0) / 120.0; //
 			doubflo fy = forces[1]/(pow(two,level)); //zero;
 			doubflo fz = forces[2]/(pow(two,level)); //zero;
-			vvx += fx;
-			vvy += fy;
-			vvz += fz;
+			vvx += fx*c1o2;
+			vvy += fy*c1o2;
+			vvz += fz*c1o2;
 			////////////////////////////////////////////////////////////////////////////////////
 			//doubflo omega = omega_in;
 			////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/VirtualFluids_GPU/GPU/GPU_Interface.h
index e18b2ca27..b882422e1 100644
--- a/src/VirtualFluids_GPU/GPU/GPU_Interface.h
+++ b/src/VirtualFluids_GPU/GPU/GPU_Interface.h
@@ -293,6 +293,24 @@ extern "C" void KernelWaleCumOneCompSP27(unsigned int numberOfThreads,
 										 doubflo* forces,
 										 bool EvenOrOdd);
 
+extern "C" void KernelWaleCumAA2016CompSP27( unsigned int numberOfThreads,
+											 doubflo s9,
+											 unsigned int* bcMatD,
+											 unsigned int* neighborX,
+											 unsigned int* neighborY,
+											 unsigned int* neighborZ,
+											 unsigned int* neighborWSB,
+											 doubflo* veloX,
+											 doubflo* veloY,
+											 doubflo* veloZ,
+											 doubflo* DD,
+											 doubflo* turbulentViscosity,
+											 int size_Mat,
+											 int size_Array,
+											 int level,
+											 doubflo* forces,
+											 bool EvenOrOdd);
+
 extern "C" void KernelThS7(unsigned int numberOfThreads, 
                            doubflo diffusivity,
                            unsigned int* bcMatD,
diff --git a/src/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
index a2a32bc3f..bc962d489 100644
--- a/src/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
+++ b/src/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
@@ -262,6 +262,22 @@ extern "C" __global__ void LB_Kernel_Wale_Cum_One_Comp_SP_27(doubflo omega,
 															 doubflo* forces,
 															 bool EvenOrOdd);
 
+extern "C" __global__ void LB_Kernel_Wale_Cum_AA2016_Comp_SP_27( doubflo omega,
+																 unsigned int* bcMatD,
+																 unsigned int* neighborX,
+																 unsigned int* neighborY,
+																 unsigned int* neighborZ,
+																 unsigned int* neighborWSB,
+																 doubflo* veloX,
+																 doubflo* veloY,
+																 doubflo* veloZ,
+																 doubflo* DDStart,
+																 doubflo* turbulentViscosity,
+																 int size_Mat,
+																 int level,
+																 doubflo* forces,
+																 bool EvenOrOdd);
+
 extern "C" __global__ void LB_Kernel_ThS7(doubflo diffusivity,
                                           unsigned int* bcMatD,
                                           unsigned int* neighborX,
diff --git a/src/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/VirtualFluids_GPU/GPU/LBMKernel.cu
index 32259373b..f377dff6f 100644
--- a/src/VirtualFluids_GPU/GPU/LBMKernel.cu
+++ b/src/VirtualFluids_GPU/GPU/LBMKernel.cu
@@ -957,6 +957,61 @@ extern "C" void KernelWaleCumOneCompSP27(unsigned int numberOfThreads,
 		getLastCudaError("LB_Kernel_Wale_Cum_One_Comp_SP_27 execution failed"); 
 }
 //////////////////////////////////////////////////////////////////////////
+extern "C" void KernelWaleCumAA2016CompSP27( unsigned int numberOfThreads,
+											 doubflo s9,
+											 unsigned int* bcMatD,
+											 unsigned int* neighborX,
+											 unsigned int* neighborY,
+											 unsigned int* neighborZ,
+											 unsigned int* neighborWSB,
+											 doubflo* veloX,
+											 doubflo* veloY,
+											 doubflo* veloZ,
+											 doubflo* DD,
+											 doubflo* turbulentViscosity,
+											 int size_Mat,
+											 int size_Array,
+											 int level,
+											 doubflo* forces,
+											 bool EvenOrOdd)
+{
+	//int Grid = size_Array / numberOfThreads;
+	//dim3 grid(Grid, 1, 1);
+	//dim3 threads(numberOfThreads, 1, 1 );
+
+	int Grid = (size_Mat / numberOfThreads) + 1;
+	int Grid1, Grid2;
+	if (Grid > 512)
+	{
+		Grid1 = 512;
+		Grid2 = (Grid / Grid1) + 1;
+	}
+	else
+	{
+		Grid1 = 1;
+		Grid2 = Grid;
+	}
+	dim3 grid(Grid1, Grid2, 1);
+	dim3 threads(numberOfThreads, 1, 1);
+
+	LB_Kernel_Wale_Cum_AA2016_Comp_SP_27 <<< grid, threads >>>( s9,
+																bcMatD,
+																neighborX,
+																neighborY,
+																neighborZ,
+																neighborWSB,
+																veloX,
+																veloY,
+																veloZ,
+																DD,
+																turbulentViscosity,
+																size_Mat,
+																level,
+																forces,
+																EvenOrOdd); 
+		getLastCudaError("LB_Kernel_Wale_Cum_AA2016_Comp_SP_27 execution failed"); 
+}
+//////////////////////////////////////////////////////////////////////////
 extern "C" void KernelThS7(unsigned int numberOfThreads, 
                            doubflo diffusivity,
                            unsigned int* bcMatD,
diff --git a/src/VirtualFluids_GPU/GPU/WaleCumulant27.cu b/src/VirtualFluids_GPU/GPU/WaleCumulant27.cu
index 7730da412..933aeab3c 100644
--- a/src/VirtualFluids_GPU/GPU/WaleCumulant27.cu
+++ b/src/VirtualFluids_GPU/GPU/WaleCumulant27.cu
@@ -252,11 +252,15 @@ extern "C" __global__ void LB_Kernel_Wale_Cum_One_Comp_SP_27(doubflo omega_in,
 				/////////////////////////////////
 				doubflo SumSsq = SumS * SumS;
 				doubflo SumSDsq = SumSd * SumSd;
-				nuTurb = powf(delta, two) * powf(SumSDsq, c3o2) / (powf(SumSsq, c5o2) + powf(SumSDsq, c5o4));
+				nuTurb = powf(delta, two) * powf(SumSDsq, c3o2) / (powf(SumSsq, c5o2) + powf(SumSDsq, c5o4) + smallSingle);
 				/////////////////////////////////
 				//nuTurb = rho * powf(delta, two) * powf(SumSd*SumSd, c3o2) / (powf(SumS*SumS, c5o2) + powf(SumSd*SumSd, c5o4));
 				//nuTurb = powf(delta, two) * powf(SumSd*SumSd, c3o2) / (powf(SumS*SumS, c5o2) + powf(SumSd*SumSd, c5o4));
 			}
+			//Test debug
+			//if (nuTurb > c1o100) nuTurb = c1o100;
+			//if (nuTurb < -c1o100) nuTurb = -c1o100;
+			//nuTurb = c1o100;
 			turbulentViscosity[k] = nuTurb;
 			////////////////////////////////////////////////////////////////////////////////////
 			//the force be with you
@@ -264,14 +268,14 @@ extern "C" __global__ void LB_Kernel_Wale_Cum_One_Comp_SP_27(doubflo omega_in,
 				doubflo fx = forces[0] / (pow(two, level)); //zero;//0.0032653/(pow(two,level)); //0.000000005;//(two/1600000.0) / 120.0; //
 				doubflo fy = forces[1] / (pow(two, level)); //zero;
 				doubflo fz = forces[2] / (pow(two, level)); //zero;
-				vvx += fx;
-				vvy += fy;
-				vvz += fz;
+				vvx += fx*c1o2;
+				vvy += fy*c1o2;
+				vvz += fz*c1o2;
 			}
 			////////////////////////////////////////////////////////////////////////////////////
-			//doubflo nuOld = c1o3 * (one / omega_in - c1o2);
-			//doubflo omega = one / (three * (nuOld + nuTurb) + c1o2);
-			doubflo omega = omega_in + nuTurb;
+			doubflo nuOld = c1o3 * (one / omega_in - c1o2);
+			doubflo omega = one / (three * (nuOld + nuTurb) + c1o2);
+			//doubflo omega = omega_in + nuTurb;
 			////////////////////////////////////////////////////////////////////////////////////
 			//fast
 			doubflo oMdrho = one; // comp special
@@ -1037,3 +1041,1049 @@ extern "C" __global__ void LB_Kernel_Wale_Cum_One_Comp_SP_27(doubflo omega_in,
 }
 ////////////////////////////////////////////////////////////////////////////////
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+extern "C" __global__ void LB_Kernel_Wale_Cum_AA2016_Comp_SP_27( doubflo omega_in,
+																 unsigned int* bcMatD,
+																 unsigned int* neighborX,
+																 unsigned int* neighborY,
+																 unsigned int* neighborZ,
+																 unsigned int* neighborWSB,
+																 doubflo* veloX,
+																 doubflo* veloY,
+																 doubflo* veloZ,
+																 doubflo* DDStart,
+																 doubflo* turbulentViscosity,
+																 int size_Mat,
+																 int level,
+																 doubflo* forces,
+																 bool EvenOrOdd)
+{
+	////////////////////////////////////////////////////////////////////////////////
+	const unsigned  x = threadIdx.x;  // Globaler x-Index 
+	const unsigned  y = blockIdx.x;   // Globaler y-Index 
+	const unsigned  z = blockIdx.y;   // Globaler z-Index 
+
+	const unsigned nx = blockDim.x;
+	const unsigned ny = gridDim.x;
+
+	const unsigned k = nx*(ny*z + y) + x;
+	//////////////////////////////////////////////////////////////////////////
+
+	if(k<size_Mat)
+	{
+		////////////////////////////////////////////////////////////////////////////////
+		unsigned int BC;
+		BC = bcMatD[k];
+
+		if( BC == GEO_FLUID /*(BC != GEO_SOLID) && (BC != GEO_VOID)*/ )
+		{
+			Distributions27 D;
+			if (EvenOrOdd==true)
+			{
+				D.f[dirE   ] = &DDStart[dirE   *size_Mat];
+				D.f[dirW   ] = &DDStart[dirW   *size_Mat];
+				D.f[dirN   ] = &DDStart[dirN   *size_Mat];
+				D.f[dirS   ] = &DDStart[dirS   *size_Mat];
+				D.f[dirT   ] = &DDStart[dirT   *size_Mat];
+				D.f[dirB   ] = &DDStart[dirB   *size_Mat];
+				D.f[dirNE  ] = &DDStart[dirNE  *size_Mat];
+				D.f[dirSW  ] = &DDStart[dirSW  *size_Mat];
+				D.f[dirSE  ] = &DDStart[dirSE  *size_Mat];
+				D.f[dirNW  ] = &DDStart[dirNW  *size_Mat];
+				D.f[dirTE  ] = &DDStart[dirTE  *size_Mat];
+				D.f[dirBW  ] = &DDStart[dirBW  *size_Mat];
+				D.f[dirBE  ] = &DDStart[dirBE  *size_Mat];
+				D.f[dirTW  ] = &DDStart[dirTW  *size_Mat];
+				D.f[dirTN  ] = &DDStart[dirTN  *size_Mat];
+				D.f[dirBS  ] = &DDStart[dirBS  *size_Mat];
+				D.f[dirBN  ] = &DDStart[dirBN  *size_Mat];
+				D.f[dirTS  ] = &DDStart[dirTS  *size_Mat];
+				D.f[dirZERO] = &DDStart[dirZERO*size_Mat];
+				D.f[dirTNE ] = &DDStart[dirTNE *size_Mat];
+				D.f[dirTSW ] = &DDStart[dirTSW *size_Mat];
+				D.f[dirTSE ] = &DDStart[dirTSE *size_Mat];
+				D.f[dirTNW ] = &DDStart[dirTNW *size_Mat];
+				D.f[dirBNE ] = &DDStart[dirBNE *size_Mat];
+				D.f[dirBSW ] = &DDStart[dirBSW *size_Mat];
+				D.f[dirBSE ] = &DDStart[dirBSE *size_Mat];
+				D.f[dirBNW ] = &DDStart[dirBNW *size_Mat];
+			}
+			else
+			{
+				D.f[dirW   ] = &DDStart[dirE   *size_Mat];
+				D.f[dirE   ] = &DDStart[dirW   *size_Mat];
+				D.f[dirS   ] = &DDStart[dirN   *size_Mat];
+				D.f[dirN   ] = &DDStart[dirS   *size_Mat];
+				D.f[dirB   ] = &DDStart[dirT   *size_Mat];
+				D.f[dirT   ] = &DDStart[dirB   *size_Mat];
+				D.f[dirSW  ] = &DDStart[dirNE  *size_Mat];
+				D.f[dirNE  ] = &DDStart[dirSW  *size_Mat];
+				D.f[dirNW  ] = &DDStart[dirSE  *size_Mat];
+				D.f[dirSE  ] = &DDStart[dirNW  *size_Mat];
+				D.f[dirBW  ] = &DDStart[dirTE  *size_Mat];
+				D.f[dirTE  ] = &DDStart[dirBW  *size_Mat];
+				D.f[dirTW  ] = &DDStart[dirBE  *size_Mat];
+				D.f[dirBE  ] = &DDStart[dirTW  *size_Mat];
+				D.f[dirBS  ] = &DDStart[dirTN  *size_Mat];
+				D.f[dirTN  ] = &DDStart[dirBS  *size_Mat];
+				D.f[dirTS  ] = &DDStart[dirBN  *size_Mat];
+				D.f[dirBN  ] = &DDStart[dirTS  *size_Mat];
+				D.f[dirZERO] = &DDStart[dirZERO*size_Mat];
+				D.f[dirBSW ] = &DDStart[dirTNE *size_Mat];
+				D.f[dirBNE ] = &DDStart[dirTSW *size_Mat];
+				D.f[dirBNW ] = &DDStart[dirTSE *size_Mat];
+				D.f[dirBSE ] = &DDStart[dirTNW *size_Mat];
+				D.f[dirTSW ] = &DDStart[dirBNE *size_Mat];
+				D.f[dirTNE ] = &DDStart[dirBSW *size_Mat];
+				D.f[dirTNW ] = &DDStart[dirBSE *size_Mat];
+				D.f[dirTSE ] = &DDStart[dirBNW *size_Mat];
+			}
+
+			////////////////////////////////////////////////////////////////////////////////
+			//index
+			//unsigned int kzero= k;
+			//unsigned int ke   = k;
+			unsigned int kw   = neighborX[k];
+			//unsigned int kn   = k;
+			unsigned int ks   = neighborY[k];
+			//unsigned int kt   = k;
+			unsigned int kb   = neighborZ[k];
+			unsigned int ksw  = neighborY[kw];
+			//unsigned int kne  = k;
+			//unsigned int kse  = ks;
+			//unsigned int knw  = kw;
+			unsigned int kbw  = neighborZ[kw];
+			//unsigned int kte  = k;
+			//unsigned int kbe  = kb;
+			//unsigned int ktw  = kw;
+			unsigned int kbs  = neighborZ[ks];
+			//unsigned int ktn  = k;
+			//unsigned int kbn  = kb;
+			//unsigned int kts  = ks;
+			//unsigned int ktse = ks;
+			//unsigned int kbnw = kbw;
+			//unsigned int ktnw = kw;
+			//unsigned int kbse = kbs;
+			//unsigned int ktsw = ksw;
+			//unsigned int kbne = kb;
+			//unsigned int ktne = k;
+			unsigned int kbsw = neighborZ[ksw];
+
+			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+			doubflo mfcbb = (D.f[dirE   ])[k  ];
+			doubflo mfabb = (D.f[dirW   ])[kw ];
+			doubflo mfbcb = (D.f[dirN   ])[k  ];
+			doubflo mfbab = (D.f[dirS   ])[ks ];
+			doubflo mfbbc = (D.f[dirT   ])[k  ];
+			doubflo mfbba = (D.f[dirB   ])[kb ];
+			doubflo mfccb = (D.f[dirNE  ])[k  ];
+			doubflo mfaab = (D.f[dirSW  ])[ksw];
+			doubflo mfcab = (D.f[dirSE  ])[ks ];
+			doubflo mfacb = (D.f[dirNW  ])[kw ];
+			doubflo mfcbc = (D.f[dirTE  ])[k  ];
+			doubflo mfaba = (D.f[dirBW  ])[kbw];
+			doubflo mfcba = (D.f[dirBE  ])[kb ];
+			doubflo mfabc = (D.f[dirTW  ])[kw ];
+			doubflo mfbcc = (D.f[dirTN  ])[k  ];
+			doubflo mfbaa = (D.f[dirBS  ])[kbs];
+			doubflo mfbca = (D.f[dirBN  ])[kb ];
+			doubflo mfbac = (D.f[dirTS  ])[ks ];
+			doubflo mfbbb = (D.f[dirZERO])[k  ];
+			doubflo mfccc = (D.f[dirTNE ])[k  ];
+			doubflo mfaac = (D.f[dirTSW ])[ksw];
+			doubflo mfcac = (D.f[dirTSE ])[ks ];
+			doubflo mfacc = (D.f[dirTNW ])[kw ];
+			doubflo mfcca = (D.f[dirBNE ])[kb ];
+			doubflo mfaaa = (D.f[dirBSW])[kbsw];
+			doubflo mfcaa = (D.f[dirBSE ])[kbs];
+			doubflo mfaca = (D.f[dirBNW ])[kbw];
+			////////////////////////////////////////////////////////////////////////////////////
+			doubflo drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
+							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
+							((mfabb+mfcbb) + (mfbab+mfbcb)) + (mfbba+mfbbc)) + mfbbb;
+
+			doubflo rho = one+drho;
+			////////////////////////////////////////////////////////////////////////////////////
+			//slow
+			doubflo vvx    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) + 
+						     (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) +
+						       (mfcbb-mfabb)) / rho;
+			doubflo vvy    =((((mfccc-mfaaa) + (mfaca-mfcac)) + ((mfacc-mfcaa) + (mfcca-mfaac))) + 
+				             (((mfbca-mfbac) + (mfbcc-mfbaa)) + ((mfacb-mfcab) + (mfccb-mfaab))) +
+				               (mfbcb-mfbab)) / rho;
+			doubflo vvz    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfacc-mfcaa) + (mfaac-mfcca))) + 
+				             (((mfbac-mfbca) + (mfbcc-mfbaa)) + ((mfabc-mfcba) + (mfcbc-mfaba))) +
+				               (mfbbc-mfbba)) / rho;
+			////////////////////////////////////////////////////////////////////////////////////
+			doubflo nuTurb = zero;
+			{
+				/////////////      Wale Model     ///////////////
+				//neighbor index
+				unsigned int kPx   = neighborX[k];
+				unsigned int kPy   = neighborY[k];
+				unsigned int kPz   = neighborZ[k];
+				unsigned int kMxyz = neighborWSB[k];
+				unsigned int kMx   = neighborZ[neighborY[kMxyz]];
+				unsigned int kMy   = neighborZ[neighborX[kMxyz]];
+				unsigned int kMz   = neighborY[neighborX[kMxyz]];
+				//getVeloX//
+				doubflo veloXNeighborPx = veloX[kPx];
+				doubflo veloXNeighborMx = veloX[kMx];
+				doubflo veloXNeighborPy = veloX[kPy];
+				doubflo veloXNeighborMy = veloX[kMy];
+				doubflo veloXNeighborPz = veloX[kPz];
+				doubflo veloXNeighborMz = veloX[kMz];
+				//getVeloY//
+				doubflo veloYNeighborPx = veloY[kPx];
+				doubflo veloYNeighborMx = veloY[kMx];
+				doubflo veloYNeighborPy = veloY[kPy];
+				doubflo veloYNeighborMy = veloY[kMy];
+				doubflo veloYNeighborPz = veloY[kPz];
+				doubflo veloYNeighborMz = veloY[kMz];
+				//getVeloZ//
+				doubflo veloZNeighborPx = veloZ[kPx];
+				doubflo veloZNeighborMx = veloZ[kMx];
+				doubflo veloZNeighborPy = veloZ[kPy];
+				doubflo veloZNeighborMy = veloZ[kMy];
+				doubflo veloZNeighborPz = veloZ[kPz];
+				doubflo veloZNeighborMz = veloZ[kMz];
+				//partial Div vx in x, y, z//
+				doubflo dxvx = (veloXNeighborPx - veloXNeighborMx) / two; //deltaX * two??
+				doubflo dyvx = (veloXNeighborPy - veloXNeighborMy) / two; //deltaX * two??
+				doubflo dzvx = (veloXNeighborPz - veloXNeighborMz) / two; //deltaX * two??
+			    //partial Div vy in x, y, z//
+				doubflo dxvy = (veloYNeighborPx - veloYNeighborMx) / two; //deltaX * two??
+				doubflo dyvy = (veloYNeighborPy - veloYNeighborMy) / two; //deltaX * two??
+				doubflo dzvy = (veloYNeighborPz - veloYNeighborMz) / two; //deltaX * two??
+			    //partial Div vz in x, y, z//
+				doubflo dxvz = (veloZNeighborPx - veloZNeighborMx) / two; //deltaX * two??
+				doubflo dyvz = (veloZNeighborPy - veloZNeighborMy) / two; //deltaX * two??
+				doubflo dzvz = (veloZNeighborPz - veloZNeighborMz) / two; //deltaX * two??
+				//SumSd
+				doubflo SumSd = 
+					c1o2 * powf(dzvx, four) + 
+					c1o2 * powf(dzvy, four) + 
+					c2o3 * powf(dzvz, four) +
+					c2o3 * powf(dyvy, four) +
+					c1o2 * powf(dyvz, four) +
+					c2o3 * powf(dxvx, four) +
+					c1o2 * powf(dxvy, four) +
+					c1o2 * powf(dxvz, four) +
+					c1o2 * powf(dyvx, four) + 
+					powf(dyvx, two) * powf(dxvy, two) +
+					powf(dzvx, two) * powf(dxvz, two) +
+					powf(dzvy, two) * powf(dyvz, two) -
+					c2o3 * powf(dzvz, two) * powf(dyvy, two) -
+					c2o3 * powf(dzvz, two) * powf(dxvx, two) -
+					c2o3 * powf(dyvy, two) * powf(dxvx, two);
+				//SumS
+				doubflo SumS =
+					powf(dxvx, two) +
+					powf(dyvy, two) +
+					powf(dzvz, two) +
+					c1o2 * powf(dyvx + dxvy, two) +
+					c1o2 * powf(dzvx + dxvz, two) + 
+					c1o2 * powf(dyvz + dzvy, two);
+				//nu turbulent
+				doubflo coefficient = 0.325;
+				doubflo delta = coefficient * one;
+				/////////////////////////////////
+				doubflo SumSsq = SumS * SumS;
+				doubflo SumSDsq = SumSd * SumSd;
+				nuTurb = powf(delta, two) * powf(SumSDsq, c3o2) / (powf(SumSsq, c5o2) + powf(SumSDsq, c5o4) + smallSingle);
+				/////////////////////////////////
+				//nuTurb = rho * powf(delta, two) * powf(SumSd*SumSd, c3o2) / (powf(SumS*SumS, c5o2) + powf(SumSd*SumSd, c5o4));
+				//nuTurb = powf(delta, two) * powf(SumSd*SumSd, c3o2) / (powf(SumS*SumS, c5o2) + powf(SumSd*SumSd, c5o4));
+			}
+			//Test debug
+			//if (nuTurb > c1o100) nuTurb = c1o100;
+			//if (nuTurb < -c1o100) nuTurb = -c1o100;
+			//nuTurb = c1o100;
+			turbulentViscosity[k] = nuTurb;
+			////////////////////////////////////////////////////////////////////////////////////
+			//the force be with you
+			{
+				doubflo fx = forces[0] / (pow(two, level)); //zero;//0.0032653/(pow(two,level)); //0.000000005;//(two/1600000.0) / 120.0; //
+				doubflo fy = forces[1] / (pow(two, level)); //zero;
+				doubflo fz = forces[2] / (pow(two, level)); //zero;
+				vvx += fx*c1o2;
+				vvy += fy*c1o2;
+				vvz += fz*c1o2;
+			}
+			////////////////////////////////////////////////////////////////////////////////////
+			doubflo nuOld = c1o3 * (one / omega_in - c1o2);
+			doubflo omega = one / (three * (nuOld + nuTurb) + c1o2);
+			//doubflo omega = omega_in + nuTurb;
+			////////////////////////////////////////////////////////////////////////////////////
+			//fast
+			doubflo oMdrho = one; // comp special
+			doubflo m0, m1, m2;	
+			doubflo vx2;
+			doubflo vy2;
+			doubflo vz2;
+			vx2=vvx*vvx;
+			vy2=vvy*vvy;
+			vz2=vvz*vvz;
+			//////////////////////////////////////////////////////////////////////////////////////
+			doubflo wadjust;
+			doubflo qudricLimitP = 0.01f;// * 0.0001f;
+			doubflo qudricLimitM = 0.01f;// * 0.0001f;
+			doubflo qudricLimitD = 0.01f;// * 0.001f;
+			////////////////////////////////////////////////////////////////////////////////////
+			//Hin
+			////////////////////////////////////////////////////////////////////////////////////
+			// mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36  Konditionieren
+			////////////////////////////////////////////////////////////////////////////////////
+			// Z - Dir
+			m2    = mfaaa	+ mfaac;
+			m1    = mfaac	- mfaaa;
+			m0    = m2		+ mfaab;
+			mfaaa = m0;
+			m0   += c1o36 * oMdrho;	
+			mfaab = m1 -		m0 * vvz;
+			mfaac = m2 - two*	m1 * vvz + vz2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfaba  + mfabc;
+			m1    = mfabc  - mfaba;
+			m0    = m2		+ mfabb;
+			mfaba = m0;
+			m0   += c1o9 * oMdrho;
+			mfabb = m1 -		m0 * vvz;
+			mfabc = m2 - two*	m1 * vvz + vz2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfaca  + mfacc;
+			m1    = mfacc  - mfaca;
+			m0    = m2		+ mfacb;
+			mfaca = m0;
+			m0   += c1o36 * oMdrho;
+			mfacb = m1 -		m0 * vvz;
+			mfacc = m2 - two*	m1 * vvz + vz2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfbaa	+ mfbac;
+			m1    = mfbac	- mfbaa;
+			m0    = m2		+ mfbab;
+			mfbaa = m0;
+			m0   += c1o9 * oMdrho;
+			mfbab = m1 -		m0 * vvz;
+			mfbac = m2 - two*	m1 * vvz + vz2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfbba  + mfbbc;
+			m1    = mfbbc  - mfbba;
+			m0    = m2		+ mfbbb;
+			mfbba = m0;
+			m0   += c4o9 * oMdrho;
+			mfbbb = m1 -		m0 * vvz;
+			mfbbc = m2 - two*	m1 * vvz + vz2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfbca  + mfbcc;
+			m1    = mfbcc  - mfbca;
+			m0    = m2		+ mfbcb;
+			mfbca = m0;
+			m0   += c1o9 * oMdrho;
+			mfbcb = m1 -		m0 * vvz;
+			mfbcc = m2 - two*	m1 * vvz + vz2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfcaa	+ mfcac;
+			m1    = mfcac	- mfcaa;
+			m0    = m2		+ mfcab;
+			mfcaa = m0;
+			m0   += c1o36 * oMdrho;
+			mfcab = m1 -		m0 * vvz;
+			mfcac = m2 - two*	m1 * vvz + vz2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfcba  + mfcbc;
+			m1    = mfcbc  - mfcba;
+			m0    = m2		+ mfcbb;
+			mfcba = m0;
+			m0   += c1o9 * oMdrho;
+			mfcbb = m1 -		m0 * vvz;
+			mfcbc = m2 - two*	m1 * vvz + vz2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfcca  + mfccc;
+			m1    = mfccc  - mfcca;
+			m0    = m2		+ mfccb;
+			mfcca = m0;
+			m0   += c1o36 * oMdrho;
+			mfccb = m1 -		m0 * vvz;
+			mfccc = m2 - two*	m1 * vvz + vz2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			////////////////////////////////////////////////////////////////////////////////////
+			// mit  1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren
+			////////////////////////////////////////////////////////////////////////////////////
+			// Y - Dir
+			m2    = mfaaa	+ mfaca;
+			m1    = mfaca	- mfaaa;
+			m0    = m2		+ mfaba;
+			mfaaa = m0;
+			m0   += c1o6 * oMdrho;
+			mfaba = m1 -		m0 * vvy;
+			mfaca = m2 - two*	m1 * vvy + vy2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfaab  + mfacb;
+			m1    = mfacb  - mfaab;
+			m0    = m2		+ mfabb;
+			mfaab = m0;
+			mfabb = m1 -		m0 * vvy;
+			mfacb = m2 - two*	m1 * vvy + vy2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfaac  + mfacc;
+			m1    = mfacc  - mfaac;
+			m0    = m2		+ mfabc;
+			mfaac = m0;
+			m0   += c1o18 * oMdrho;
+			mfabc = m1 -		m0 * vvy;
+			mfacc = m2 - two*	m1 * vvy + vy2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfbaa	+ mfbca;
+			m1    = mfbca	- mfbaa;
+			m0    = m2		+ mfbba;
+			mfbaa = m0;
+			m0   += c2o3 * oMdrho;
+			mfbba = m1 -		m0 * vvy;
+			mfbca = m2 - two*	m1 * vvy + vy2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfbab  + mfbcb;
+			m1    = mfbcb  - mfbab;
+			m0    = m2		+ mfbbb;
+			mfbab = m0;
+			mfbbb = m1 -		m0 * vvy;
+			mfbcb = m2 - two*	m1 * vvy + vy2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfbac  + mfbcc;
+			m1    = mfbcc  - mfbac;
+			m0    = m2		+ mfbbc;
+			mfbac = m0;
+			m0   += c2o9 * oMdrho;
+			mfbbc = m1 -		m0 * vvy;
+			mfbcc = m2 - two*	m1 * vvy + vy2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfcaa	+ mfcca;
+			m1    = mfcca	- mfcaa;
+			m0    = m2		+ mfcba;
+			mfcaa = m0;
+			m0   += c1o6 * oMdrho;
+			mfcba = m1 -		m0 * vvy;
+			mfcca = m2 - two*	m1 * vvy + vy2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfcab  + mfccb;
+			m1    = mfccb  - mfcab;
+			m0    = m2		+ mfcbb;
+			mfcab = m0;
+			mfcbb = m1 -		m0 * vvy;
+			mfccb = m2 - two*	m1 * vvy + vy2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfcac  + mfccc;
+			m1    = mfccc  - mfcac;
+			m0    = m2		+ mfcbc;
+			mfcac = m0;
+			m0   += c1o18 * oMdrho;
+			mfcbc = m1 -		m0 * vvy;
+			mfccc = m2 - two*	m1 * vvy + vy2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			////////////////////////////////////////////////////////////////////////////////////
+			// mit     1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9		Konditionieren
+			////////////////////////////////////////////////////////////////////////////////////
+			// X - Dir
+			m2    = mfaaa	+ mfcaa;
+			m1    = mfcaa	- mfaaa;
+			m0    = m2		+ mfbaa;
+			mfaaa = m0;
+			m0   += one* oMdrho;
+			mfbaa = m1 -		m0 * vvx;
+			mfcaa = m2 - two*	m1 * vvx + vx2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfaba  + mfcba;
+			m1    = mfcba  - mfaba;
+			m0    = m2		+ mfbba;
+			mfaba = m0;
+			mfbba = m1 -		m0 * vvx;
+			mfcba = m2 - two*	m1 * vvx + vx2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfaca  + mfcca;
+			m1    = mfcca  - mfaca;
+			m0    = m2		+ mfbca;
+			mfaca = m0;
+			m0   += c1o3 * oMdrho;
+			mfbca = m1 -		m0 * vvx;
+			mfcca = m2 - two*	m1 * vvx + vx2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfaab	+ mfcab;
+			m1    = mfcab	- mfaab;
+			m0    = m2		+ mfbab;
+			mfaab = m0;
+			mfbab = m1 -		m0 * vvx;
+			mfcab = m2 - two*	m1 * vvx + vx2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfabb  + mfcbb;
+			m1    = mfcbb  - mfabb;
+			m0    = m2		+ mfbbb;
+			mfabb = m0;
+			mfbbb = m1 -		m0 * vvx;
+			mfcbb = m2 - two*	m1 * vvx + vx2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfacb  + mfccb;
+			m1    = mfccb  - mfacb;
+			m0    = m2		+ mfbcb;
+			mfacb = m0;
+			mfbcb = m1 -		m0 * vvx;
+			mfccb = m2 - two*	m1 * vvx + vx2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfaac	+ mfcac;
+			m1    = mfcac	- mfaac;
+			m0    = m2		+ mfbac;
+			mfaac = m0;
+			m0   += c1o3 * oMdrho;
+			mfbac = m1 -		m0 * vvx;
+			mfcac = m2 - two*	m1 * vvx + vx2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfabc  + mfcbc;
+			m1    = mfcbc  - mfabc;
+			m0    = m2		+ mfbbc;
+			mfabc = m0;
+			mfbbc = m1 -		m0 * vvx;
+			mfcbc = m2 - two*	m1 * vvx + vx2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			m2    = mfacc  + mfccc;
+			m1    = mfccc  - mfacc;
+			m0    = m2		+ mfbcc;
+			mfacc = m0;
+			m0   += c1o9 * oMdrho;
+			mfbcc = m1 -		m0 * vvx;
+			mfccc = m2 - two*	m1 * vvx + vx2 * m0;
+			////////////////////////////////////////////////////////////////////////////////////
+			////////////////////////////////////////////////////////////////////////////////////
+
+
+			////////////////////////////////////////////////////////////////////////////////////
+			// Cumulants
+			////////////////////////////////////////////////////////////////////////////////////
+			doubflo OxxPyyPzz = one;	//set the bulk viscosity one is high / two is very low and zero is (too) high ... (also called omega 2)
+
+			////////////////////////////////////////////////////////////
+			//3.
+			//////////////////////////////
+			doubflo OxyyPxzz  = eight*(-two+omega)*(one+two*omega)/(-eight-fourteen*omega+seven*omega*omega);//one;
+			doubflo OxyyMxzz  = eight*(-two+omega)*(-seven+four*omega)/(fiftysix-fifty*omega+nine*omega*omega);//one;
+			doubflo Oxyz      = twentyfour*(-two+omega)*(-two-seven*omega+three*omega*omega)/(fourtyeight+c152*omega-c130*omega*omega+twentynine*omega*omega*omega);//one;
+			////////////////////////////////////////////////////////////
+			//4.
+			//////////////////////////////
+			doubflo O4        = one;
+			//////////////////////////////
+			//doubflo O4        = omega;//TRT
+			////////////////////////////////////////////////////////////
+			//5.
+			//////////////////////////////
+			doubflo O5        = one;
+			////////////////////////////////////////////////////////////
+			//6.
+			//////////////////////////////
+			doubflo O6        = one;
+			////////////////////////////////////////////////////////////
+
+
+			//central moments to cumulants
+			//4.
+			doubflo CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + two * mfbba * mfbab) / rho;
+			doubflo CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + two * mfbba * mfabb) / rho;
+			doubflo CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + two * mfbab * mfabb) / rho;
+				  	 		
+			doubflo CUMcca = mfcca - (((mfcaa * mfaca + two * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho  - c1o9*(drho/rho));
+			doubflo CUMcac = mfcac - (((mfcaa * mfaac + two * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho  - c1o9*(drho/rho));
+			doubflo CUMacc = mfacc - (((mfaac * mfaca + two * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho  - c1o9*(drho/rho));
+
+			//5.
+			doubflo CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + four * mfabb * mfbbb + two * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) ) / rho ;
+			doubflo CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + four * mfbab * mfbbb + two * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc) ) / rho ;
+			doubflo CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + four * mfbba * mfbbb + two * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab) ) / rho ;
+			
+			//6.
+
+			doubflo CUMccc = mfccc + ((-four *  mfbbb * mfbbb  
+							-           (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
+							-    four * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+							-     two * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
+							+(   four * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+							+     two * (mfcaa * mfaca * mfaac)
+							+ sixteen *  mfbba * mfbab * mfabb) / (rho * rho)
+							-    c1o3 * (mfacc + mfcac + mfcca) /rho 
+							-    c1o9 * (mfcaa + mfaca + mfaac) /rho 
+							+(    two * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) 
+							+           (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3 
+							+ c1o27*((drho * drho - drho)/(rho*rho)));
+
+			//2.
+			// linear combinations
+			doubflo mxxPyyPzz = mfcaa + mfaca + mfaac;
+			doubflo mxxMyy    = mfcaa - mfaca;
+			doubflo mxxMzz	   = mfcaa - mfaac;
+			
+			////////////////////////////////////////////////////////////////////////////
+            doubflo Dxy =-three*omega*mfbba;
+            doubflo Dxz =-three*omega*mfbab;
+            doubflo Dyz =-three*omega*mfabb;
+
+			//3.
+			// linear combinations
+
+			doubflo mxxyPyzz = mfcba + mfabc;
+			doubflo mxxyMyzz = mfcba - mfabc;
+
+			doubflo mxxzPyyz = mfcab + mfacb;
+			doubflo mxxzMyyz = mfcab - mfacb;
+
+			doubflo mxyyPxzz = mfbca + mfbac;
+			doubflo mxyyMxzz = mfbca - mfbac;
+
+ 			///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ 			//incl. correction		(hat noch nicht so gut funktioniert...Optimierungsbedarf??)
+ 			
+ 				doubflo dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 *  OxxPyyPzz * (mfaaa - mxxPyyPzz);
+ 				doubflo dyuy = dxux + omega * c3o2 * mxxMyy;
+ 				doubflo dzuz = dxux + omega * c3o2 * mxxMzz;
+ 
+ 				//relax
+				mxxPyyPzz += OxxPyyPzz*(mfaaa  - mxxPyyPzz)- three * (one - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);//-magicBulk*OxxPyyPzz;
+ 				mxxMyy    += omega * (-mxxMyy) - three * (one + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
+ 				mxxMzz    += omega * (-mxxMzz) - three * (one + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
+ 
+ 			///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ 			/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ 			////no correction
+ 			//mxxPyyPzz += OxxPyyPzz*(mfaaa-mxxPyyPzz);//-magicBulk*OxxPyyPzz;
+ 			//mxxMyy    += -(-omega) * (-mxxMyy);
+ 			//mxxMzz    += -(-omega) * (-mxxMzz);
+ 			/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+			mfabb     += omega * (-mfabb);
+			mfbab     += omega * (-mfbab);
+			mfbba     += omega * (-mfbba);
+			//////////////////////////////////////////////////////////////////////////
+
+			// linear combinations back
+			mfcaa = c1o3 * (       mxxMyy +      mxxMzz + mxxPyyPzz);
+			mfaca = c1o3 * (-two*  mxxMyy +      mxxMzz + mxxPyyPzz);
+			mfaac = c1o3 * (       mxxMyy - two* mxxMzz + mxxPyyPzz);
+
+
+			//relax
+			//////////////////////////////////////////////////////////////////////////
+			//das ist der limiter
+ 			wadjust    = Oxyz+(one-Oxyz)*abs(mfbbb)/(abs(mfbbb)+qudricLimitD);
+ 			mfbbb     += wadjust * (-mfbbb);
+ 			wadjust    = OxyyPxzz+(one-OxyyPxzz)*abs(mxxyPyzz)/(abs(mxxyPyzz)+qudricLimitP);
+ 			mxxyPyzz  += wadjust * (-mxxyPyzz);
+ 			wadjust    = OxyyMxzz+(one-OxyyMxzz)*abs(mxxyMyzz)/(abs(mxxyMyzz)+qudricLimitM);
+ 			mxxyMyzz  += wadjust * (-mxxyMyzz);
+ 			wadjust    = OxyyPxzz+(one-OxyyPxzz)*abs(mxxzPyyz)/(abs(mxxzPyyz)+qudricLimitP);
+ 			mxxzPyyz  += wadjust * (-mxxzPyyz);
+ 			wadjust    = OxyyMxzz+(one-OxyyMxzz)*abs(mxxzMyyz)/(abs(mxxzMyyz)+qudricLimitM);
+ 			mxxzMyyz  += wadjust * (-mxxzMyyz);
+ 			wadjust    = OxyyPxzz+(one-OxyyPxzz)*abs(mxyyPxzz)/(abs(mxyyPxzz)+qudricLimitP);
+ 			mxyyPxzz  += wadjust * (-mxyyPxzz);
+ 			wadjust    = OxyyMxzz+(one-OxyyMxzz)*abs(mxyyMxzz)/(abs(mxyyMxzz)+qudricLimitM);
+ 			mxyyMxzz  += wadjust * (-mxyyMxzz);
+			//////////////////////////////////////////////////////////////////////////
+			//ohne limiter
+			//mfbbb     += OxyyMxzz * (-mfbbb);
+			//mxxyPyzz  += OxyyPxzz * (-mxxyPyzz);
+			//mxxyMyzz  += OxyyMxzz * (-mxxyMyzz);
+			//mxxzPyyz  += OxyyPxzz * (-mxxzPyyz);
+			//mxxzMyyz  += OxyyMxzz * (-mxxzMyyz);
+			//mxyyPxzz  += OxyyPxzz * (-mxyyPxzz);
+			//mxyyMxzz  += OxyyMxzz * (-mxyyMxzz);
+			//////////////////////////////////////////////////////////////////////////
+
+			// linear combinations back
+			mfcba = ( mxxyMyzz + mxxyPyzz) * c1o2;
+			mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2;
+			mfcab = ( mxxzMyyz + mxxzPyyz) * c1o2;
+			mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2;
+			mfbca = ( mxyyMxzz + mxyyPxzz) * c1o2;
+			mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
+
+			//4.
+			//////////////////////////////////////////////////////////////////////////
+			//mit limiter
+ 		//	wadjust    = O4+(one-O4)*abs(CUMacc)/(abs(CUMacc)+qudricLimit);
+			//CUMacc    += wadjust * (-CUMacc);
+ 		//	wadjust    = O4+(one-O4)*abs(CUMcac)/(abs(CUMcac)+qudricLimit);
+			//CUMcac    += wadjust * (-CUMcac); 
+ 		//	wadjust    = O4+(one-O4)*abs(CUMcca)/(abs(CUMcca)+qudricLimit);
+			//CUMcca    += wadjust * (-CUMcca); 
+
+ 		//	wadjust    = O4+(one-O4)*abs(CUMbbc)/(abs(CUMbbc)+qudricLimit);
+			//CUMbbc    += wadjust * (-CUMbbc); 
+ 		//	wadjust    = O4+(one-O4)*abs(CUMbcb)/(abs(CUMbcb)+qudricLimit);
+			//CUMbcb    += wadjust * (-CUMbcb); 
+ 		//	wadjust    = O4+(one-O4)*abs(CUMcbb)/(abs(CUMcbb)+qudricLimit);
+			//CUMcbb    += wadjust * (-CUMcbb); 
+			//////////////////////////////////////////////////////////////////////////
+			doubflo A = (four + two*omega - three*omega*omega) / (two - seven*omega + five*omega*omega);
+			doubflo B = (four + twentyeight*omega - fourteen*omega*omega) / (six - twentyone*omega + fiveteen*omega*omega);
+			//////////////////////////////////////////////////////////////////////////
+			//ohne limiter
+			//CUMacc += O4 * (-CUMacc); 
+			//CUMcac += O4 * (-CUMcac); 
+			//CUMcca += O4 * (-CUMcca); 
+			//CUMbbc += O4 * (-CUMbbc); 
+			//CUMbcb += O4 * (-CUMbcb); 
+			//CUMcbb += O4 * (-CUMcbb); 
+			CUMacc = -O4*(one / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
+			CUMcac = -O4*(one / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
+			CUMcca = -O4*(one / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
+			CUMbbc = -O4*(one / omega - c1o2) * Dxy           * c1o3 * B + (one - O4) * (CUMbbc);
+			CUMbcb = -O4*(one / omega - c1o2) * Dxz           * c1o3 * B + (one - O4) * (CUMbcb);
+			CUMcbb = -O4*(one / omega - c1o2) * Dyz           * c1o3 * B + (one - O4) * (CUMcbb);
+			//////////////////////////////////////////////////////////////////////////
+			
+					
+			//5.
+			CUMbcc += O5 * (-CUMbcc);
+			CUMcbc += O5 * (-CUMcbc);
+			CUMccb += O5 * (-CUMccb);
+
+			//6.
+			CUMccc += O6 * (-CUMccc);
+			
+
+
+			//back cumulants to central moments
+			//4.
+			mfcbb = CUMcbb + ((mfcaa + c1o3) * mfabb + two * mfbba * mfbab) / rho;
+			mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + two * mfbba * mfabb) / rho;
+			mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + two * mfbab * mfabb) / rho;
+						   
+			mfcca = CUMcca + (((mfcaa * mfaca + two * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho  - c1o9*(drho/rho));
+			mfcac = CUMcac + (((mfcaa * mfaac + two * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho  - c1o9*(drho/rho));
+			mfacc = CUMacc + (((mfaac * mfaca + two * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho  - c1o9*(drho/rho));
+
+			//5.
+			mfbcc = CUMbcc + ((mfaac * mfbca + mfaca * mfbac + four * mfabb * mfbbb + two * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac) ) / rho ;
+			mfcbc = CUMcbc + ((mfaac * mfcba + mfcaa * mfabc + four * mfbab * mfbbb + two * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc) ) / rho ;
+			mfccb = CUMccb + ((mfcaa * mfacb + mfaca * mfcab + four * mfbba * mfbbb + two * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab) ) / rho ;
+			
+			//6.
+
+			mfccc = CUMccc - ((-four *  mfbbb * mfbbb  
+							-           (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
+							-    four * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+							-     two * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
+							+(   four * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+							+     two * (mfcaa * mfaca * mfaac)
+							+ sixteen *  mfbba * mfbab * mfabb) / (rho * rho)
+							-    c1o3 * (mfacc + mfcac + mfcca) /rho 
+							-    c1o9 * (mfcaa + mfaca + mfaac) /rho 
+							+(    two * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) 
+							+           (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3 
+							+ c1o27*((drho * drho - drho)/(rho*rho)));
+			////////////////////////////////////////////////////////////////////////////////////
+
+			////////////////////////////////////////////////////////////////////////////////////
+			//the force be with you
+			mfbaa = -mfbaa;
+			mfaba = -mfaba;
+			mfaab = -mfaab;
+			////////////////////////////////////////////////////////////////////////////////////
+
+
+			////////////////////////////////////////////////////////////////////////////////////
+			//back
+			////////////////////////////////////////////////////////////////////////////////////
+			//mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
+			////////////////////////////////////////////////////////////////////////////////////
+			// Z - Dir
+			m0 =  mfaac * c1o2 +      mfaab * (vvz - c1o2) + (mfaaa + one* oMdrho) * (     vz2 - vvz) * c1o2; 
+			m1 = -mfaac        - two* mfaab *  vvz         +  mfaaa                * (one- vz2)              - one* oMdrho * vz2; 
+			m2 =  mfaac * c1o2 +      mfaab * (vvz + c1o2) + (mfaaa + one* oMdrho) * (     vz2 + vvz) * c1o2;
+			mfaaa = m0;
+			mfaab = m1;
+			mfaac = m2;
+			////////////////////////////////////////////////////////////////////////////////////
+			m0 =  mfabc * c1o2 +      mfabb * (vvz - c1o2) + mfaba * (     vz2 - vvz) * c1o2; 
+			m1 = -mfabc        - two* mfabb *  vvz         + mfaba * (one- vz2); 
+			m2 =  mfabc * c1o2 +      mfabb * (vvz + c1o2) + mfaba * (     vz2 + vvz) * c1o2;
+			mfaba = m0;
+			mfabb = m1;
+			mfabc = m2;
+			////////////////////////////////////////////////////////////////////////////////////
+			m0 =  mfacc * c1o2 +      mfacb * (vvz - c1o2) + (mfaca + c1o3 * oMdrho) * (     vz2 - vvz) * c1o2; 
+			m1 = -mfacc        - two* mfacb *  vvz         +  mfaca                  * (one- vz2)              - c1o3 * oMdrho * vz2; 
+			m2 =  mfacc * c1o2 +      mfacb * (vvz + c1o2) + (mfaca + c1o3 * oMdrho) * (     vz2 + vvz) * c1o2;
+			mfaca = m0;
+			mfacb = m1;
+			mfacc = m2;
+			////////////////////////////////////////////////////////////////////////////////////
+			////////////////////////////////////////////////////////////////////////////////////
+			m0 =  mfbac * c1o2 +      mfbab * (vvz - c1o2) + mfbaa * (     vz2 - vvz) * c1o2; 
+			m1 = -mfbac        - two* mfbab *  vvz         + mfbaa * (one- vz2); 
+			m2 =  mfbac * c1o2 +      mfbab * (vvz + c1o2) + mfbaa * (     vz2 + vvz) * c1o2;
+			mfbaa = m0;
+			mfbab = m1;
+			mfbac = m2;
+			/////////b//////////////////////////////////////////////////////////////////////////
+			m0 =  mfbbc * c1o2 +      mfbbb * (vvz - c1o2) + mfbba * (     vz2 - vvz) * c1o2; 
+			m1 = -mfbbc        - two* mfbbb *  vvz         + mfbba * (one- vz2); 
+			m2 =  mfbbc * c1o2 +      mfbbb * (vvz + c1o2) + mfbba * (     vz2 + vvz) * c1o2;
+			mfbba = m0;
+			mfbbb = m1;
+			mfbbc = m2;
+			/////////b//////////////////////////////////////////////////////////////////////////
+			m0 =  mfbcc * c1o2 +      mfbcb * (vvz - c1o2) + mfbca * (     vz2 - vvz) * c1o2; 
+			m1 = -mfbcc        - two* mfbcb *  vvz         + mfbca * (one- vz2); 
+			m2 =  mfbcc * c1o2 +      mfbcb * (vvz + c1o2) + mfbca * (     vz2 + vvz) * c1o2;
+			mfbca = m0;
+			mfbcb = m1;
+			mfbcc = m2;
+			////////////////////////////////////////////////////////////////////////////////////
+			////////////////////////////////////////////////////////////////////////////////////
+			m0 =  mfcac * c1o2 +      mfcab * (vvz - c1o2) + (mfcaa + c1o3 * oMdrho) * (     vz2 - vvz) * c1o2; 
+			m1 = -mfcac        - two* mfcab *  vvz         +  mfcaa                  * (one- vz2)              - c1o3 * oMdrho * vz2; 
+			m2 =  mfcac * c1o2 +      mfcab * (vvz + c1o2) + (mfcaa + c1o3 * oMdrho) * (     vz2 + vvz) * c1o2;
+			mfcaa = m0;
+			mfcab = m1;
+			mfcac = m2;
+			/////////c//////////////////////////////////////////////////////////////////////////
+			m0 =  mfcbc * c1o2 +      mfcbb * (vvz - c1o2) + mfcba * (     vz2 - vvz) * c1o2; 
+			m1 = -mfcbc        - two* mfcbb *  vvz         + mfcba * (one- vz2); 
+			m2 =  mfcbc * c1o2 +      mfcbb * (vvz + c1o2) + mfcba * (     vz2 + vvz) * c1o2;
+			mfcba = m0;
+			mfcbb = m1;
+			mfcbc = m2;
+			/////////c//////////////////////////////////////////////////////////////////////////
+			m0 =  mfccc * c1o2 +      mfccb * (vvz - c1o2) + (mfcca + c1o9 * oMdrho) * (     vz2 - vvz) * c1o2; 
+			m1 = -mfccc        - two* mfccb *  vvz         +  mfcca                  * (one- vz2)              - c1o9 * oMdrho * vz2; 
+			m2 =  mfccc * c1o2 +      mfccb * (vvz + c1o2) + (mfcca + c1o9 * oMdrho) * (     vz2 + vvz) * c1o2;
+			mfcca = m0;
+			mfccb = m1;
+			mfccc = m2;
+			////////////////////////////////////////////////////////////////////////////////////
+			////////////////////////////////////////////////////////////////////////////////////
+			//mit 1/6, 2/3, 1/6, 0, 0, 0, 1/18, 2/9, 1/18   Konditionieren
+			////////////////////////////////////////////////////////////////////////////////////
+			// Y - Dir
+			m0 =  mfaca * c1o2 +      mfaba * (vvy - c1o2) + (mfaaa + c1o6 * oMdrho) * (     vy2 - vvy) * c1o2; 
+			m1 = -mfaca        - two* mfaba *  vvy         +  mfaaa                  * (one- vy2)              - c1o6 * oMdrho * vy2; 
+			m2 =  mfaca * c1o2 +      mfaba * (vvy + c1o2) + (mfaaa + c1o6 * oMdrho) * (     vy2 + vvy) * c1o2;
+			mfaaa = m0;
+			mfaba = m1;
+			mfaca = m2;
+			////////////////////////////////////////////////////////////////////////////////////
+			m0 =  mfacb * c1o2 +      mfabb * (vvy - c1o2) + (mfaab + c2o3 * oMdrho) * (     vy2 - vvy) * c1o2; 
+			m1 = -mfacb        - two* mfabb *  vvy         +  mfaab                  * (one- vy2)              - c2o3 * oMdrho * vy2; 
+			m2 =  mfacb * c1o2 +      mfabb * (vvy + c1o2) + (mfaab + c2o3 * oMdrho) * (     vy2 + vvy) * c1o2;
+			mfaab = m0;
+			mfabb = m1;
+			mfacb = m2;
+			////////////////////////////////////////////////////////////////////////////////////
+			m0 =  mfacc * c1o2 +      mfabc * (vvy - c1o2) + (mfaac + c1o6 * oMdrho) * (     vy2 - vvy) * c1o2; 
+			m1 = -mfacc        - two* mfabc *  vvy         +  mfaac                  * (one- vy2)              - c1o6 * oMdrho * vy2; 
+			m2 =  mfacc * c1o2 +      mfabc * (vvy + c1o2) + (mfaac + c1o6 * oMdrho) * (     vy2 + vvy) * c1o2;
+			mfaac = m0;
+			mfabc = m1;
+			mfacc = m2;
+			////////////////////////////////////////////////////////////////////////////////////
+			////////////////////////////////////////////////////////////////////////////////////
+			m0 =  mfbca * c1o2 +      mfbba * (vvy - c1o2) + mfbaa * (     vy2 - vvy) * c1o2; 
+			m1 = -mfbca        - two* mfbba *  vvy         + mfbaa * (one- vy2); 
+			m2 =  mfbca * c1o2 +      mfbba * (vvy + c1o2) + mfbaa * (     vy2 + vvy) * c1o2;
+			mfbaa = m0;
+			mfbba = m1;
+			mfbca = m2;
+			/////////b//////////////////////////////////////////////////////////////////////////
+			m0 =  mfbcb * c1o2 +      mfbbb * (vvy - c1o2) + mfbab * (     vy2 - vvy) * c1o2; 
+			m1 = -mfbcb        - two* mfbbb *  vvy         + mfbab * (one- vy2); 
+			m2 =  mfbcb * c1o2 +      mfbbb * (vvy + c1o2) + mfbab * (     vy2 + vvy) * c1o2;
+			mfbab = m0;
+			mfbbb = m1;
+			mfbcb = m2;
+			/////////b//////////////////////////////////////////////////////////////////////////
+			m0 =  mfbcc * c1o2 +      mfbbc * (vvy - c1o2) + mfbac * (     vy2 - vvy) * c1o2; 
+			m1 = -mfbcc        - two* mfbbc *  vvy         + mfbac * (one- vy2); 
+			m2 =  mfbcc * c1o2 +      mfbbc * (vvy + c1o2) + mfbac * (     vy2 + vvy) * c1o2;
+			mfbac = m0;
+			mfbbc = m1;
+			mfbcc = m2;
+			////////////////////////////////////////////////////////////////////////////////////
+			////////////////////////////////////////////////////////////////////////////////////
+			m0 =  mfcca * c1o2 +      mfcba * (vvy - c1o2) + (mfcaa + c1o18 * oMdrho) * (     vy2 - vvy) * c1o2; 
+			m1 = -mfcca        - two* mfcba *  vvy         +  mfcaa                   * (one- vy2)              - c1o18 * oMdrho * vy2; 
+			m2 =  mfcca * c1o2 +      mfcba * (vvy + c1o2) + (mfcaa + c1o18 * oMdrho) * (     vy2 + vvy) * c1o2;
+			mfcaa = m0;
+			mfcba = m1;
+			mfcca = m2;
+			/////////c//////////////////////////////////////////////////////////////////////////
+			m0 =  mfccb * c1o2 +      mfcbb * (vvy - c1o2) + (mfcab + c2o9 * oMdrho) * (     vy2 - vvy) * c1o2; 
+			m1 = -mfccb        - two* mfcbb *  vvy         +  mfcab                  * (one- vy2)              - c2o9 * oMdrho * vy2; 
+			m2 =  mfccb * c1o2 +      mfcbb * (vvy + c1o2) + (mfcab + c2o9 * oMdrho) * (     vy2 + vvy) * c1o2;
+			mfcab = m0;
+			mfcbb = m1;
+			mfccb = m2;
+			/////////c//////////////////////////////////////////////////////////////////////////
+			m0 =  mfccc * c1o2 +      mfcbc * (vvy - c1o2) + (mfcac + c1o18 * oMdrho) * (     vy2 - vvy) * c1o2; 
+			m1 = -mfccc        - two* mfcbc *  vvy         +  mfcac                   * (one- vy2)              - c1o18 * oMdrho * vy2; 
+			m2 =  mfccc * c1o2 +      mfcbc * (vvy + c1o2) + (mfcac + c1o18 * oMdrho) * (     vy2 + vvy) * c1o2;
+			mfcac = m0;
+			mfcbc = m1;
+			mfccc = m2;
+			////////////////////////////////////////////////////////////////////////////////////
+			////////////////////////////////////////////////////////////////////////////////////
+			//mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36 Konditionieren
+			////////////////////////////////////////////////////////////////////////////////////
+			// X - Dir
+			m0 =  mfcaa * c1o2 +      mfbaa * (vvx - c1o2) + (mfaaa + c1o36 * oMdrho) * (     vx2 - vvx) * c1o2; 
+			m1 = -mfcaa        - two* mfbaa *  vvx         +  mfaaa                   * (one- vx2)              - c1o36 * oMdrho * vx2; 
+			m2 =  mfcaa * c1o2 +      mfbaa * (vvx + c1o2) + (mfaaa + c1o36 * oMdrho) * (     vx2 + vvx) * c1o2;
+			mfaaa = m0;
+			mfbaa = m1;
+			mfcaa = m2;
+			////////////////////////////////////////////////////////////////////////////////////
+			m0 =  mfcba * c1o2 +      mfbba * (vvx - c1o2) + (mfaba + c1o9 * oMdrho) * (     vx2 - vvx) * c1o2; 
+			m1 = -mfcba        - two* mfbba *  vvx         +  mfaba                  * (one- vx2)              - c1o9 * oMdrho * vx2; 
+			m2 =  mfcba * c1o2 +      mfbba * (vvx + c1o2) + (mfaba + c1o9 * oMdrho) * (     vx2 + vvx) * c1o2;
+			mfaba = m0;
+			mfbba = m1;
+			mfcba = m2;
+			////////////////////////////////////////////////////////////////////////////////////
+			m0 =  mfcca * c1o2 +      mfbca * (vvx - c1o2) + (mfaca + c1o36 * oMdrho) * (     vx2 - vvx) * c1o2; 
+			m1 = -mfcca        - two* mfbca *  vvx         +  mfaca                   * (one- vx2)              - c1o36 * oMdrho * vx2; 
+			m2 =  mfcca * c1o2 +      mfbca * (vvx + c1o2) + (mfaca + c1o36 * oMdrho) * (     vx2 + vvx) * c1o2;
+			mfaca = m0;
+			mfbca = m1;
+			mfcca = m2;
+			////////////////////////////////////////////////////////////////////////////////////
+			////////////////////////////////////////////////////////////////////////////////////
+			m0 =  mfcab * c1o2 +      mfbab * (vvx - c1o2) + (mfaab + c1o9 * oMdrho) * (     vx2 - vvx) * c1o2; 
+			m1 = -mfcab        - two* mfbab *  vvx         +  mfaab                  * (one- vx2)              - c1o9 * oMdrho * vx2; 
+			m2 =  mfcab * c1o2 +      mfbab * (vvx + c1o2) + (mfaab + c1o9 * oMdrho) * (     vx2 + vvx) * c1o2;
+			mfaab = m0;
+			mfbab = m1;
+			mfcab = m2;
+			///////////b////////////////////////////////////////////////////////////////////////
+			m0 =  mfcbb * c1o2 +      mfbbb * (vvx - c1o2) + (mfabb + c4o9 * oMdrho) * (     vx2 - vvx) * c1o2; 
+			m1 = -mfcbb        - two* mfbbb *  vvx         +  mfabb                  * (one- vx2)              - c4o9 * oMdrho * vx2; 
+			m2 =  mfcbb * c1o2 +      mfbbb * (vvx + c1o2) + (mfabb + c4o9 * oMdrho) * (     vx2 + vvx) * c1o2;
+			mfabb = m0;
+			mfbbb = m1;
+			mfcbb = m2;
+			///////////b////////////////////////////////////////////////////////////////////////
+			m0 =  mfccb * c1o2 +      mfbcb * (vvx - c1o2) + (mfacb + c1o9 * oMdrho) * (     vx2 - vvx) * c1o2; 
+			m1 = -mfccb        - two* mfbcb *  vvx         +  mfacb                  * (one- vx2)              - c1o9 * oMdrho * vx2; 
+			m2 =  mfccb * c1o2 +      mfbcb * (vvx + c1o2) + (mfacb + c1o9 * oMdrho) * (     vx2 + vvx) * c1o2;
+			mfacb = m0;
+			mfbcb = m1;
+			mfccb = m2;
+			////////////////////////////////////////////////////////////////////////////////////
+			////////////////////////////////////////////////////////////////////////////////////
+			m0 =  mfcac * c1o2 +      mfbac * (vvx - c1o2) + (mfaac + c1o36 * oMdrho) * (     vx2 - vvx) * c1o2; 
+			m1 = -mfcac        - two* mfbac *  vvx         +  mfaac                   * (one- vx2)              - c1o36 * oMdrho * vx2; 
+			m2 =  mfcac * c1o2 +      mfbac * (vvx + c1o2) + (mfaac + c1o36 * oMdrho) * (     vx2 + vvx) * c1o2;
+			mfaac = m0;
+			mfbac = m1;
+			mfcac = m2;
+			///////////c////////////////////////////////////////////////////////////////////////
+			m0 =  mfcbc * c1o2 +      mfbbc * (vvx - c1o2) + (mfabc + c1o9 * oMdrho) * (     vx2 - vvx) * c1o2; 
+			m1 = -mfcbc        - two* mfbbc *  vvx         +  mfabc                  * (one- vx2)              - c1o9 * oMdrho * vx2; 
+			m2 =  mfcbc * c1o2 +      mfbbc * (vvx + c1o2) + (mfabc + c1o9 * oMdrho) * (     vx2 + vvx) * c1o2;
+			mfabc = m0;
+			mfbbc = m1;
+			mfcbc = m2;
+			///////////c////////////////////////////////////////////////////////////////////////
+			m0 =  mfccc * c1o2 +      mfbcc * (vvx - c1o2) + (mfacc + c1o36 * oMdrho) * (     vx2 - vvx) * c1o2; 
+			m1 = -mfccc        - two* mfbcc *  vvx         +  mfacc                   * (one- vx2)              - c1o36 * oMdrho * vx2; 
+			m2 =  mfccc * c1o2 +      mfbcc * (vvx + c1o2) + (mfacc + c1o36 * oMdrho) * (     vx2 + vvx) * c1o2;
+			mfacc = m0;
+			mfbcc = m1;
+			mfccc = m2;
+			////////////////////////////////////////////////////////////////////////////////////
+
+			////////////////////////////////////////////////////////////////////////////////////
+			(D.f[ dirE   ])[k   ] = mfabb;                                                                    
+			(D.f[ dirW   ])[kw  ] = mfcbb;                                                                  
+			(D.f[ dirN   ])[k   ] = mfbab;
+			(D.f[ dirS   ])[ks  ] = mfbcb;
+			(D.f[ dirT   ])[k   ] = mfbba;
+			(D.f[ dirB   ])[kb  ] = mfbbc;
+			(D.f[ dirNE  ])[k   ] = mfaab;
+			(D.f[ dirSW  ])[ksw ] = mfccb;
+			(D.f[ dirSE  ])[ks  ] = mfacb;
+			(D.f[ dirNW  ])[kw  ] = mfcab;
+			(D.f[ dirTE  ])[k   ] = mfaba;
+			(D.f[ dirBW  ])[kbw ] = mfcbc;
+			(D.f[ dirBE  ])[kb  ] = mfabc;
+			(D.f[ dirTW  ])[kw  ] = mfcba;
+			(D.f[ dirTN  ])[k   ] = mfbaa;
+			(D.f[ dirBS  ])[kbs ] = mfbcc;
+			(D.f[ dirBN  ])[kb  ] = mfbac;
+			(D.f[ dirTS  ])[ks  ] = mfbca;
+			(D.f[ dirZERO])[k   ] = mfbbb;
+			(D.f[ dirTNE ])[k   ] = mfaaa;
+			(D.f[ dirTSE ])[ks  ] = mfaca;
+			(D.f[ dirBNE ])[kb  ] = mfaac;
+			(D.f[ dirBSE ])[kbs ] = mfacc;
+			(D.f[ dirTNW ])[kw  ] = mfcaa;
+			(D.f[ dirTSW ])[ksw ] = mfcca;
+			(D.f[ dirBNW ])[kbw ] = mfcac;
+			(D.f[ dirBSW ])[kbsw] = mfccc;
+			////////////////////////////////////////////////////////////////////////////////////
+		}                                                                                                                    
+	}
+}
+////////////////////////////////////////////////////////////////////////////////
+
diff --git a/src/VirtualFluids_GPU/Interface_OpenFOAM/Interface.cpp b/src/VirtualFluids_GPU/Interface_OpenFOAM/Interface.cpp
index de7812757..edb304f9b 100644
--- a/src/VirtualFluids_GPU/Interface_OpenFOAM/Interface.cpp
+++ b/src/VirtualFluids_GPU/Interface_OpenFOAM/Interface.cpp
@@ -86,9 +86,12 @@ void Interface::allocArrays_CoordNeighborGeo(Parameter* para) {
 		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		para->cudaAllocCoord(i);
 		para->cudaAllocSP(i);
-		if (para->getCalcMedian()) 	                       para->cudaAllocMedianSP(i);
-		if (para->getCalcParticle() || para->getUseWale()) para->cudaAllocNeighborWSB(i);
-		if (para->getUseWale())                            para->cudaAllocTurbulentViscosity(i);
+		if (para->getCalcMedian()) 	                       
+			para->cudaAllocMedianSP(i);
+		if (para->getCalcParticle() || para->getUseWale()) 
+			para->cudaAllocNeighborWSB(i);
+		if (para->getUseWale())                            
+			para->cudaAllocTurbulentViscosity(i);
 		//cout <<"Test 2 " <<endl;
 		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		coordX->initArrayCoord(para->getParH(i)->coordX_SP, i);
diff --git a/src/VirtualFluids_GPU/LBM/Simulation.cpp b/src/VirtualFluids_GPU/LBM/Simulation.cpp
index 18d83ad04..d19e4e8e4 100644
--- a/src/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -565,7 +565,27 @@ void Simulation::run()
 									 0,
 									 para->getForcesDev(),
 									 para->getParD(0)->evenOrOdd); 
-			getLastCudaError("KernelCasSPKum27 execution failed");
+			getLastCudaError("KernelWaleCumOneCompSP27 execution failed");
+
+			//KernelWaleCumAA2016CompSP27(para->getParD(0)->numberofthreads,
+			//							para->getParD(0)->omega,			
+			//							para->getParD(0)->geoSP, 
+			//							para->getParD(0)->neighborX_SP, 
+			//							para->getParD(0)->neighborY_SP, 
+			//							para->getParD(0)->neighborZ_SP,
+			//							para->getParD(0)->neighborWSB_SP,
+			//							para->getParD(0)->vx_SP,        
+			//							para->getParD(0)->vy_SP,        
+			//							para->getParD(0)->vz_SP,        
+			//							para->getParD(0)->d0SP.f[0],
+			//							para->getParD(0)->turbViscosity,
+			//							para->getParD(0)->size_Mat_SP,
+			//							para->getParD(0)->size_Array_SP,
+			//							0,
+			//							para->getForcesDev(),
+			//							para->getParD(0)->evenOrOdd); 
+			//getLastCudaError("KernelWaleCumAA2016CompSP27 execution failed");
+
 		} 
 		else
 		{
@@ -582,6 +602,20 @@ void Simulation::run()
 								 para->getForcesDev(),
 								 para->getParD(0)->evenOrOdd); 
 			getLastCudaError("KernelCasSPKum27 execution failed");
+
+			//KernelKumAA2016CompSP27(para->getParD(0)->numberofthreads,       
+			//					 para->getParD(0)->omega, 
+			//					 para->getParD(0)->geoSP, 
+			//					 para->getParD(0)->neighborX_SP, 
+			//					 para->getParD(0)->neighborY_SP, 
+			//					 para->getParD(0)->neighborZ_SP,
+			//					 para->getParD(0)->d0SP.f[0],    
+			//					 para->getParD(0)->size_Mat_SP,
+			//					 0,
+			//					 para->getForcesDev(),
+			//					 para->getParD(0)->evenOrOdd); 
+			//getLastCudaError("KernelKumAA2016CompSP27 execution failed");
+
 		}
 
 
@@ -1331,7 +1365,7 @@ void Simulation::run()
 		} 
 		  //////////////////////////////////////////////////////////////////////////////////
 		  //calculate the new forcing
-		  if (((t%para->getTStartOut()) == 0) && (t >= para->getTStartOut()))
+		  if (((t%10) == 0) && (t >= 10)/*((t%para->getTStartOut()) == 0) && (t >= para->getTStartOut())*/)
 		  {
 			  forceCalculator->calcPIDControllerForForce(para);
 		  }
@@ -1939,12 +1973,12 @@ void Simulation::run()
 			  for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
 			  {
 				  //output << "start level = " << lev << "\n";
-				  LBCalcMeasurePoints27(para->getParD(lev)->VxMP, para->getParD(lev)->VyMP, para->getParD(lev)->VzMP,
-					  para->getParD(lev)->RhoMP, para->getParD(lev)->kMP, para->getParD(lev)->numberOfPointskMP,
-					  valuesPerClockCycle, t_MP, para->getParD(lev)->geoSP,
-					  para->getParD(lev)->neighborX_SP, para->getParD(lev)->neighborY_SP, para->getParD(lev)->neighborZ_SP,
-					  para->getParD(lev)->size_Mat_SP, para->getParD(lev)->d0SP.f[0], para->getParD(lev)->numberofthreads,
-					  para->getParD(lev)->evenOrOdd);
+				  LBCalcMeasurePoints27(  para->getParD(lev)->VxMP,			para->getParD(lev)->VyMP,			para->getParD(lev)->VzMP,
+										  para->getParD(lev)->RhoMP,		para->getParD(lev)->kMP,			para->getParD(lev)->numberOfPointskMP,
+										  valuesPerClockCycle,				t_MP,								para->getParD(lev)->geoSP,
+										  para->getParD(lev)->neighborX_SP, para->getParD(lev)->neighborY_SP,	para->getParD(lev)->neighborZ_SP,
+										  para->getParD(lev)->size_Mat_SP,	para->getParD(lev)->d0SP.f[0],		para->getParD(lev)->numberofthreads,
+										  para->getParD(lev)->evenOrOdd);
 			  }
 			  t_MP++;
 		  }
@@ -1961,6 +1995,7 @@ void Simulation::run()
 				  {
 					  MeasurePointWriter::writeMeasurePoints(para, lev, j, t);
 				  }
+				  MeasurePointWriter::calcAndWriteMeanAndFluctuations(para, lev, t, para->getTStartOut());
 			  }
 			  t_MP = 0;
 		  }
@@ -2024,7 +2059,7 @@ void Simulation::run()
       // File IO
       ////////////////////////////////////////////////////////////////////////////////
       //comm->startTimer();
-      if(para->getTOut()>0 && t%para->getTOut()==0 && t>para->getStartTurn())
+      if(para->getTOut()>0 && t%para->getTOut()==0 && t>para->getTStartOut())
       {
 		  //////////////////////////////////////////////////////////////////////////////////
 		  //if (para->getParD(0)->evenOrOdd==true)  para->getParD(0)->evenOrOdd=false;
diff --git a/src/VirtualFluids_GPU/Output/MeasurePointWriter.hpp b/src/VirtualFluids_GPU/Output/MeasurePointWriter.hpp
index 0d0662a64..e72c15e96 100644
--- a/src/VirtualFluids_GPU/Output/MeasurePointWriter.hpp
+++ b/src/VirtualFluids_GPU/Output/MeasurePointWriter.hpp
@@ -11,6 +11,7 @@
 
 //#include "LBM/LB.h"
 //#include "LBM/D3Q27.h"
+#include <numeric>
 #include "basics/utilities/UbFileOutputASCII.h"
 #include "Parameter/Parameter.h"
 
@@ -143,6 +144,126 @@ public:
 		}
 	}
 
+	static void calcAndWriteMeanAndFluctuations(Parameter* para, int level, int t, unsigned int startOfCalculation)
+	{
+		//calc
+		int numberNodes = (int)para->getParH(level)->MP.size();
+		std::vector<double> uMean(numberNodes);
+		std::vector<double> vMean(numberNodes);
+		std::vector<double> wMean(numberNodes);
+		std::vector<double> uuMean(numberNodes);
+		std::vector<double> vvMean(numberNodes);
+		std::vector<double> wwMean(numberNodes);
+		std::vector<double> uvMean(numberNodes);
+		std::vector<double> uwMean(numberNodes);
+		std::vector<double> vwMean(numberNodes);
+
+		int numberSteps = (int)para->getParH(level)->MP[0].Vx.size();
+
+		std::vector<double> uuFluct(t);
+		std::vector<double> vvFluct(t);
+		std::vector<double> wwFluct(t);
+		std::vector<double> uvFluct(t);
+		std::vector<double> uwFluct(t);
+		std::vector<double> vwFluct(t);
+
+		for (int index = 0; index < numberNodes; index++)
+		{
+			//uMean[index] = std::accumulate(para->getParH(level)->MP[index].Vx.begin() + (startOfCalculation - 1), para->getParH(level)->MP[index].Vx.end(), 0.0) / double(t - startOfCalculation);
+			//vMean[index] = std::accumulate(para->getParH(level)->MP[index].Vy.begin() + (startOfCalculation - 1), para->getParH(level)->MP[index].Vy.end(), 0.0) / double(t - startOfCalculation);
+			//wMean[index] = std::accumulate(para->getParH(level)->MP[index].Vz.begin() + (startOfCalculation - 1), para->getParH(level)->MP[index].Vz.end(), 0.0) / double(t - startOfCalculation);
+
+			uMean[index] = 0.0;
+			vMean[index] = 0.0;
+			wMean[index] = 0.0;
+
+			for (int step = startOfCalculation; step < t; step++)
+			{
+				uMean[index] += para->getParH(level)->MP[index].Vx[step];
+				vMean[index] += para->getParH(level)->MP[index].Vy[step];
+				wMean[index] += para->getParH(level)->MP[index].Vz[step];
+			}
+
+			uMean[index] /= double(t - startOfCalculation);
+			vMean[index] /= double(t - startOfCalculation);
+			wMean[index] /= double(t - startOfCalculation);
+
+			for (int step = 0; step < t; step++)
+			{
+				uuFluct[step] = (para->getParH(level)->MP[index].Vx[step] - uMean[index]) * (para->getParH(level)->MP[index].Vx[step] - uMean[index]);
+				vvFluct[step] = (para->getParH(level)->MP[index].Vy[step] - vMean[index]) * (para->getParH(level)->MP[index].Vy[step] - vMean[index]);
+				wwFluct[step] = (para->getParH(level)->MP[index].Vz[step] - wMean[index]) * (para->getParH(level)->MP[index].Vz[step] - wMean[index]);
+
+				uvFluct[step] = (para->getParH(level)->MP[index].Vx[step] - uMean[index]) * (para->getParH(level)->MP[index].Vy[step] - vMean[index]);
+				uwFluct[step] = (para->getParH(level)->MP[index].Vx[step] - uMean[index]) * (para->getParH(level)->MP[index].Vz[step] - wMean[index]);
+				vwFluct[step] = (para->getParH(level)->MP[index].Vy[step] - vMean[index]) * (para->getParH(level)->MP[index].Vz[step] - wMean[index]);
+			}
+
+			//uuMean[index] = std::accumulate(uuFluct.begin() + (startOfCalculation - 1), uuFluct.end(), 0.0) / double(t - startOfCalculation);
+			//vvMean[index] = std::accumulate(vvFluct.begin() + (startOfCalculation - 1), vvFluct.end(), 0.0) / double(t - startOfCalculation);
+			//wwMean[index] = std::accumulate(wwFluct.begin() + (startOfCalculation - 1), wwFluct.end(), 0.0) / double(t - startOfCalculation);
+
+			//uvMean[index] = std::accumulate(uvFluct.begin() + (startOfCalculation - 1), uvFluct.end(), 0.0) / double(t - startOfCalculation);
+			//uwMean[index] = std::accumulate(uwFluct.begin() + (startOfCalculation - 1), uwFluct.end(), 0.0) / double(t - startOfCalculation);
+			//vwMean[index] = std::accumulate(vwFluct.begin() + (startOfCalculation - 1), vwFluct.end(), 0.0) / double(t - startOfCalculation);
+
+			uuMean[index] = 0.0;
+			vvMean[index] = 0.0;
+			wwMean[index] = 0.0;
+
+			uvMean[index] = 0.0;
+			uwMean[index] = 0.0;
+			vwMean[index] = 0.0;
+
+			for (int step = startOfCalculation; step < t; step++)
+			{
+				uuMean[index] += uuFluct[step];
+				vvMean[index] += vvFluct[step];
+				wwMean[index] += wwFluct[step];
+
+				uvMean[index] += uvFluct[step];
+				uwMean[index] += uwFluct[step];
+				vwMean[index] += vwFluct[step];
+			}
+
+			uuMean[index] /= double(t - startOfCalculation);
+			vvMean[index] /= double(t - startOfCalculation);
+			wwMean[index] /= double(t - startOfCalculation);
+
+			uvMean[index] /= double(t - startOfCalculation);
+			uwMean[index] /= double(t - startOfCalculation);
+			vwMean[index] /= double(t - startOfCalculation);
+
+		}
+
+		std::cout << "level: " << level << ", t: " << t << ", startOfCalculation= " << startOfCalculation << ", numberSteps: " << numberSteps << std::endl;
+
+		//write
+		UbFileOutputASCII out(para->getFName() + "_MeanAndFluct_" + std::to_string(level) + "_" + std::to_string(t) + ".dat");
+
+		out.writeString("Level:");
+		out.writeInteger(level);
+		out.writeLine();
+		out.writeString("PointName uMean  vMean  wMean  uuMean  vvMean  wwMean  uvMean  uwMean  vwMean");
+		out.writeLine();
+		out.writeInteger(numberNodes);
+		out.writeLine();
+		for (int index = 0; index < numberNodes; index++)
+		{
+			out.writeString(para->getParH(level)->MP[index].name);
+			out.writeFloat((float)(uMean[index]  * para->getVelocityRatio()));
+			out.writeFloat((float)(vMean[index]  * para->getVelocityRatio()));
+			out.writeFloat((float)(wMean[index]  * para->getVelocityRatio()));
+			out.writeFloat((float)(uuMean[index] * para->getVelocityRatio() * para->getVelocityRatio()));
+			out.writeFloat((float)(vvMean[index] * para->getVelocityRatio() * para->getVelocityRatio()));
+			out.writeFloat((float)(wwMean[index] * para->getVelocityRatio() * para->getVelocityRatio()));
+			out.writeFloat((float)(uvMean[index] * para->getVelocityRatio() * para->getVelocityRatio()));
+			out.writeFloat((float)(uwMean[index] * para->getVelocityRatio() * para->getVelocityRatio()));
+			out.writeFloat((float)(vwMean[index] * para->getVelocityRatio() * para->getVelocityRatio()));
+			out.writeLine();
+		}
+		out.writeLine();
+	}
 
 protected:
 
diff --git a/src/VirtualFluids_GPU/Output/WriteData.cpp b/src/VirtualFluids_GPU/Output/WriteData.cpp
index 3475146af..715c6f217 100644
--- a/src/VirtualFluids_GPU/Output/WriteData.cpp
+++ b/src/VirtualFluids_GPU/Output/WriteData.cpp
@@ -186,6 +186,8 @@ void writeTimestep(Parameter* para, unsigned int t)
 	////////////////////////////////////////////////////////////////////////////////
 	for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
 	{
+		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+		if (para->getUseWale())    para->cudaCopyTurbulentViscosityDH(lev);
 		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		const unsigned int numberOfParts = para->getParH(lev)->size_Mat_SP / para->getlimitOfNodesForVTK() + 1;
 		std::vector<std::string> fname;
@@ -240,8 +242,14 @@ void writeTimestep(Parameter* para, unsigned int t)
 			//UnstrucuredGridWriter::writeUnstrucuredGridBig(para, lev, ffname_bin, ffname_bin2);
 
 
-
-			UnstrucuredGridWriter::writeUnstrucuredGridLT(para, lev, fname);
+			if (para->getUseWale())
+			{
+				UnstrucuredGridWriter::writeUnstrucuredGridLTwithTurbulentViscosity(para, lev, fname);
+			}
+			else
+			{
+				UnstrucuredGridWriter::writeUnstrucuredGridLT(para, lev, fname);
+			}
 
 			//2nd and 3rd Moments
 			if (para->getCalc2ndOrderMoments())  UnstrucuredGridWriter::writeUnstrucuredGridEff2ndMomentsLT(para, lev, fname2ndMoments);
diff --git a/src/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/VirtualFluids_GPU/Parameter/Parameter.cpp
index 3f024e0e9..6adf1e9af 100644
--- a/src/VirtualFluids_GPU/Parameter/Parameter.cpp
+++ b/src/VirtualFluids_GPU/Parameter/Parameter.cpp
@@ -571,7 +571,7 @@ void Parameter::cudaAllocTurbulentViscosity(int lev)
 	//Device						 
 	checkCudaErrors(cudaMalloc((void**) &(parD[lev]->turbViscosity), parD[lev]->mem_size_doubflo_SP));
 	//////////////////////////////////////////////////////////////////////////
-	double tmp = (double)parH[lev]->mem_size_int_SP;
+	double tmp = (double)parH[lev]->mem_size_doubflo_SP;
 	setMemsizeGPU(tmp, false);
 }
 void Parameter::cudaCopyTurbulentViscosityHD(int lev)
-- 
GitLab