// GPU_Interface.h
//  _    ___      __              __________      _     __        ______________   __
// | |  / (_)____/ /___  ______ _/ / ____/ /_  __(_)___/ /____   /  ___/ __  / /  / /
// | | / / / ___/ __/ / / / __ `/ / /_  / / / / / / __  / ___/  / /___/ /_/ / /  / /
// | |/ / / /  / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__  )  / /_) / ____/ /__/ / 
// |___/_/_/   \__/\__,_/\__,_/_/_/   /_/\__,_/_/\__,_/____/   \____/_/    \_____/
//
//////////////////////////////////////////////////////////////////////////
#ifndef GPU_INTERFACE_H
#define GPU_INTERFACE_H

#include "LBM/LB.h"

#include <cuda_runtime.h>
#include <curand.h>
#include <curand_kernel.h>

struct LBMSimulationParameter;
class Parameter;

//////////////////////////////////////////////////////////////////////////
//Kernel
//////////////////////////////////////////////////////////////////////////
// Prototype of KernelCas27; the definition is not part of this excerpt.
// Takes three grid sizes instead of a thread count. Parameter roles are
// inferred from their names only — TODO confirm against the definition.
void KernelCas27(
    unsigned int grid_nx, unsigned int grid_ny, unsigned int grid_nz,
    real s9,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    unsigned long long numberOfLBnodes,
    bool EvenOrOdd);

// Prototype of KernelCasSP27; the definition is not part of this excerpt.
// Arguments: thread count, scalar s9, bcMatD, the three neighbor index arrays,
// DD, node count and an even/odd flag (roles inferred from names — TODO confirm).
void KernelCasSP27(
    unsigned int numberOfThreads,
    real s9,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    unsigned long long numberOfLBnodes,
    bool EvenOrOdd);

// Prototype of KernelCasSPMS27; the definition is not part of this excerpt.
// Parameter list is identical to that of KernelCasSP27 declared in this header.
void KernelCasSPMS27(
    unsigned int numberOfThreads,
    real s9,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    unsigned long long numberOfLBnodes,
    bool EvenOrOdd);

// Prototype of KernelCasSPMSOHM27; the definition is not part of this excerpt.
// Parameter list is identical to that of KernelCasSP27 declared in this header.
void KernelCasSPMSOHM27(
    unsigned int numberOfThreads,
    real s9,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    unsigned long long numberOfLBnodes,
    bool EvenOrOdd);

// Prototype of KernelKumCompSRTSP27; the definition is not part of this excerpt.
// Besides the usual arrays it takes `omega`, a grid `level` and a `forces`
// pointer (meaning inferred from names — TODO confirm against the definition).
void KernelKumCompSRTSP27(
    unsigned int numberOfThreads,
    real omega,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DDStart,
    unsigned long long numberOfLBnodes,
    int level,
    real* forces,
    bool EvenOrOdd);

// Prototype of KernelCumulantD3Q27All4; the definition is not part of this excerpt.
// Arguments: thread count, s9, bcMatD, neighbor arrays, DD, node count, level,
// forces, even/odd flag (roles inferred from names — TODO confirm).
void KernelCumulantD3Q27All4(
    unsigned int numberOfThreads,
    real s9,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    unsigned long long numberOfLBnodes,
    int level,
    real* forces,
    bool EvenOrOdd);

// Prototype of KernelKumAA2016CompBulkSP27; the definition is not part of this excerpt.
// Carries an additional `size_Array` next to `level` and `forces`; what it
// sizes cannot be told from the prototype — TODO confirm.
void KernelKumAA2016CompBulkSP27(
    unsigned int numberOfThreads,
    real s9,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    unsigned long long numberOfLBnodes,
    int size_Array,
    int level,
    real* forces,
    bool EvenOrOdd);

// Prototype of KernelKum1hSP27; the definition is not part of this excerpt.
// In addition to the usual arrays it receives `deltaPhi`, `angularVelocity`
// and three coordinate arrays (presumably per-node positions — TODO confirm).
void KernelKum1hSP27(
    unsigned int numberOfThreads,
    real omega,
    real deltaPhi,
    real angularVelocity,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* coordX, real* coordY, real* coordZ,
    real* DDStart,
    unsigned long long numberOfLBnodes,
    bool EvenOrOdd);

// Prototype of KernelCascadeSP27; the definition is not part of this excerpt.
// Parameter list is identical to that of KernelCasSP27 declared in this header.
void KernelCascadeSP27(
    unsigned int numberOfThreads,
    real s9,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    unsigned long long numberOfLBnodes,
    bool EvenOrOdd);

// Prototype of KernelKumNewSP27; the definition is not part of this excerpt.
// Parameter list is identical to that of KernelCasSP27 declared in this header.
void KernelKumNewSP27(
    unsigned int numberOfThreads,
    real s9,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    unsigned long long numberOfLBnodes,
    bool EvenOrOdd);


// Prototype of CumulantOnePreconditionedErrorDiffusionChimCompSP27; the
// definition is not part of this excerpt. Arguments: thread count, s9, bcMatD,
// neighbor arrays, DD, node count, size_Array, level, forces, even/odd flag
// (roles inferred from names — TODO confirm).
void CumulantOnePreconditionedErrorDiffusionChimCompSP27(
    unsigned int numberOfThreads,
    real s9,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    unsigned long long numberOfLBnodes,
    int size_Array,
    int level,
    real* forces,
    bool EvenOrOdd);

// Prototype of CumulantOnePreconditionedChimCompSP27; the definition is not
// part of this excerpt. Arguments: thread count, s9, bcMatD, neighbor arrays,
// DD, node count, size_Array, level, forces, even/odd flag
// (roles inferred from names — TODO confirm).
void CumulantOnePreconditionedChimCompSP27(
    unsigned int numberOfThreads,
    real s9,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    unsigned long long numberOfLBnodes,
    int size_Array,
    int level,
    real* forces,
    bool EvenOrOdd);

// Prototype of CumulantOneChimCompSP27; the definition is not part of this
// excerpt. Arguments: thread count, s9, bcMatD, neighbor arrays, DD, node
// count, size_Array, level, forces, even/odd flag
// (roles inferred from names — TODO confirm).
void CumulantOneChimCompSP27(
    unsigned int numberOfThreads,
    real s9,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    unsigned long long numberOfLBnodes,
    int size_Array,
    int level,
    real* forces,
    bool EvenOrOdd);


// Prototype of KernelKumIsoTestSP27; the definition is not part of this excerpt.
// Receives three extra real arrays dxxUx/dyyUy/dzzUz (presumably outputs of
// derivative terms — TODO confirm against the definition).
void KernelKumIsoTestSP27(
    unsigned int numberOfThreads,
    real s9,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    real* dxxUx, real* dyyUy, real* dzzUz,
    unsigned long long numberOfLBnodes,
    bool EvenOrOdd);

// Prototype of KernelKumCompSP27; the definition is not part of this excerpt.
// Parameter list is identical to that of KernelCasSP27 declared in this header.
void KernelKumCompSP27(
    unsigned int numberOfThreads,
    real s9,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    unsigned long long numberOfLBnodes,
    bool EvenOrOdd);

// Prototype of KernelWaleBySoniMalavCumAA2016CompSP27; the definition is not
// part of this excerpt. Compared with the plain cumulant prototypes it adds a
// fourth neighbor array (neighborWSB), three velocity arrays and a
// turbulentViscosity array (roles inferred from names — TODO confirm).
void KernelWaleBySoniMalavCumAA2016CompSP27(
    unsigned int numberOfThreads,
    real s9,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned int* neighborWSB,
    real* veloX, real* veloY, real* veloZ,
    real* DD,
    real* turbulentViscosity,
    unsigned long long numberOfLBnodes,
    int size_Array,
    int level,
    real* forces,
    bool EvenOrOdd);

// Prototype of KernelPMCumOneCompSP27; the definition is not part of this excerpt.
// Unlike the neighboring prototypes it has no bcMatD argument; instead it takes
// porosity/darcy/forchheimer scalars and a list of node ids with its length
// (presumably the porous-media nodes — TODO confirm).
void KernelPMCumOneCompSP27(
    unsigned int numberOfThreads,
    real omega,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    unsigned long long numberOfLBnodes,
    int level,
    real* forces,
    real porosity, real darcy, real forchheimer,
    unsigned int sizeOfPorousMedia,
    unsigned int* nodeIdsPorousMedia,
    bool EvenOrOdd);

// Prototype of KernelADincomp7; the definition is not part of this excerpt.
// Takes a diffusivity and two distribution-like arrays, DD and DD7
// (roles inferred from names — TODO confirm).
void KernelADincomp7(
    unsigned int numberOfThreads,
    real diffusivity,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    real* DD7,
    unsigned long long numberOfLBnodes,
    bool EvenOrOdd);

// Prototype of KernelADincomp27; the definition is not part of this excerpt.
// NOTE(review): the second array is still named DD7 although the function name
// ends in 27 — check whether that parameter name is intentional.
void KernelADincomp27(
    unsigned int numberOfThreads,
    real diffusivity,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    real* DD7,
    unsigned long long numberOfLBnodes,
    bool EvenOrOdd);

// Prototype of Init27; the definition is not part of this excerpt.
// Takes process identifiers (myid, numprocs), a scalar u0, node arrays, three
// grid sizes and a level/maxlevel pair (roles inferred from names — TODO confirm).
void Init27(
    int myid, int numprocs,
    real u0,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* vParab,
    unsigned long long numberOfLBnodes,
    unsigned int grid_nx, unsigned int grid_ny, unsigned int grid_nz,
    real* DD,
    int level, int maxlevel);

// Prototype of InitNonEqPartSP27; the definition is not part of this excerpt.
// Arguments: thread count, four neighbor arrays (X/Y/Z/WSB), geoD, rho and
// velocity arrays, node count, DD, omega, even/odd flag
// (roles inferred from names — TODO confirm).
void InitNonEqPartSP27(
    unsigned int numberOfThreads,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned int* neighborWSB,
    unsigned int* geoD,
    real* rho,
    real* ux, real* uy, real* uz,
    unsigned long long numberOfLBnodes,
    real* DD,
    real omega,
    bool EvenOrOdd);


// Prototype of InitThS7; the definition is not part of this excerpt.
// Arguments: thread count, neighbor arrays, geoD, Conc, velocity arrays,
// node count, DD7, even/odd flag (roles inferred from names — TODO confirm).
void InitThS7(
    unsigned int numberOfThreads,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned int* geoD,
    real* Conc,
    real* ux, real* uy, real* uz,
    unsigned long long numberOfLBnodes,
    real* DD7,
    bool EvenOrOdd);

// Prototype of InitADDev27; the definition is not part of this excerpt.
// Same argument layout as InitThS7 in this header, with DD27 in place of DD7.
void InitADDev27(
    unsigned int numberOfThreads,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned int* geoD,
    real* Conc,
    real* ux, real* uy, real* uz,
    unsigned long long numberOfLBnodes,
    real* DD27,
    bool EvenOrOdd);

// Prototype of PostProcessorF3_2018Fehlberg; the definition is not part of
// this excerpt. Takes four *Out arrays (rho, vx, vy, vz — presumably results
// written by the routine, TODO confirm) plus DDStart and G6.
void PostProcessorF3_2018Fehlberg(
    unsigned int numberOfThreads,
    real omega,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* rhoOut,
    real* vxOut, real* vyOut, real* vzOut,
    real* DDStart,
    real* G6,
    unsigned long long numberOfLBnodes,
    int level,
    real* forces,
    bool EvenOrOdd);

// Prototype of CalcMac27; the definition is not part of this excerpt.
// Takes velocity and density arrays, geoD, neighbor arrays, node count, three
// grid sizes, DD and a timestep-parity flag (roles inferred from names —
// TODO confirm).
void CalcMac27(
    real* vxD, real* vyD, real* vzD,
    real* rhoD,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    unsigned int grid_nx, unsigned int grid_ny, unsigned int grid_nz,
    real* DD,
    bool isEvenTimestep);

// Prototype of CalcMacSP27; the definition is not part of this excerpt.
// Like CalcMac27 in this header but with an extra pressD array and a thread
// count instead of grid sizes (roles inferred from names — TODO confirm).
void CalcMacSP27(
    real* vxD, real* vyD, real* vzD,
    real* rhoD,
    real* pressD,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    real* DD,
    bool isEvenTimestep);

// Prototype of CalcMacCompSP27; the definition is not part of this excerpt.
// Parameter list is identical to that of CalcMacSP27 declared in this header.
void CalcMacCompSP27(
    real* vxD, real* vyD, real* vzD,
    real* rhoD,
    real* pressD,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    real* DD,
    bool isEvenTimestep);

// Prototype of CalcMacThS7; the definition is not part of this excerpt.
// Arguments: Conc array, geoD, neighbor arrays, node count, thread count, DD7,
// timestep-parity flag (roles inferred from names — TODO confirm).
void CalcMacThS7(
    real* Conc,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    real* DD7,
    bool isEvenTimestep);

// Prototype of PlaneConcThS7; the definition is not part of this excerpt.
// kPC with numberOfPointskPC is presumably a list of node indices to evaluate
// — TODO confirm against the definition.
void PlaneConcThS7(
    real* Conc,
    int* kPC,
    unsigned int numberOfPointskPC,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    real* DD7,
    bool isEvenTimestep);

// Prototype of PlaneConcThS27; the definition is not part of this excerpt.
// Same argument layout as PlaneConcThS7 in this header, with DD27 in place of DD7.
void PlaneConcThS27(
    real* Conc,
    int* kPC,
    unsigned int numberOfPointskPC,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    real* DD27,
    bool isEvenTimestep);

// Prototype of CalcConcentration27; the definition is not part of this excerpt.
// Note the thread count comes first here, unlike the sibling Calc* prototypes.
void CalcConcentration27(
    unsigned int numberOfThreads,
    real* Conc,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    real* DD27,
    bool isEvenTimestep);

// Prototype of CalcMedSP27; the definition is not part of this excerpt.
// Parameter list is identical to that of CalcMacSP27 declared in this header.
void CalcMedSP27(
    real* vxD, real* vyD, real* vzD,
    real* rhoD,
    real* pressD,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    real* DD,
    bool isEvenTimestep);

// Prototype of CalcMedCompSP27; the definition is not part of this excerpt.
// Parameter list is identical to that of CalcMedSP27 declared in this header.
void CalcMedCompSP27(
    real* vxD, real* vyD, real* vzD,
    real* rhoD,
    real* pressD,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    real* DD,
    bool isEvenTimestep);

// Prototype of CalcMedCompAD27; the definition is not part of this excerpt.
// Extends the CalcMedCompSP27 argument list with a concD array and a second
// distribution-like array DD_AD (roles inferred from names — TODO confirm).
void CalcMedCompAD27(
    real* vxD, real* vyD, real* vzD,
    real* rhoD,
    real* pressD,
    real* concD,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    real* DD,
    real* DD_AD,
    bool isEvenTimestep);

// Prototype of CalcMacMedSP27; the definition is not part of this excerpt.
// Has no DD argument; takes `tdiff` instead (presumably the number of
// time steps used for averaging — TODO confirm against the definition).
void CalcMacMedSP27(
    real* vxD, real* vyD, real* vzD,
    real* rhoD,
    real* pressD,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned int tdiff,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    bool isEvenTimestep);

// Prototype of ResetMedianValuesSP27; the definition is not part of this
// excerpt. Takes only the five value arrays, the node count, the thread count
// and the timestep-parity flag.
void ResetMedianValuesSP27(
    real* vxD, real* vyD, real* vzD,
    real* rhoD,
    real* pressD,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    bool isEvenTimestep);

// Prototype of ResetMedianValuesAD27; the definition is not part of this
// excerpt. Same as ResetMedianValuesSP27 in this header plus a concD array.
void ResetMedianValuesAD27(
    real* vxD, real* vyD, real* vzD,
    real* rhoD,
    real* pressD,
    real* concD,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    bool isEvenTimestep);

// Prototype of Calc2ndMomentsIncompSP27; the definition is not part of this
// excerpt. The five leading real arrays (kxy, kyz, kxz, kxxMyy, kxxMzz
// "FromfcNEQ") are presumably outputs — TODO confirm against the definition.
void Calc2ndMomentsIncompSP27(
    real* kxyFromfcNEQ, real* kyzFromfcNEQ, real* kxzFromfcNEQ,
    real* kxxMyyFromfcNEQ, real* kxxMzzFromfcNEQ,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    real* DD,
    bool isEvenTimestep);

// Prototype of Calc2ndMomentsCompSP27; the definition is not part of this
// excerpt. Parameter list is identical to that of Calc2ndMomentsIncompSP27.
void Calc2ndMomentsCompSP27(
    real* kxyFromfcNEQ, real* kyzFromfcNEQ, real* kxzFromfcNEQ,
    real* kxxMyyFromfcNEQ, real* kxxMzzFromfcNEQ,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    real* DD,
    bool isEvenTimestep);

// Prototype of Calc3rdMomentsIncompSP27; the definition is not part of this
// excerpt. The seven leading CUM* arrays are presumably outputs — TODO confirm
// against the definition.
void Calc3rdMomentsIncompSP27(
    real* CUMbbb,
    real* CUMabc, real* CUMbac, real* CUMbca,
    real* CUMcba, real* CUMacb, real* CUMcab,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    real* DD,
    bool isEvenTimestep);

// Prototype of Calc3rdMomentsCompSP27; the definition is not part of this
// excerpt. Parameter list is identical to that of Calc3rdMomentsIncompSP27.
void Calc3rdMomentsCompSP27(
    real* CUMbbb,
    real* CUMabc, real* CUMbac, real* CUMbca,
    real* CUMcba, real* CUMacb, real* CUMcab,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    real* DD,
    bool isEvenTimestep);

// Prototype of CalcHigherMomentsIncompSP27; the definition is not part of this
// excerpt. The ten leading CUM* arrays are presumably outputs — TODO confirm
// against the definition.
void CalcHigherMomentsIncompSP27(
    real* CUMcbb, real* CUMbcb, real* CUMbbc,
    real* CUMcca, real* CUMcac, real* CUMacc,
    real* CUMbcc, real* CUMcbc, real* CUMccb,
    real* CUMccc,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    real* DD,
    bool isEvenTimestep);

// Prototype of CalcHigherMomentsCompSP27; the definition is not part of this
// excerpt. Parameter list is identical to that of CalcHigherMomentsIncompSP27.
void CalcHigherMomentsCompSP27(
    real* CUMcbb, real* CUMbcb, real* CUMbbc,
    real* CUMcca, real* CUMcac, real* CUMacc,
    real* CUMbcc, real* CUMcbc, real* CUMccb,
    real* CUMccc,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    real* DD,
    bool isEvenTimestep);

// Prototype of LBCalcMeasurePoints27; the definition is not part of this
// excerpt. kMP/numberOfPointskMP presumably describe the measure-point node
// list, and MPClockCycle/t the sampling position — TODO confirm.
void LBCalcMeasurePoints27(
    real* vxMP, real* vyMP, real* vzMP,
    real* rhoMP,
    unsigned int* kMP,
    unsigned int numberOfPointskMP,
    unsigned int MPClockCycle,
    unsigned int t,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    real* DD,
    unsigned int numberOfThreads,
    bool isEvenTimestep);

// Prototype of BcPress27; the definition is not part of this excerpt.
// NOTE(review): the third integer is named `tz` here, whereas BcVel27 in this
// header uses `nz` and `itz` — meaning not visible from the prototype.
void BcPress27(
    int nx, int ny, int tz,
    unsigned int grid_nx, unsigned int grid_ny,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// Prototype of BcVel27; the definition is not part of this excerpt.
// The two trailing scalars u0x and om follow the timestep-parity flag
// (presumably a velocity value and a relaxation factor — TODO confirm).
void BcVel27(
    int nx, int ny, int nz, int itz,
    unsigned int grid_nx, unsigned int grid_ny,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* DD,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep,
    real u0x,
    real om);

// Prototype of QDev27; the definition is not part of this excerpt.
// Takes the simulation-parameter struct and a boundary-condition struct by pointer.
void QDev27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of QDevComp27; the definition is not part of this excerpt.
// Takes the simulation-parameter struct and a boundary-condition struct by pointer.
void QDevComp27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of QDevCompThinWalls27; the definition is not part of this excerpt.
// Uses the explicit-array form: DD, k_Q, QQ, numberOfBCnodes, om1, plus a geom
// array and four neighbor arrays (roles inferred from names — TODO confirm).
void QDevCompThinWalls27(
    unsigned int numberOfThreads,
    real* DD,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* geom,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned int* neighborWSB,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// Prototype of QDev3rdMomentsComp27; the definition is not part of this excerpt.
// Takes the simulation-parameter struct and a boundary-condition struct by pointer.
void QDev3rdMomentsComp27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of QDevIncompHighNu27; the definition is not part of this excerpt.
// Arguments: thread count, DD, k_Q, QQ, numberOfBCnodes, om1, neighbor arrays,
// node count, timestep-parity flag (roles inferred from names — TODO confirm).
void QDevIncompHighNu27(
    unsigned int numberOfThreads,
    real* DD,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// Prototype of QDevCompHighNu27; the definition is not part of this excerpt.
// Parameter list is identical to that of QDevIncompHighNu27 in this header.
void QDevCompHighNu27(
    unsigned int numberOfThreads,
    real* DD,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// Prototype of QVelDevicePlainBB27; the definition is not part of this excerpt.
// Takes the simulation-parameter struct and a boundary-condition struct by pointer.
void QVelDevicePlainBB27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);
	
// Prototype of QVelDeviceCouette27; the definition is not part of this excerpt.
// Explicit-array form with velocity arrays vx/vy/vz ahead of DD, k_Q and QQ
// (roles inferred from names — TODO confirm).
void QVelDeviceCouette27(
    unsigned int numberOfThreads,
    real* vx, real* vy, real* vz,
    real* DD,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// Prototype of QVelDevice1h27; the definition is not part of this excerpt.
// Adds nx/ny, Phi, angularVelocity and three coordinate arrays to the
// velocity boundary argument list (roles inferred from names — TODO confirm).
void QVelDevice1h27(
    unsigned int numberOfThreads,
    int nx, int ny,
    real* vx, real* vy, real* vz,
    real* DD,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    real Phi,
    real angularVelocity,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    real* coordX, real* coordY, real* coordZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// Prototype of QVelDev27; the definition is not part of this excerpt.
// Takes the simulation-parameter struct and a boundary-condition struct by pointer.
void QVelDev27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of QVelDevCompPlusSlip27; the definition is not part of this
// excerpt. Parameter list is identical to that of QVelDeviceCouette27 in this header.
void QVelDevCompPlusSlip27(
    unsigned int numberOfThreads,
    real* vx, real* vy, real* vz,
    real* DD,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// Prototype of QVelDevComp27; the definition is not part of this excerpt.
// Takes the simulation-parameter struct and a boundary-condition struct by pointer.
void QVelDevComp27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of QVelDevCompThinWalls27; the definition is not part of this
// excerpt. Velocity variant of the thin-wall argument list: vx/vy/vz precede
// DD, and geom plus neighborWSB accompany the three neighbor arrays.
void QVelDevCompThinWalls27(
    unsigned int numberOfThreads,
    real* vx, real* vy, real* vz,
    real* DD,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* geom,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned int* neighborWSB,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// Prototype of QVelDevCompZeroPress27; the definition is not part of this excerpt.
// Takes the simulation-parameter struct and a boundary-condition struct by pointer.
void QVelDevCompZeroPress27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of QVelDevIncompHighNu27; the definition is not part of this
// excerpt. Parameter list is identical to that of QVelDeviceCouette27 in this header.
void QVelDevIncompHighNu27(
    unsigned int numberOfThreads,
    real* vx, real* vy, real* vz,
    real* DD,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// Prototype of QVelDevCompHighNu27; the definition is not part of this
// excerpt. Parameter list is identical to that of QVelDevIncompHighNu27.
void QVelDevCompHighNu27(
    unsigned int numberOfThreads,
    real* vx, real* vy, real* vz,
    real* DD,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// Prototype of QVeloDevEQ27; the definition is not part of this excerpt.
// Has no QQ array. NOTE(review): numberOfBCnodes is a signed `int` here while
// many sibling prototypes in this header declare it `unsigned int`; the type is
// part of the signature, so change it only together with the definition.
void QVeloDevEQ27(
    unsigned int numberOfThreads,
    real* VeloX, real* VeloY, real* VeloZ,
    real* DD,
    int* k_Q,
    int numberOfBCnodes,
    real om1,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// Prototype of QVeloStreetDevEQ27; the definition is not part of this excerpt.
// NOTE(review): this is the only prototype in the visible part of the header
// that passes the node count as `uint size_Mat`; the others use
// `unsigned long long numberOfLBnodes`. Confirm the narrower type is intended
// before touching it — it is part of the signature.
void QVeloStreetDevEQ27(
    uint numberOfThreads,
    real* veloXfraction, real* veloYfraction,
    int* naschVelo,
    real* DD,
    int* naschIndex,
    int numberOfStreetNodes,
    real velocityRatio,
    uint* neighborX, uint* neighborY, uint* neighborZ,
    uint size_Mat,
    bool isEvenTimestep);

// Prototype of QSlipDev27; the definition is not part of this excerpt.
// Takes the simulation-parameter struct and a boundary-condition struct by pointer.
void QSlipDev27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of QSlipDevComp27; the definition is not part of this excerpt.
// Takes the simulation-parameter struct and a boundary-condition struct by pointer.
void QSlipDevComp27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of BBSlipDevComp27; the definition is not part of this excerpt.
// Takes the simulation-parameter struct and a boundary-condition struct by pointer.
void BBSlipDevComp27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of QSlipDevCompTurbulentViscosity27; the definition is not part of
// this excerpt. Takes the simulation-parameter struct and a boundary-condition
// struct by pointer.
void QSlipDevCompTurbulentViscosity27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of QSlipPressureDevCompTurbulentViscosity27; the definition is not
// part of this excerpt. Takes the simulation-parameter struct and a
// boundary-condition struct by pointer.
void QSlipPressureDevCompTurbulentViscosity27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of QSlipGeomDevComp27; the definition is not part of this excerpt.
// Explicit-array form that additionally receives NormalX/NormalY/NormalZ
// (presumably wall-normal components — TODO confirm against the definition).
void QSlipGeomDevComp27(
    unsigned int numberOfThreads,
    real* DD,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    real* NormalX, real* NormalY, real* NormalZ,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// Prototype of QSlipNormDevComp27; the definition is not part of this excerpt.
// Parameter list is identical to that of QSlipGeomDevComp27 in this header.
void QSlipNormDevComp27(
    unsigned int numberOfThreads,
    real* DD,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    real* NormalX, real* NormalY, real* NormalZ,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// Prototype of QStressDevComp27; the definition is not part of this excerpt.
// Unlike most boundary prototypes here it takes the full Parameter object plus
// an explicit level index.
void QStressDevComp27(
    Parameter* para,
    QforBoundaryConditions* boundaryCondition,
    const int level);

// Prototype of BBStressDev27; the definition is not part of this excerpt.
// Takes the full Parameter object, a boundary-condition struct and a level index.
void BBStressDev27(
    Parameter* para,
    QforBoundaryConditions* boundaryCondition,
    const int level);

// Prototype of BBStressPressureDev27; the definition is not part of this excerpt.
// Takes the full Parameter object, a boundary-condition struct and a level index.
void BBStressPressureDev27(
    Parameter* para,
    QforBoundaryConditions* boundaryCondition,
    const int level);

// Prototype of QPressDev27; the definition is not part of this excerpt.
// Takes the simulation-parameter struct and a boundary-condition struct by pointer.
void QPressDev27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of QPressDevFixBackflow27; the definition is not part of this
// excerpt. Arguments: thread count, rhoBC, DD, k_Q, numberOfBCnodes, om1,
// neighbor arrays, node count, timestep-parity flag
// (roles inferred from names — TODO confirm).
void QPressDevFixBackflow27(
    unsigned int numberOfThreads,
    real* rhoBC,
    real* DD,
    int* k_Q,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// Prototype of QPressDevDirDepBot27; the definition is not part of this
// excerpt. Parameter list is identical to that of QPressDevFixBackflow27.
void QPressDevDirDepBot27(
    unsigned int numberOfThreads,
    real* rhoBC,
    real* DD,
    int* k_Q,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// Prototype of QPressNoRhoDev27; the definition is not part of this excerpt.
// Takes the simulation-parameter struct and a boundary-condition struct by pointer.
void QPressNoRhoDev27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of QPressZeroRhoOutflowDev27; the definition is not part of this
// excerpt. Takes the simulation-parameter struct and a boundary-condition
// struct by pointer.
void QPressZeroRhoOutflowDev27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of QInflowScaleByPressDev27; the definition is not part of this
// excerpt. Takes the simulation-parameter struct and a boundary-condition
// struct by pointer.
void QInflowScaleByPressDev27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of QPressDevOld27; the definition is not part of this excerpt.
// Receives two index arrays, k_Q and k_N (presumably boundary nodes and their
// neighbors — TODO confirm against the definition).
void QPressDevOld27(
    unsigned int numberOfThreads,
    real* rhoBC,
    real* DD,
    int* k_Q,
    int* k_N,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// Prototype of QPressDevIncompNEQ27; the definition is not part of this excerpt.
// Takes the simulation-parameter struct and a boundary-condition struct by pointer.
void QPressDevIncompNEQ27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of QPressDevNEQ27; the definition is not part of this excerpt.
// Takes the simulation-parameter struct and a boundary-condition struct by pointer.
void QPressDevNEQ27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of QPressDevEQZ27; the definition is not part of this excerpt.
// Takes the simulation-parameter struct and a boundary-condition struct by pointer.
void QPressDevEQZ27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

// Prototype of QPressDevZero27; the definition is not part of this excerpt.
// Shortest of the pressure prototypes: no rhoBC array and no om1 scalar.
void QPressDevZero27(
    unsigned int numberOfThreads,
    real* DD,
    int* k_Q,
    unsigned int numberOfBCnodes,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//! \brief Prototype only; the definition is not part of this header.
//! \note The parameter list is identical to that of QPressDevOld27.
//! \note NOTE(review): what "Fake" refers to cannot be told from the signature - confirm
//!       against the definition.
void QPressDevFake27(
    unsigned int numberOfThreads,
    real* rhoBC,
    real* DD,
    int* k_Q,
    int* k_N,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): "BB" presumably stands for bounce-back - confirm against the definition.
void BBDev27(
    LBMSimulationParameter* parameterDevice,
    QforBoundaryConditions* boundaryCondition);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): "IntBB" presumably means interpolated bounce-back, with QQ holding the
//!       sub-grid distances - confirm against the definition.
void QPressDev27_IntBB(
    unsigned int numberOfThreads,
    real* rho,
    real* DD,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): "AntiBB" presumably means anti-bounce-back - confirm against the
//!       definition.
//! \note NOTE(review): numberOfBCnodes is a signed int here, whereas most sibling prototypes in
//!       this header use unsigned int; the type must stay in sync with the definition.
void QPressDevAntiBB27(
    unsigned int numberOfThreads,
    real* rhoBC,
    real* vx,
    real* vy,
    real* vz,
    real* DD,
    int* k_Q,
    real* QQ,
    int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably a pressure boundary condition in the Schlaffer formulation -
//!       confirm against the definition.
//! \note NOTE(review): numberOfBCnodes is a signed int here, whereas most sibling prototypes in
//!       this header use unsigned int; the type must stay in sync with the definition.
void PressSchlaffer27(
    unsigned int numberOfThreads,
    real* rhoBC,
    real* DD,
    real* vx0,
    real* vy0,
    real* vz0,
    real* deltaVz0,
    int* k_Q,
    int* k_N,
    int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably a velocity boundary condition in the Schlaffer formulation,
//!       with t being a time-step counter - confirm against the definition.
//! \note NOTE(review): numberOfBCnodes is a signed int here, whereas most sibling prototypes in
//!       this header use unsigned int; the type must stay in sync with the definition.
void VelSchlaffer27(
    unsigned int numberOfThreads,
    int t,
    real* DD,
    real* vz0,
    real* deltaVz0,
    int* k_Q,
    int* k_N,
    int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably a precursor-driven boundary condition (compressible, zero
//!       pressure); tRatio / velocityRatio look like scaling factors - confirm.
void QPrecursorDevCompZeroPress(
    LBMSimulationParameter* parameterDevice,
    QforPrecursorBoundaryConditions* boundaryCondition,
    real tRatio,
    real velocityRatio);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably an equilibrium-based precursor boundary condition; tRatio /
//!       velocityRatio look like scaling factors - confirm against the definition.
void PrecursorDevEQ27(
    LBMSimulationParameter* parameterDevice,
    QforPrecursorBoundaryConditions* boundaryCondition,
    real tRatio,
    real velocityRatio);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably a precursor boundary condition operating directly on
//!       distributions - confirm against the definition.
void PrecursorDevDistributions(
    LBMSimulationParameter* parameterDevice,
    QforPrecursorBoundaryConditions* boundaryCondition,
    real tRatio,
    real velocityRatio);

//! \brief Prototype only; the definition is not part of this header.
//! \note Takes the same parameter list as PrecursorDevDistributions.
//! \note NOTE(review): how the "Q" variant differs cannot be told from the signature - confirm
//!       against the definition.
void QPrecursorDevDistributions(
    LBMSimulationParameter* parameterDevice,
    QforPrecursorBoundaryConditions* boundaryCondition,
    real tRatio,
    real velocityRatio);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): "AD" presumably stands for advection-diffusion and DD7 for a
//!       7-direction distribution set - confirm against the definition.
void QADDev7(
    unsigned int numberOfThreads,
    real* DD,
    real* DD7,
    real* temp,
    real diffusivity,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//////////////////////////////////////////////////////////////////////////
//! \brief Advection-diffusion kernel (prototype only; the definition is not part of this header).
//! \note NOTE(review): distributions / distributionsAD presumably hold the flow and the
//!       advection-diffusion populations respectively - confirm against the definition.
void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
    uint numberOfThreads,
    real omegaDiffusivity,
    uint* typeOfGridNode,
    uint* neighborX,
    uint* neighborY,
    uint* neighborZ,
    real* distributions,
    real* distributionsAD,
    unsigned long long numberOfLBnodes,
    real* forces,
    bool isEvenTimestep);

//////////////////////////////////////////////////////////////////////////
//! \brief Defines the behavior of a slip-AD boundary condition (prototype only; the definition
//!        is not part of this header).
//! \note NOTE(review): normalX/Y/Z presumably carry per-node wall normals and Qarrays the
//!       sub-grid distances - confirm against the definition.
void ADSlipVelDevComp(
    uint numberOfThreads,
    real* normalX,
    real* normalY,
    real* normalZ,
    real* distributions,
    real* distributionsAD,
    int* QindexArray,
    real* Qarrays,
    uint numberOfBCnodes,
    real omegaDiffusivity,
    uint* neighborX,
    uint* neighborY,
    uint* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);
	
//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably a Dirichlet boundary condition for the advection-diffusion
//!       field on a 27-direction distribution set (DD27) - confirm against the definition.
void QADDirichletDev27(
    unsigned int numberOfThreads,
    real* DD,
    real* DD27,
    real* temp,
    real diffusivity,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//! \brief Prototype only; the definition is not part of this header.
//! \note The parameter list is identical to that of QADDirichletDev27.
//! \note NOTE(review): presumably a bounce-back boundary condition for the advection-diffusion
//!       field - confirm against the definition.
void QADBBDev27(
    unsigned int numberOfThreads,
    real* DD,
    real* DD27,
    real* temp,
    real diffusivity,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably a velocity boundary condition for the advection-diffusion
//!       field on a 7-direction distribution set (DD7) - confirm against the definition.
void QADVelDev7(
    unsigned int numberOfThreads,
    real* DD,
    real* DD7,
    real* temp,
    real* velo,
    real diffusivity,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);


//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably the 27-direction counterpart of QADVelDev7 (DD27 instead of
//!       DD7) - confirm against the definition.
void QADVelDev27(
    unsigned int numberOfThreads,
    real* DD,
    real* DD27,
    real* temp,
    real* velo,
    real diffusivity,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably a pressure boundary condition for the advection-diffusion
//!       field on a 7-direction distribution set (DD7) - confirm against the definition.
void QADPressDev7(
    unsigned int numberOfThreads,
    real* DD,
    real* DD7,
    real* temp,
    real* velo,
    real diffusivity,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably the 27-direction counterpart of QADPressDev7 (DD27 instead
//!       of DD7) - confirm against the definition.
void QADPressDev27(
    unsigned int numberOfThreads,
    real* DD,
    real* DD27,
    real* temp,
    real* velo,
    real diffusivity,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably a non-equilibrium pressure boundary condition for the
//!       advection-diffusion field that reads from neighbor nodes (k_N) - confirm.
//! \note NOTE(review): numberOfBCnodes is a signed int here, whereas most sibling prototypes in
//!       this header use unsigned int; the type must stay in sync with the definition.
void QADPressNEQNeighborDev27(
    unsigned int numberOfThreads,
    real* DD,
    real* DD27,
    int* k_Q,
    int* k_N,
    int numberOfBCnodes,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably an incompressible no-slip boundary condition for the
//!       advection-diffusion field on a 7-direction distribution set - confirm.
void QNoSlipADincompDev7(
    unsigned int numberOfThreads,
    real* DD,
    real* DD7,
    real* temp,
    real diffusivity,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably the 27-direction counterpart of QNoSlipADincompDev7 (DD27
//!       instead of DD7) - confirm against the definition.
void QNoSlipADincompDev27(
    unsigned int numberOfThreads,
    real* DD,
    real* DD27,
    real* temp,
    real diffusivity,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably an incompressible velocity boundary condition for the
//!       advection-diffusion field on a 7-direction distribution set - confirm.
void QADVeloIncompDev7(
    unsigned int numberOfThreads,
    real* DD,
    real* DD7,
    real* temp,
    real* velo,
    real diffusivity,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);


//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably the 27-direction counterpart of QADVeloIncompDev7 (DD27
//!       instead of DD7) - confirm against the definition.
void QADVeloIncompDev27(
    unsigned int numberOfThreads,
    real* DD,
    real* DD27,
    real* temp,
    real* velo,
    real diffusivity,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably an incompressible pressure boundary condition for the
//!       advection-diffusion field on a 7-direction distribution set - confirm.
void QADPressIncompDev7(
    unsigned int numberOfThreads,
    real* DD,
    real* DD7,
    real* temp,
    real* velo,
    real diffusivity,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably the 27-direction counterpart of QADPressIncompDev7 (DD27
//!       instead of DD7) - confirm against the definition.
void QADPressIncompDev27(
    unsigned int numberOfThreads,
    real* DD,
    real* DD27,
    real* temp,
    real* velo,
    real diffusivity,
    int* k_Q,
    real* QQ,
    unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably relates to a propeller/velocity source acting on size_Prop
//!       nodes listed in k_Q - confirm against the definition.
//! \note The parity flag is named EvenOrOdd here, not isEvenTimestep as in neighboring
//!       prototypes.
void PropVelo(
    unsigned int numberOfThreads,
    unsigned int* neighborX,
    unsigned int* neighborY,
    unsigned int* neighborZ,
    real* rho,
    real* ux,
    real* uy,
    real* uz,
    int* k_Q,
    unsigned int size_Prop,
    unsigned long long numberOfLBnodes,
    unsigned int* bcMatD,
    real* DD,
    bool EvenOrOdd);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): "CF" presumably means coarse-to-fine grid interpolation, with the C/F
//!       suffixed arguments referring to the coarse and fine grid - confirm.
void ScaleCF27(
    real* DC,
    real* DF,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posCSWB,
    unsigned int* posFSWB,
    unsigned int kCF,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): "FC" presumably means fine-to-coarse grid interpolation, with the C/F
//!       suffixed arguments referring to the coarse and fine grid - confirm.
void ScaleFC27(
    real* DC,
    real* DF,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posC,
    unsigned int* posFSWB,
    unsigned int kFC,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads);

//! \brief Prototype only; the definition is not part of this header.
//! \note Compared with ScaleCF27 this prototype takes an additional ICellNeigh argument,
//!       passed by value.
//! \note NOTE(review): presumably a coarse-to-fine interpolation variant; what "Eff" stands
//!       for cannot be told from the signature - confirm against the definition.
void ScaleCFEff27(
    real* DC,
    real* DF,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posCSWB,
    unsigned int* posFSWB,
    unsigned int kCF,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborCoarseToFine);

//! \brief Prototype only; the definition is not part of this header.
//! \note Compared with ScaleFC27 this prototype takes an additional ICellNeigh argument,
//!       passed by value.
//! \note NOTE(review): presumably a fine-to-coarse interpolation variant; what "Eff" stands
//!       for cannot be told from the signature - confirm against the definition.
void ScaleFCEff27(
    real* DC,
    real* DF,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posC,
    unsigned int* posFSWB,
    unsigned int kFC,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborFineToCoarse);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably a coarse-to-fine interpolation variant; what "Last" refers
//!       to cannot be told from the signature - confirm against the definition.
void ScaleCFLast27(
    real* DC,
    real* DF,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posCSWB,
    unsigned int* posFSWB,
    unsigned int kCF,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborCoarseToFine);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably a fine-to-coarse interpolation variant; what "Last" refers
//!       to cannot be told from the signature - confirm against the definition.
void ScaleFCLast27(
    real* DC,
    real* DF,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posC,
    unsigned int* posFSWB,
    unsigned int kFC,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborFineToCoarse);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably a pressure-oriented coarse-to-fine interpolation variant -
//!       confirm against the definition.
void ScaleCFpress27(
    real* DC,
    real* DF,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posCSWB,
    unsigned int* posFSWB,
    unsigned int kCF,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborCoarseToFine);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably a pressure-oriented fine-to-coarse interpolation variant -
//!       confirm against the definition.
void ScaleFCpress27(
    real* DC,
    real* DF,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posC,
    unsigned int* posFSWB,
    unsigned int kFC,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborFineToCoarse);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably a corrected ("Fix") coarse-to-fine interpolation variant -
//!       confirm against the definition.
void ScaleCF_Fix_27(
    real* DC,
    real* DF,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posCSWB,
    unsigned int* posFSWB,
    unsigned int kCF,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborCoarseToFine);

//! \brief Prototype only; the definition is not part of this header.
//! \note The parameter list is identical to that of ScaleCF_Fix_27.
//! \note NOTE(review): "comp" presumably marks the compressible formulation - confirm against
//!       the definition.
void ScaleCF_Fix_comp_27(
    real* DC,
    real* DF,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posCSWB,
    unsigned int* posFSWB,
    unsigned int kCF,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborCoarseToFine);

//! \brief Prototype only; the definition is not part of this header.
//! \note Takes a raw CUstream_st pointer as its last argument.
//! \note NOTE(review): presumably a compressible coarse-to-fine interpolation variant launched
//!       on the given stream; the meaning of "0817" cannot be told from here - confirm.
void ScaleCF_0817_comp_27(
    real* DC,
    real* DF,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posCSWB,
    unsigned int* posFSWB,
    unsigned int kCF,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborCoarseToFine,
    CUstream_st* stream);

//! \brief Prototype only; the definition is not part of this header.
//! \note Takes an extra array G6 after DC/DF compared with the plain ScaleCF_* prototypes.
//! \note NOTE(review): presumably a compressible coarse-to-fine interpolation for a D3Q27F3
//!       scheme; what G6 contains cannot be told from here - confirm.
void ScaleCF_comp_D3Q27F3_2018(
    real* DC,
    real* DF,
    real* G6,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posCSWB,
    unsigned int* posFSWB,
    unsigned int kCF,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborCoarseToFine);

//! \brief Prototype only; the definition is not part of this header.
//! \note Same parameter list as ScaleCF_comp_D3Q27F3_2018 plus a trailing raw CUstream_st
//!       pointer.
//! \note NOTE(review): presumably a compressible coarse-to-fine interpolation for a D3Q27F3
//!       scheme launched on the given stream - confirm against the definition.
void ScaleCF_comp_D3Q27F3(
    real* DC,
    real* DF,
    real* G6,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posCSWB,
    unsigned int* posFSWB,
    unsigned int kCF,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborCoarseToFine,
    CUstream_st* stream);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably a compressible coarse-to-fine interpolation with staggered
//!       time levels - confirm against the definition.
void ScaleCF_staggered_time_comp_27(
    real* DC,
    real* DF,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posCSWB,
    unsigned int* posFSWB,
    unsigned int kCF,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborCoarseToFine);

//! \brief Prototype only; the definition is not part of this header.
//! \note Unlike the array-based ScaleCF_* prototypes, this one takes two
//!       LBMSimulationParameter pointers, an ICells pointer and an ICellNeigh reference.
//! \note NOTE(review): presumably a compressible coarse-to-fine interpolation launched on the
//!       given stream - confirm against the definition.
void ScaleCF_RhoSq_comp_27(
    LBMSimulationParameter* parameterDeviceC,
    LBMSimulationParameter* parameterDeviceF,
    ICells* interpolationCellsCoarseToFine,
    ICellNeigh& neighborCoarseToFine,
    CUstream_st* stream);

//! \brief Prototype only; the definition is not part of this header.
//! \tparam hasTurbulentViscosity compile-time switch; NOTE(review): presumably selects a code
//!         path that accounts for turbulent viscosity - confirm against the definition.
//! \note The function parameter list matches that of ScaleCF_RhoSq_comp_27.
template <bool hasTurbulentViscosity>
void ScaleCF_compressible(
    LBMSimulationParameter* parameterDeviceC,
    LBMSimulationParameter* parameterDeviceF,
    ICells* interpolationCellsCoarseToFine,
    ICellNeigh& neighborCoarseToFine,
    CUstream_st* stream);

//! \brief Prototype only; the definition is not part of this header.
//! \note Takes a raw CUstream_st pointer as its last argument.
//! \note NOTE(review): presumably a compressible coarse-to-fine interpolation variant that
//!       involves third-order moments - confirm against the definition.
void ScaleCF_RhoSq_3rdMom_comp_27(
    real* DC,
    real* DF,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posCSWB,
    unsigned int* posFSWB,
    unsigned int kCF,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborCoarseToFine,
    CUstream_st* stream);

//! \brief Prototype only; the definition is not part of this header.
//! \note Takes a raw CUstream_st pointer as its last argument.
//! \note NOTE(review): presumably a compressible coarse-to-fine interpolation variant; the
//!       meaning of "AA2016" cannot be told from the signature - confirm.
void ScaleCF_AA2016_comp_27(
    real* DC,
    real* DF,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posCSWB,
    unsigned int* posFSWB,
    unsigned int kCF,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborCoarseToFine,
    CUstream_st* stream);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably a coarse-to-fine interpolation variant with a
//!       Navier-Stokes pressure treatment - confirm against the definition.
void ScaleCF_NSPress_27(
    real* DC,
    real* DF,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posCSWB,
    unsigned int* posFSWB,
    unsigned int kCF,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborCoarseToFine);

//! \brief Prototype only; the definition is not part of this header.
//! \note NOTE(review): presumably a corrected ("Fix") fine-to-coarse interpolation variant -
//!       confirm against the definition.
void ScaleFC_Fix_27(
    real* DC,
    real* DF,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posC,
    unsigned int* posFSWB,
    unsigned int kFC,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborFineToCoarse);

//! \brief Prototype only; the definition is not part of this header.
//! \note The parameter list is identical to that of ScaleFC_Fix_27.
//! \note NOTE(review): "comp" presumably marks the compressible formulation - confirm against
//!       the definition.
void ScaleFC_Fix_comp_27(
    real* DC,
    real* DF,
    unsigned int* neighborCX,
    unsigned int* neighborCY,
    unsigned int* neighborCZ,
    unsigned int* neighborFX,
    unsigned int* neighborFY,
    unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC,
    unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posC,
    unsigned int* posFSWB,
    unsigned int kFC,
    real omCoarse,
    real omFine,
    real nu,
    unsigned int nxC,
    unsigned int nyC,
    unsigned int nxF,
    unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborFineToCoarse);

// ScaleFC_0817_comp_27 -- declaration only; the definition is not part of this header.
// Takes a raw CUDA stream handle (CUstream_st*) as its last argument.
// NOTE(review): names suggest a fine-to-coarse ("FC") interpolation between a
// coarse (...C) and a fine (...F) grid, with the work presumably issued on
// `stream` -- confirm against the implementation.
void ScaleFC_0817_comp_27(
    real* DC, real* DF,
    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC, unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posC, unsigned int* posFSWB, unsigned int kFC,
    real omCoarse, real omFine, real nu,
    unsigned int nxC, unsigned int nyC, unsigned int nxF, unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborFineToCoarse,
    CUstream_st* stream);

// ScaleFC_comp_D3Q27F3_2018 -- declaration only; the definition is not part of this header.
// Compared with the plain ScaleFC_* signatures it takes an additional array G6.
// NOTE(review): names suggest a fine-to-coarse ("FC") interpolation between a
// coarse (...C) and a fine (...F) grid; the meaning of G6 cannot be determined
// from this header -- confirm against the implementation.
void ScaleFC_comp_D3Q27F3_2018(
    real* DC, real* DF, real* G6,
    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC, unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posC, unsigned int* posFSWB, unsigned int kFC,
    real omCoarse, real omFine, real nu,
    unsigned int nxC, unsigned int nyC, unsigned int nxF, unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborFineToCoarse);

// ScaleFC_comp_D3Q27F3 -- declaration only; the definition is not part of this header.
// Takes an additional array G6 and a raw CUDA stream handle (CUstream_st*).
// NOTE(review): names suggest a fine-to-coarse ("FC") interpolation between a
// coarse (...C) and a fine (...F) grid, presumably issued on `stream`; the
// meaning of G6 is not visible here -- confirm against the implementation.
void ScaleFC_comp_D3Q27F3(
    real* DC, real* DF, real* G6,
    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC, unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posC, unsigned int* posFSWB, unsigned int kFC,
    real omCoarse, real omFine, real nu,
    unsigned int nxC, unsigned int nyC, unsigned int nxF, unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborFineToCoarse,
    CUstream_st* stream);

// ScaleFC_staggered_time_comp_27 -- declaration only; the definition is not part of this header.
// NOTE(review): names suggest a fine-to-coarse ("FC") interpolation between a
// coarse (...C) and a fine (...F) grid; what "staggered_time" changes relative
// to the other variants is not visible here -- confirm against the implementation.
void ScaleFC_staggered_time_comp_27(
    real* DC, real* DF,
    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC, unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posC, unsigned int* posFSWB, unsigned int kFC,
    real omCoarse, real omFine, real nu,
    unsigned int nxC, unsigned int nyC, unsigned int nxF, unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborFineToCoarse);

// ScaleFC_RhoSq_comp_27 -- declaration only; the definition is not part of this header.
// Unlike the long-form ScaleFC_* signatures, this one receives the coarse and
// fine level data bundled in LBMSimulationParameter objects, and takes the
// ICellNeigh offsets by non-const reference.
// NOTE(review): presumably a fine-to-coarse interpolation issued on `stream` --
// confirm against the implementation.
void ScaleFC_RhoSq_comp_27(
    LBMSimulationParameter* parameterDeviceC,
    LBMSimulationParameter* parameterDeviceF,
    ICells* interpolationCellsFineToCoarse,
    ICellNeigh& neighborFineToCoarse,
    CUstream_st* stream);

// ScaleFC_compressible -- function template declaration; the definition (and
// any explicit instantiations) are not part of this header.
// hasTurbulentViscosity is a compile-time bool template parameter.
// NOTE(review): presumably selects a code path that accounts for turbulent
// viscosity during a fine-to-coarse interpolation issued on `stream` --
// confirm against the implementation.
template <bool hasTurbulentViscosity>
void ScaleFC_compressible(
    LBMSimulationParameter* parameterDeviceC,
    LBMSimulationParameter* parameterDeviceF,
    ICells* icellFC,
    ICellNeigh& neighborFineToCoarse,
    CUstream_st* stream);

// ScaleFC_RhoSq_3rdMom_comp_27 -- declaration only; the definition is not part of this header.
// Takes a raw CUDA stream handle (CUstream_st*) as its last argument.
// NOTE(review): names suggest a fine-to-coarse ("FC") interpolation between a
// coarse (...C) and a fine (...F) grid, presumably issued on `stream` --
// confirm against the implementation.
void ScaleFC_RhoSq_3rdMom_comp_27(
    real* DC, real* DF,
    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC, unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posC, unsigned int* posFSWB, unsigned int kFC,
    real omCoarse, real omFine, real nu,
    unsigned int nxC, unsigned int nyC, unsigned int nxF, unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborFineToCoarse,
    CUstream_st* stream);

// ScaleFC_AA2016_comp_27 -- declaration only; the definition is not part of this header.
// Takes a raw CUDA stream handle (CUstream_st*) as its last argument.
// NOTE(review): names suggest a fine-to-coarse ("FC") interpolation between a
// coarse (...C) and a fine (...F) grid, presumably issued on `stream` --
// confirm against the implementation.
void ScaleFC_AA2016_comp_27(
    real* DC, real* DF,
    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC, unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posC, unsigned int* posFSWB, unsigned int kFC,
    real omCoarse, real omFine, real nu,
    unsigned int nxC, unsigned int nyC, unsigned int nxF, unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborFineToCoarse,
    CUstream_st* stream);

// ScaleFC_NSPress_27 -- declaration only; the definition is not part of this header.
// NOTE(review): names suggest a fine-to-coarse ("FC") interpolation between a
// coarse (...C) and a fine (...F) grid -- confirm against the implementation.
void ScaleFC_NSPress_27(
    real* DC, real* DF,
    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC, unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posC, unsigned int* posFSWB, unsigned int kFC,
    real omCoarse, real omFine, real nu,
    unsigned int nxC, unsigned int nyC, unsigned int nxF, unsigned int nyF,
    unsigned int numberOfThreads,
    ICellNeigh neighborFineToCoarse);

// ScaleCFThS7 -- declaration only; the definition is not part of this header.
// In addition to DC/DF it takes a second pair of arrays, DD7C/DD7F, and a
// fine-level diffusivity.
// NOTE(review): names suggest a coarse-to-fine ("CF") interpolation that also
// handles a 7-direction scalar (e.g. temperature) field -- confirm against
// the implementation.
void ScaleCFThS7(
    real* DC, real* DF,
    real* DD7C, real* DD7F,
    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC, unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posCSWB, unsigned int* posFSWB, unsigned int kCF,
    real nu, real diffusivity_fine,
    unsigned int numberOfThreads);

// ScaleFCThS7 -- declaration only; the definition is not part of this header.
// In addition to DC/DF it takes a second pair of arrays, DD7C/DD7F, and a
// coarse-level diffusivity.
// NOTE(review): names suggest a fine-to-coarse ("FC") interpolation that also
// handles a 7-direction scalar (e.g. temperature) field -- confirm against
// the implementation.
void ScaleFCThS7(
    real* DC, real* DF,
    real* DD7C, real* DD7F,
    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC, unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posC, unsigned int* posFSWB, unsigned int kFC,
    real nu, real diffusivity_coarse,
    unsigned int numberOfThreads);

// ScaleCFThSMG7 -- declaration only; the definition is not part of this header.
// Same parameter list as ScaleCFThS7 plus a trailing ICellNeigh passed by value.
// NOTE(review): names suggest a coarse-to-fine ("CF") interpolation that also
// handles a 7-direction scalar field; what "MG" stands for is not visible
// here -- confirm against the implementation.
void ScaleCFThSMG7(
    real* DC, real* DF,
    real* DD7C, real* DD7F,
    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC, unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posCSWB, unsigned int* posFSWB, unsigned int kCF,
    real nu, real diffusivity_fine,
    unsigned int numberOfThreads,
    ICellNeigh neighborCoarseToFine);

// ScaleFCThSMG7 -- declaration only; the definition is not part of this header.
// Same parameter list as ScaleFCThS7 plus a trailing ICellNeigh passed by value.
// NOTE(review): names suggest a fine-to-coarse ("FC") interpolation that also
// handles a 7-direction scalar field; what "MG" stands for is not visible
// here -- confirm against the implementation.
void ScaleFCThSMG7(
    real* DC, real* DF,
    real* DD7C, real* DD7F,
    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC, unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posC, unsigned int* posFSWB, unsigned int kFC,
    real nu, real diffusivity_coarse,
    unsigned int numberOfThreads,
    ICellNeigh neighborFineToCoarse);

// ScaleCFThS27 -- declaration only; the definition is not part of this header.
// In addition to DC/DF it takes a second pair of arrays, DD27C/DD27F, a
// fine-level diffusivity and an ICellNeigh passed by value.
// NOTE(review): names suggest a coarse-to-fine ("CF") interpolation that also
// handles a 27-direction scalar (e.g. temperature) field -- confirm against
// the implementation.
void ScaleCFThS27(
    real* DC, real* DF,
    real* DD27C, real* DD27F,
    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC, unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posCSWB, unsigned int* posFSWB, unsigned int kCF,
    real nu, real diffusivity_fine,
    unsigned int numberOfThreads,
    ICellNeigh neighborCoarseToFine);

// ScaleFCThS27 -- declaration only; the definition is not part of this header.
// In addition to DC/DF it takes a second pair of arrays, DD27C/DD27F, a
// coarse-level diffusivity and an ICellNeigh passed by value.
// NOTE(review): names suggest a fine-to-coarse ("FC") interpolation that also
// handles a 27-direction scalar (e.g. temperature) field -- confirm against
// the implementation.
void ScaleFCThS27(
    real* DC, real* DF,
    real* DD27C, real* DD27F,
    unsigned int* neighborCX, unsigned int* neighborCY, unsigned int* neighborCZ,
    unsigned int* neighborFX, unsigned int* neighborFY, unsigned int* neighborFZ,
    unsigned long long numberOfLBnodesC, unsigned long long numberOfLBnodesF,
    bool isEvenTimestep,
    unsigned int* posC, unsigned int* posFSWB, unsigned int kFC,
    real nu, real diffusivity_coarse,
    unsigned int numberOfThreads,
    ICellNeigh neighborFineToCoarse);

// DragLiftPostD27 -- declaration only; the definition is not part of this header.
// DragX/DragY/DragZ are double pointers even though the remaining floating
// point arguments use `real`.
// NOTE(review): presumably evaluates drag/lift force components at the
// numberOfBCnodes boundary nodes listed in k_Q and writes them to DragX/Y/Z;
// "Post" vs. "Pre" likely refers to the point in the time step -- confirm
// against the implementation.
void DragLiftPostD27(
    real* DD,
    int* k_Q, real* QQ, int numberOfBCnodes,
    double* DragX, double* DragY, double* DragZ,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep,
    unsigned int numberOfThreads);

// DragLiftPreD27 -- declaration only; the definition is not part of this header.
// DragX/DragY/DragZ are double pointers even though the remaining floating
// point arguments use `real`.
// NOTE(review): presumably evaluates drag/lift force components at the
// numberOfBCnodes boundary nodes listed in k_Q and writes them to DragX/Y/Z;
// "Pre" vs. "Post" likely refers to the point in the time step -- confirm
// against the implementation.
void DragLiftPreD27(
    real* DD,
    int* k_Q, real* QQ, int numberOfBCnodes,
    double* DragX, double* DragY, double* DragZ,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep,
    unsigned int numberOfThreads);

// CalcCPtop27 -- declaration only; the definition is not part of this header.
// cpPress is a double pointer even though DD uses `real`.
// NOTE(review): presumably computes a pressure value for each of the nonCp
// nodes listed in cpIndex and stores it in cpPress ("top" set of nodes) --
// confirm against the implementation.
void CalcCPtop27(
    real* DD,
    int* cpIndex, int nonCp,
    double* cpPress,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep,
    unsigned int numberOfThreads);

// CalcCPbottom27 -- declaration only; the definition is not part of this header.
// cpPress is a double pointer even though DD uses `real`.
// NOTE(review): presumably computes a pressure value for each of the nonCp
// nodes listed in cpIndex and stores it in cpPress ("bottom" set of nodes) --
// confirm against the implementation.
void CalcCPbottom27(
    real* DD,
    int* cpIndex, int nonCp,
    double* cpPress,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep,
    unsigned int numberOfThreads);

// GetSendFsPreDev27 -- declaration only; the definition is not part of this header.
//
// stream: optional CUDA stream. Defaults to cudaStreamLegacy, the runtime-API
//         handle for the legacy default stream. It has the same handle value
//         as the driver-API macro CU_STREAM_LEGACY, but is provided by
//         <cuda_runtime.h>, which this header includes directly, so the
//         declaration no longer relies on <cuda.h> being pulled in elsewhere.
//
// NOTE(review): presumably gathers the values addressed by sendIndex
// (buffmax entries) from DD into bufferFs before a halo exchange -- confirm
// against the implementation.
void GetSendFsPreDev27(
    real* DD,
    real* bufferFs,
    int* sendIndex, int buffmax,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep,
    unsigned int numberOfThreads,
    cudaStream_t stream = cudaStreamLegacy);

// GetSendFsPostDev27 -- declaration only; the definition is not part of this header.
//
// stream: optional CUDA stream. Defaults to cudaStreamLegacy, the runtime-API
//         handle for the legacy default stream. It has the same handle value
//         as the driver-API macro CU_STREAM_LEGACY, but is provided by
//         <cuda_runtime.h>, which this header includes directly, so the
//         declaration no longer relies on <cuda.h> being pulled in elsewhere.
//
// NOTE(review): presumably gathers the values addressed by sendIndex
// (buffmax entries) from DD into bufferFs before a halo exchange -- confirm
// against the implementation.
void GetSendFsPostDev27(
    real* DD,
    real* bufferFs,
    int* sendIndex, int buffmax,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep,
    unsigned int numberOfThreads,
    cudaStream_t stream = cudaStreamLegacy);

// SetRecvFsPreDev27 -- declaration only; the definition is not part of this header.
//
// stream: optional CUDA stream. Defaults to cudaStreamLegacy, the runtime-API
//         handle for the legacy default stream. It has the same handle value
//         as the driver-API macro CU_STREAM_LEGACY, but is provided by
//         <cuda_runtime.h>, which this header includes directly, so the
//         declaration no longer relies on <cuda.h> being pulled in elsewhere.
//
// NOTE(review): presumably scatters bufferFs (buffmax entries) back into DD
// at the positions given by recvIndex after a halo exchange -- confirm
// against the implementation.
void SetRecvFsPreDev27(
    real* DD,
    real* bufferFs,
    int* recvIndex, int buffmax,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep,
    unsigned int numberOfThreads,
    cudaStream_t stream = cudaStreamLegacy);

// SetRecvFsPostDev27 -- declaration only; the definition is not part of this header.
//
// stream: optional CUDA stream. Defaults to cudaStreamLegacy, the runtime-API
//         handle for the legacy default stream. It has the same handle value
//         as the driver-API macro CU_STREAM_LEGACY, but is provided by
//         <cuda_runtime.h>, which this header includes directly, so the
//         declaration no longer relies on <cuda.h> being pulled in elsewhere.
//
// NOTE(review): presumably scatters bufferFs (buffmax entries) back into DD
// at the positions given by recvIndex after a halo exchange -- confirm
// against the implementation.
void SetRecvFsPostDev27(
    real* DD,
    real* bufferFs,
    int* recvIndex, int buffmax,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep,
    unsigned int numberOfThreads,
    cudaStream_t stream = cudaStreamLegacy);

// getSendGsDevF3 -- declaration only; the definition is not part of this header.
// Takes no stream argument.
// NOTE(review): presumably gathers the G6 values addressed by sendIndex
// (buffmax entries) into bufferGs for communication -- confirm against the
// implementation.
void getSendGsDevF3(real* G6,
                    real* bufferGs,
                    int* sendIndex,
                    int buffmax,
                    unsigned int* neighborX,
                    unsigned int* neighborY,
                    unsigned int* neighborZ,
                    unsigned long long numberOfLBnodes,
                    bool isEvenTimestep,
                    unsigned int numberOfThreads);

// setRecvGsDevF3 -- declaration only; the definition is not part of this header.
// Takes no stream argument.
// NOTE(review): presumably scatters bufferGs (buffmax entries) into G6 at the
// positions given by recvIndex after communication -- confirm against the
// implementation.
void setRecvGsDevF3(real* G6,
                    real* bufferGs,
                    int* recvIndex,
                    int buffmax,
                    unsigned int* neighborX,
                    unsigned int* neighborY,
                    unsigned int* neighborZ,
                    unsigned long long numberOfLBnodes,
                    bool isEvenTimestep,
                    unsigned int numberOfThreads);

// WallFuncDev27 -- declaration only; the definition is not part of this header.
// numberOfThreads comes first in this signature, and numberOfBCnodes is
// unsigned here.
// NOTE(review): presumably applies a wall-function treatment at the
// numberOfBCnodes boundary nodes listed in k_Q, using the velocity arrays
// vx/vy/vz and relaxation parameter om1 -- confirm against the implementation.
void WallFuncDev27(
    unsigned int numberOfThreads,
    real* vx, real* vy, real* vz,
    real* DD,
    int* k_Q, real* QQ, unsigned int numberOfBCnodes,
    real om1,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// SetOutputWallVelocitySP27 -- declaration only; the definition is not part of this header.
// numberOfThreads comes first in this signature.
// NOTE(review): presumably copies the wall velocities vxWall/vyWall/vzWall of
// the numberOfWallNodes nodes listed in kWallNodes into the output arrays
// vxD/vyD/vzD -- confirm against the implementation.
void SetOutputWallVelocitySP27(
    unsigned int numberOfThreads,
    real* vxD, real* vyD, real* vzD,
    real* vxWall, real* vyWall, real* vzWall,
    int numberOfWallNodes, int* kWallNodes,
    real* rhoD, real* pressD,
    unsigned int* geoD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    real* DD,
    bool isEvenTimestep);

// GetVelotoForce27 -- declaration only; the definition is not part of this header.
// numberOfThreads comes first in this signature.
// NOTE(review): presumably derives velocities Vx/Vy/Vz from the distributions
// DD at the nonAtBC nodes listed in bcIndex, for later use in a forcing
// term -- confirm against the implementation.
void GetVelotoForce27(
    unsigned int numberOfThreads,
    real* DD,
    int* bcIndex, int nonAtBC,
    real* Vx, real* Vy, real* Vz,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned long long numberOfLBnodes,
    bool isEvenTimestep);

// InitParticlesDevice -- declaration only; the definition is not part of this header.
// NOTE(review): presumably initialises numberOfParticles tracer particles
// (local/global coordinates, velocities, IDs and base cells), drawing on
// randArray for their placement -- confirm against the implementation.
void InitParticlesDevice(
    real* coordX, real* coordY, real* coordZ,
    real* coordParticleXlocal, real* coordParticleYlocal, real* coordParticleZlocal,
    real* coordParticleXglobal, real* coordParticleYglobal, real* coordParticleZglobal,
    real* veloParticleX, real* veloParticleY, real* veloParticleZ,
    real* randArray,
    unsigned int* particleID,
    unsigned int* cellBaseID,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned int* neighborWSB,
    int level,
    unsigned int numberOfParticles,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads);

// MoveParticlesDevice -- declaration only; the definition is not part of this header.
// NOTE(review): presumably advances numberOfParticles tracer particles using
// the flow field stored in DD (relaxation parameter omega) and updates their
// coordinates, velocities and base cells -- confirm against the
// implementation.
void MoveParticlesDevice(
    real* coordX, real* coordY, real* coordZ,
    real* coordParticleXlocal, real* coordParticleYlocal, real* coordParticleZlocal,
    real* coordParticleXglobal, real* coordParticleYglobal, real* coordParticleZglobal,
    real* veloParticleX, real* veloParticleY, real* veloParticleZ,
    real* DD,
    real omega,
    unsigned int* particleID,
    unsigned int* cellBaseID,
    unsigned int* bcMatD,
    unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
    unsigned int* neighborWSB,
    int level,
    unsigned int timestep,
    unsigned int numberOfTimesteps,
    unsigned int numberOfParticles,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads,
    bool isEvenTimestep);

// initRandomDevice -- declaration only; the definition is not part of this header.
// `state` is a pointer to cuRAND generator state (curandState).
// NOTE(review): presumably initialises one curandState per lattice node
// (numberOfLBnodes entries); how the states are seeded is not visible here --
// confirm against the implementation.
void initRandomDevice(
    curandState* state,
    unsigned long long numberOfLBnodes,
    unsigned int numberOfThreads);

// generateRandomValuesDevice -- declaration only; the definition is not part of this header.
// `state` is a pointer to cuRAND generator state (curandState).
// NOTE(review): presumably draws one random value per lattice node from
// `state` into randArray; the distribution is not visible here -- confirm
// against the implementation.
void generateRandomValuesDevice(
    curandState* state,
    unsigned long long numberOfLBnodes,
    real* randArray,
    unsigned int numberOfThreads);

// CalcTurbulenceIntensityDevice -- declaration only; the definition is not part of this header.
// NOTE(review): presumably updates the second-moment arrays (vxx ... vyz) and
// the mean velocities (vx_mean, vy_mean, vz_mean) from the distributions DD,
// restricted by typeOfGridNode -- confirm against the implementation.
void CalcTurbulenceIntensityDevice(real* vxx, real* vyy, real* vzz,
                                   real* vxy, real* vxz, real* vyz,
                                   real* vx_mean, real* vy_mean, real* vz_mean,
                                   real* DD,
                                   uint* typeOfGridNode,
                                   unsigned int* neighborX,
                                   unsigned int* neighborY,
                                   unsigned int* neighborZ,
                                   unsigned long long numberOfLBnodes,
                                   bool isEvenTimestep,
                                   uint numberOfThreads);

#endif