diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
index 2b2a2e68d0980b4d3aa4b27a04bd8ce703704a05..957935756163795945e9ac1c4762e0eb825239ee 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
@@ -118,6 +118,12 @@ void GridProvider::allocAndCopyForcing()
 {
     cudaMemoryManager->cudaAllocForcing();
     cudaMemoryManager->cudaCopyForcingToDevice();
+
+    for (int level = para->getCoarse(); level <= para->getFine(); level++)
+    {
+        cudaMemoryManager->cudaAllocLevelForcing(level);
+        cudaMemoryManager->cudaCopyLevelForcingToDevice(level);
+    }
 }
 
 void GridProvider::allocAndCopyQuadricLimiters()
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
index 6c1585bd689a5f2e2da603d5e9fdb81cbc175aab..aeafe342f0680763fbbffe63cf1c6760e61c2102 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
@@ -6,6 +6,8 @@
 
 #include <Parameter/Parameter.h>
 
+#include <lbm/constants/NumericConstants.h>
+
 void CudaMemoryManager::cudaAllocFull(int lev)
 {
     checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->geo      ), parameter->getParH(lev)->mem_size_int  ));
@@ -404,6 +406,44 @@ void CudaMemoryManager::cudaFreeForcing()
 {
 	checkCudaErrors( cudaFreeHost(parameter->getForcesHost()));
 }
+
+void CudaMemoryManager::cudaAllocLevelForcing(int level)
+{
+    real fx_t{ 1. }, fy_t{ 1. }, fz_t{ 1. };
+    for (int i = 0; i < level; i++) {
+        fx_t *= vf::lbm::constant::c2o1;
+        fy_t *= vf::lbm::constant::c2o1;
+        fz_t *= vf::lbm::constant::c2o1;
+    }
+
+    const unsigned int mem_size = sizeof(real) * 3;
+
+    //Host
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(level)->forcing), mem_size));
+    parameter->getParH(level)->forcing[0] = parameter->forcingH[0] / fx_t;
+    parameter->getParH(level)->forcing[1] = parameter->forcingH[1] / fy_t;
+    parameter->getParH(level)->forcing[2] = parameter->forcingH[2] / fz_t;
+
+	//Device
+	checkCudaErrors( cudaMalloc((void**) &parameter->getParD(level)->forcing, mem_size));
+	//////////////////////////////////////////////////////////////////////////
+	const double tmp = (double)mem_size;
+	setMemsizeGPU(tmp, false);
+}
+
+void CudaMemoryManager::cudaCopyLevelForcingToDevice(int level)
+{
+	unsigned int mem_size = sizeof(real) * 3;
+	checkCudaErrors( cudaMemcpy(parameter->getParD(level)->forcing, parameter->getParH(level)->forcing, mem_size, cudaMemcpyHostToDevice));
+}
+
+void CudaMemoryManager::cudaFreeLevelForcing(int level)
+{
+	checkCudaErrors( cudaFreeHost(parameter->getParH(level)->forcing));
+    checkCudaErrors( cudaFree(parameter->getParD(level)->forcing));
+}
+
+
 //quadric Limiters
 void CudaMemoryManager::cudaAllocQuadricLimiters()
 {
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
index c26db65afd384ac7a4e6c436c4dea2c46647ae95..4853205f510348a954fbc8dcdaed72385a30d559 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
@@ -78,6 +78,10 @@ public:
 	void cudaCopyForcingToHost();
 	void cudaFreeForcing();
 
+    void cudaAllocLevelForcing(int level);
+	void cudaCopyLevelForcingToDevice(int level);
+	void cudaFreeLevelForcing(int level);
+
 	void cudaAllocQuadricLimiters();
 	void cudaCopyQuadricLimitersToDevice();
 	void cudaFreeQuadricLimiters();
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu
new file mode 100644
index 0000000000000000000000000000000000000000..a99bb716b6e7cd08719aad3c781863db47367757
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu
@@ -0,0 +1,51 @@
+#include "BGKUnified.h"
+
+#include "Parameter/Parameter.h"
+#include "../CumulantKernel.cuh"
+#include "Kernel/Utilities/CudaGrid.h"
+#include <stdexcept>
+
+#include <lbm/BGK.h>
+
+
+std::shared_ptr<BGKUnified> BGKUnified::getNewInstance(std::shared_ptr<Parameter> para, int level)
+{
+    return std::make_shared<BGKUnified>(para, level);
+}
+
+void BGKUnified::run()
+{
+    vf::gpu::LBMKernelParameter kernelParameter{ para->getParD(level)->omega,
+                                                 para->getParD(level)->geoSP,
+                                                 para->getParD(level)->neighborX_SP,
+                                                 para->getParD(level)->neighborY_SP,
+                                                 para->getParD(level)->neighborZ_SP,
+                                                 para->getParD(level)->d0SP.f[0],
+                                                 (int)para->getParD(level)->size_Mat_SP,
+                                                 nullptr, /* forces not used in bgk kernel */
+                                                 para->getParD(level)->evenOrOdd };
+
+    auto lambda = [] __device__(vf::lbm::CumulantChimeraParameter parameter) {
+        return vf::lbm::bgk(parameter);
+    };
+
+    vf::gpu::cumulantKernel<<<cudaGrid.grid, cudaGrid.threads>>>(lambda, kernelParameter);
+
+    getLastCudaError("LB_Kernel_BGKUnified execution failed");
+}
+
+BGKUnified::BGKUnified(std::shared_ptr<Parameter> para, int level)
+{
+#ifndef BUILD_CUDA_LTO
+    throw std::invalid_argument("To use the BKGUnified kernel, pass -DBUILD_CUDA_LTO=ON to cmake. Requires: CUDA 11.2 & cc 5.0");
+#endif
+
+    this->para  = para;
+    this->level = level;
+
+    myPreProcessorTypes.push_back(InitCompSP27);
+
+    myKernelGroup = BasicKernel;
+
+    this->cudaGrid = vf::gpu::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP);
+}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h
new file mode 100644
index 0000000000000000000000000000000000000000..d8438173afd3dbb7259b110ef4f2939a11b6e196
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h
@@ -0,0 +1,15 @@
+#ifndef GPU_BKGUnified_H
+#define GPU_BKGUnified_H
+
+#include "Kernel/KernelImp.h"
+
+class BGKUnified : public KernelImp
+{
+public:
+    static std::shared_ptr<BGKUnified> getNewInstance(std::shared_ptr<Parameter> para, int level);
+    void run();
+
+    BGKUnified(std::shared_ptr<Parameter> para, int level);
+};
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
index 3b464ee1d037ae35feb654149122c09d3e27467d..7aecc31a6ed1e40f47abd72e8d02a93b261145c3 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
@@ -22,8 +22,7 @@ void CumulantK15Unified::run()
                                                  para->getParD(level)->neighborZ_SP,
                                                  para->getParD(level)->d0SP.f[0],
                                                  (int)para->getParD(level)->size_Mat_SP,
-                                                 level,
-                                                 para->getForcesDev(),
+                                                 para->getParD(level)->forcing,
                                                  para->getParD(level)->evenOrOdd };
 
     auto lambda = [] __device__(vf::lbm::CumulantChimeraParameter parameter) {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
index b25456ce6ac79f4c96245e379ec17d5bff47c5ad..0ebb95702e63e4caa499c00cae33470c86d0ab93 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
@@ -22,8 +22,7 @@ void CumulantK17Unified::run()
                                                  para->getParD(level)->neighborZ_SP,
                                                  para->getParD(level)->d0SP.f[0],
                                                  (int)para->getParD(level)->size_Mat_SP,
-                                                 level,
-                                                 para->getForcesDev(),
+                                                 para->getParD(level)->forcing,
                                                  para->getParD(level)->evenOrOdd };
 
     auto lambda = [] __device__(vf::lbm::CumulantChimeraParameter parameter) {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantKernel.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantKernel.cuh
index 53104c5bd287b21c3a5b2ab9f0447dd3be22eda1..9c595a2ec17ef34a8e4d75863266854935f20716 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantKernel.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantKernel.cuh
@@ -25,7 +25,6 @@ struct LBMKernelParameter
     unsigned int* neighborZ;
     real* distributions;
     int size_Mat;
-    int level;
     real* forces;
     bool isEvenTimestep;
 };
@@ -49,11 +48,7 @@ __global__ void cumulantKernel(KernelFunctor kernel, LBMKernelParameter kernelPa
         kernelParameter.neighborZ
     };
 
-
-    real level_forces[3];
-    vf::gpu::getLevelForce(kernelParameter.forces[0], kernelParameter.forces[1], kernelParameter.forces[2], kernelParameter.level, level_forces);
-
-    lbm::CumulantChimeraParameter chimeraParameter {distributionWrapper.distribution, kernelParameter.omega, level_forces};
+    lbm::CumulantChimeraParameter chimeraParameter {distributionWrapper.distribution, kernelParameter.omega, kernelParameter.forces};
     kernel(chimeraParameter);
 
     distributionWrapper.write();
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
index e56462a0702a5e1c31121c9c9fe73827e5c93554..efad7278d3e9d8b06355d9e8d4f230fa2c3c36b0 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
@@ -4,6 +4,7 @@
 
 //LBM kernel (compressible)
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.h"
+#include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.h"
@@ -102,10 +103,13 @@ std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter>
 	std::shared_ptr<KernelImp> newKernel;
 	std::shared_ptr<CheckParameterStrategy> checkStrategy;
 
-	if (       kernel == "BGKCompSP27") {
-        newKernel     = BGKCompSP27::getNewInstance(para, level);				// compressible
-        checkStrategy = FluidFlowCompStrategy::getInstance();     //	   ||
-    } else if (kernel == "BGKPlusCompSP27") {									//     \/
+    if (kernel == "BGKCompSP27") {
+        newKernel     = BGKCompSP27::getNewInstance(para, level);   // compressible
+        checkStrategy = FluidFlowCompStrategy::getInstance();       //      ||
+    } else if (kernel == "BGKUnified") {                            //     \/
+        newKernel     = BGKUnified::getNewInstance(para, level);
+        checkStrategy = FluidFlowCompStrategy::getInstance();
+    } else if (kernel == "BGKPlusCompSP27") {
         newKernel     = BGKPlusCompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
     } else if (kernel == "MRTCompSP27") {
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
index 25946704edcad12513d1b8a2c70cd551eb1dd619..ccb13e83bcc68b9c9d85725666a02ea1c23ec293 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
@@ -186,6 +186,7 @@ struct ParameterStruct{
 	//velocities to fit the force
 	real *VxForce, *VyForce, *VzForce;
 	//////////////////////////////////////////////////////////////////////////
+	real *forcing;
 
 	//Measure Points/////////
 	std::vector<MeasurePoints> MP; 
diff --git a/src/lbm/BGK.cpp b/src/lbm/BGK.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8680436e5a042d713df78eeabcbe8d35d61bcefd
--- /dev/null
+++ b/src/lbm/BGK.cpp
@@ -0,0 +1,157 @@
+#include "BGK.h"
+
+#include <cmath>
+
+#include <basics/Core/DataTypes.h>
+#include <basics/Core/RealConstants.h>
+
+#include "constants/NumericConstants.h"
+#include "constants/D3Q27.h"
+
+#include "Chimera.h"
+#include "MacroscopicQuantities.h"
+
+namespace vf
+{
+namespace lbm
+{
+
+using namespace constant;
+
+
+
+__host__ __device__ void bgk(CumulantChimeraParameter parameter)
+{
+    auto& distribution = parameter.distribution;
+    const auto omega = parameter.omega;
+
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Read distributions: style of reading and writing the distributions from/to 
+    //! stored arrays dependent on timestep is based on the esoteric twist algorithm
+    //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+    //!
+    real mfcbb = distribution.f[dir::PZZ];
+    real mfabb = distribution.f[dir::MZZ];
+    real mfbcb = distribution.f[dir::ZPZ];
+    real mfbab = distribution.f[dir::ZMZ];
+    real mfbbc = distribution.f[dir::ZZP];
+    real mfbba = distribution.f[dir::ZZM];
+    real mfccb = distribution.f[dir::PPZ];
+    real mfaab = distribution.f[dir::MMZ];
+    real mfcab = distribution.f[dir::PMZ];
+    real mfacb = distribution.f[dir::MPZ];
+    real mfcbc = distribution.f[dir::PZP];
+    real mfaba = distribution.f[dir::MZM];
+    real mfcba = distribution.f[dir::PZM];
+    real mfabc = distribution.f[dir::MZP];
+    real mfbcc = distribution.f[dir::ZPP];
+    real mfbaa = distribution.f[dir::ZMM];
+    real mfbca = distribution.f[dir::ZPM];
+    real mfbac = distribution.f[dir::ZMP];
+    real mfccc = distribution.f[dir::PPP];
+    real mfacc = distribution.f[dir::MPP];
+    real mfcac = distribution.f[dir::PMP];
+    real mfaac = distribution.f[dir::MMP];
+    real mfcca = distribution.f[dir::PPM];
+    real mfaca = distribution.f[dir::MPM];
+    real mfcaa = distribution.f[dir::PMM];
+    real mfaaa = distribution.f[dir::MMM];
+    real mfbbb = distribution.f[dir::ZZZ];
+
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
+    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa  2015.05.001 ]</b></a>
+    //!
+    const real drho =
+        ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+        (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
+        ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; 
+    const real rho = c1o1 + drho;
+    const real OOrho = c1o1 / rho;    
+    const real vvx = 
+        ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+        (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
+        (mfcbb - mfabb)) * OOrho;
+    const real vvy = 
+        ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+        (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
+        (mfbcb - mfbab)) * OOrho;
+    const real vvz = 
+        ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+        (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
+        (mfbbc - mfbba)) * OOrho;
+
+
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    //BGK comp
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    const real cusq = c3o2*(vvx*vvx + vvy*vvy + vvz*vvz);
+    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    mfbbb = mfbbb *(c1o1 + (-omega)) - (-omega)*   c8o27*  (drho - rho * cusq);
+    mfcbb = mfcbb    *(c1o1 + (-omega)) - (-omega)*   c2o27*  (drho + rho * (c3o1*(vvx)+c9o2*(vvx)*(vvx)-cusq));
+    mfabb = mfabb    *(c1o1 + (-omega)) - (-omega)*   c2o27*  (drho + rho * (c3o1*(-vvx) + c9o2*(-vvx)*(-vvx) - cusq));
+    mfbcb = mfbcb    *(c1o1 + (-omega)) - (-omega)*   c2o27*  (drho + rho * (c3o1*(vvy)+c9o2*(vvy)*(vvy)-cusq));
+    mfbab = mfbab    *(c1o1 + (-omega)) - (-omega)*   c2o27*  (drho + rho * (c3o1*(-vvy) + c9o2*(-vvy)*(-vvy) - cusq));
+    mfbbc = mfbbc    *(c1o1 + (-omega)) - (-omega)*   c2o27*  (drho + rho * (c3o1*(vvz)+c9o2*(vvz)*(vvz)-cusq));
+    mfbba = mfbba    *(c1o1 + (-omega)) - (-omega)*   c2o27*  (drho + rho * (c3o1*(-vvz) + c9o2*(-vvz)*(-vvz) - cusq));
+    mfccb = mfccb   *(c1o1 + (-omega)) - (-omega)*   c1o54*  (drho + rho * (c3o1*(vvx + vvy) + c9o2*(vvx + vvy)*(vvx + vvy) - cusq));
+    mfaab = mfaab   *(c1o1 + (-omega)) - (-omega)*   c1o54*  (drho + rho * (c3o1*(-vvx - vvy) + c9o2*(-vvx - vvy)*(-vvx - vvy) - cusq));
+    mfcab = mfcab   *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(vvx - vvy) + c9o2*(vvx - vvy)*(vvx - vvy) - cusq));
+    mfacb = mfacb   *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(-vvx + vvy) + c9o2*(-vvx + vvy)*(-vvx + vvy) - cusq));
+    mfcbc = mfcbc   *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(vvx + vvz) + c9o2*(vvx + vvz)*(vvx + vvz) - cusq));
+    mfaba = mfaba   *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(-vvx - vvz) + c9o2*(-vvx - vvz)*(-vvx - vvz) - cusq));
+    mfcba = mfcba   *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(vvx - vvz) + c9o2*(vvx - vvz)*(vvx - vvz) - cusq));
+    mfabc = mfabc   *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(-vvx + vvz) + c9o2*(-vvx + vvz)*(-vvx + vvz) - cusq));
+    mfbcc = mfbcc   *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(vvy + vvz) + c9o2*(vvy + vvz)*(vvy + vvz) - cusq));
+    mfbaa = mfbaa   *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(-vvy - vvz) + c9o2*(-vvy - vvz)*(-vvy - vvz) - cusq));
+    mfbca = mfbca   *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(vvy - vvz) + c9o2*(vvy - vvz)*(vvy - vvz) - cusq));
+    mfbac = mfbac   *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(-vvy + vvz) + c9o2*(-vvy + vvz)*(-vvy + vvz) - cusq));
+    mfccc = mfccc  *(c1o1 + (-omega)) - (-omega)*    c1o216*(drho + rho * (c3o1*(vvx + vvy + vvz) + c9o2*(vvx + vvy + vvz)*(vvx + vvy + vvz) - cusq));
+    mfaaa = mfaaa  *(c1o1 + (-omega)) - (-omega)*    c1o216*(drho + rho * (c3o1*(-vvx - vvy - vvz) + c9o2*(-vvx - vvy - vvz)*(-vvx - vvy - vvz) - cusq));
+    mfcca = mfcca  *(c1o1 + (-omega)) - (-omega)*    c1o216*(drho + rho * (c3o1*(vvx + vvy - vvz) + c9o2*(vvx + vvy - vvz)*(vvx + vvy - vvz) - cusq));
+    mfaac = mfaac  *(c1o1 + (-omega)) - (-omega)*    c1o216*(drho + rho * (c3o1*(-vvx - vvy + vvz) + c9o2*(-vvx - vvy + vvz)*(-vvx - vvy + vvz) - cusq));
+    mfcac = mfcac  *(c1o1 + (-omega)) - (-omega)*    c1o216*(drho + rho * (c3o1*(vvx - vvy + vvz) + c9o2*(vvx - vvy + vvz)*(vvx - vvy + vvz) - cusq));
+    mfaca = mfaca  *(c1o1 + (-omega)) - (-omega)*    c1o216*(drho + rho * (c3o1*(-vvx + vvy - vvz) + c9o2*(-vvx + vvy - vvz)*(-vvx + vvy - vvz) - cusq));
+    mfcaa = mfcaa  *(c1o1 + (-omega)) - (-omega)*    c1o216*(drho + rho * (c3o1*(vvx - vvy - vvz) + c9o2*(vvx - vvy - vvz)*(vvx - vvy - vvz) - cusq));
+    mfacc = mfacc  *(c1o1 + (-omega)) - (-omega)*    c1o216*(drho + rho * (c3o1*(-vvx + vvy + vvz) + c9o2*(-vvx + vvy + vvz)*(-vvx + vvy + vvz) - cusq));
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Write distributions: style of reading and writing the distributions from/to 
+    //! stored arrays dependent on timestep is based on the esoteric twist algorithm
+    //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+    //!
+    distribution.f[vf::lbm::dir::MZZ] = mfcbb;
+    distribution.f[vf::lbm::dir::PZZ] = mfabb;
+    distribution.f[vf::lbm::dir::ZMZ] = mfbcb;
+    distribution.f[vf::lbm::dir::ZPZ] = mfbab;
+    distribution.f[vf::lbm::dir::ZZM] = mfbbc;
+    distribution.f[vf::lbm::dir::ZZP] = mfbba;
+    distribution.f[vf::lbm::dir::MMZ] = mfccb;
+    distribution.f[vf::lbm::dir::PPZ] = mfaab;
+    distribution.f[vf::lbm::dir::MPZ] = mfcab;
+    distribution.f[vf::lbm::dir::PMZ] = mfacb;
+    distribution.f[vf::lbm::dir::MZM] = mfcbc;
+    distribution.f[vf::lbm::dir::PZP] = mfaba;
+    distribution.f[vf::lbm::dir::MZP] = mfcba;
+    distribution.f[vf::lbm::dir::PZM] = mfabc;
+    distribution.f[vf::lbm::dir::ZMM] = mfbcc;
+    distribution.f[vf::lbm::dir::ZPP] = mfbaa;
+    distribution.f[vf::lbm::dir::ZMP] = mfbca;
+    distribution.f[vf::lbm::dir::ZPM] = mfbac;
+    distribution.f[vf::lbm::dir::MMM] = mfccc;
+    distribution.f[vf::lbm::dir::PMM] = mfacc;
+    distribution.f[vf::lbm::dir::MPM] = mfcac;
+    distribution.f[vf::lbm::dir::PPM] = mfaac;
+    distribution.f[vf::lbm::dir::MMP] = mfcca;
+    distribution.f[vf::lbm::dir::PMP] = mfaca;
+    distribution.f[vf::lbm::dir::MPP] = mfcaa;
+    distribution.f[vf::lbm::dir::PPP] = mfaaa;
+    distribution.f[vf::lbm::dir::ZZZ] = mfbbb;
+}
+
+
+}
+}
+
diff --git a/src/lbm/BGK.h b/src/lbm/BGK.h
new file mode 100644
index 0000000000000000000000000000000000000000..a2320eca357d0b3aee3bb2236536290c2a5a954b
--- /dev/null
+++ b/src/lbm/BGK.h
@@ -0,0 +1,24 @@
+#ifndef LBM_BGK_H
+#define LBM_BGK_H
+
+#ifndef __host__
+#define __host__
+#endif
+#ifndef __device__
+#define __device__
+#endif
+
+#include <basics/Core/DataTypes.h>
+
+#include "CumulantChimeraParameter.h"
+
+namespace vf
+{
+namespace lbm
+{
+
+__host__ __device__ void bgk(CumulantChimeraParameter parameter);
+
+}
+}
+#endif
diff --git a/src/lbm/CumulantChimera.cpp b/src/lbm/CumulantChimera.cpp
index 22be39c46e056ef380e5c4c9c4b656be19a1cd1d..87b89c94f6a767ca91dc3ce952cb332738ad676a 100644
--- a/src/lbm/CumulantChimera.cpp
+++ b/src/lbm/CumulantChimera.cpp
@@ -114,12 +114,12 @@ __host__ __device__ void cumulantChimera(CumulantChimeraParameter parameter, Rel
     //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
     //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa  2015.05.001 ]</b></a>
     //!
-    real drho =
+    const real drho =
         ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
         (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
         ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; 
-    real rho = c1o1 + drho;
-    real OOrho = c1o1 / rho;    
+    const real rho = c1o1 + drho;
+    const real OOrho = c1o1 / rho;    
     real vvx = 
         ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
         (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
diff --git a/src/lbm/cuda/CMakeLists.txt b/src/lbm/cuda/CMakeLists.txt
index 5efe2928140d96ddc6a9287bdd81055e83638fab..7b1741478550f3d297fe1a8254de63c902888125 100644
--- a/src/lbm/cuda/CMakeLists.txt
+++ b/src/lbm/cuda/CMakeLists.txt
@@ -7,4 +7,5 @@ vf_add_library(NAME lbmCuda BUILDTYPE static PUBLIC_LINK basics FOLDER ../../lbm
 set_target_properties(lbmCuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
 
 set_source_files_properties(../Distribution27.cpp PROPERTIES LANGUAGE CUDA)
-set_source_files_properties(../CumulantChimera.cpp PROPERTIES LANGUAGE CUDA)
\ No newline at end of file
+set_source_files_properties(../CumulantChimera.cpp PROPERTIES LANGUAGE CUDA)
+set_source_files_properties(../BGK.cpp PROPERTIES LANGUAGE CUDA)
\ No newline at end of file