diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h index 8932dadb3f92256cba279121193f94dbabdc0a6d..213127e64c5e7bfe2b3d4c9d1154c11d374f9552 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h @@ -1,26 +1,19 @@ -#ifndef KERNEL_H -#define KERNEL_H +#ifndef GPU_KERNEL_H +#define GPU_KERNEL_H -#include <DataTypes.h> - -#include <cuda_runtime.h> -#include <helper_functions.h> -#include <helper_cuda.h> +#include <vector> #include "Kernel/Utilities/KernelGroup.h" #include "PreProcessor/PreProcessorType.h" -#include <vector> - - class Kernel { public: - virtual ~Kernel() = default; - virtual void run() = 0; + virtual ~Kernel() = default; + virtual void run() = 0; - virtual bool checkParameter() = 0; - virtual std::vector<PreProcessorType> getPreProcessorTypes() = 0; - virtual KernelGroup getKernelGroup() = 0; + virtual bool checkParameter() = 0; + virtual std::vector<PreProcessorType> getPreProcessorTypes() = 0; + virtual KernelGroup getKernelGroup() = 0; }; -#endif \ No newline at end of file +#endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp index 6d06d4c9175c7935bb609c2e342b29c50c42846d..ec961e9d2a83f82e1ce57852ccab4248a99958d1 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp +++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp @@ -2,26 +2,24 @@ #include "Kernel/Utilities/CheckParameterStrategy/CheckParameterStrategy.h" -bool KernelImp::checkParameter() -{ - return checkStrategy->checkParameter(para); +bool KernelImp::checkParameter() +{ + return checkStrategy->checkParameter(para); } -std::vector<PreProcessorType> KernelImp::getPreProcessorTypes() -{ - return myPreProcessorTypes; +std::vector<PreProcessorType> KernelImp::getPreProcessorTypes() +{ + return myPreProcessorTypes; } -KernelGroup KernelImp::getKernelGroup() -{ - return myKernelGroup; +KernelGroup KernelImp::getKernelGroup() +{ + return myKernelGroup; } void KernelImp::setCheckParameterStrategy(std::shared_ptr<CheckParameterStrategy> strategy) { - this->checkStrategy = strategy; + this->checkStrategy = strategy; } -KernelImp::KernelImp() -{ -} +KernelImp::KernelImp(std::shared_ptr<Parameter> para, int level) : para(para), level(level) {} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h index 2795ab94ea78254eaa37d4f9c0962bac4528ab1f..e293972306529ab7166028000a76db49fb36bf0f 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h @@ -13,24 +13,25 @@ class Parameter; class KernelImp : public Kernel { public: - virtual void run() = 0; + virtual void run() = 0; - bool checkParameter(); - std::vector<PreProcessorType> getPreProcessorTypes(); - KernelGroup getKernelGroup(); + bool checkParameter(); + std::vector<PreProcessorType> getPreProcessorTypes(); + KernelGroup getKernelGroup(); - void setCheckParameterStrategy(std::shared_ptr<CheckParameterStrategy> strategy); + void setCheckParameterStrategy(std::shared_ptr<CheckParameterStrategy> strategy); protected: - KernelImp(); + KernelImp(std::shared_ptr<Parameter> para, int level); + KernelImp() = default; - std::shared_ptr< Parameter> para; - std::shared_ptr<CheckParameterStrategy> checkStrategy; - int level; - std::vector<PreProcessorType> myPreProcessorTypes; - KernelGroup myKernelGroup; - - vf::gpu::CudaGrid cudaGrid; + std::shared_ptr<Parameter> para; + 
std::shared_ptr<CheckParameterStrategy> checkStrategy; + int level; + std::vector<PreProcessorType> myPreProcessorTypes; + KernelGroup myKernelGroup; + vf::gpu::CudaGrid cudaGrid; }; -#endif \ No newline at end of file + +#endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu index bd3e08c859163a0852498e148f7e1380ce7c3bdd..4c82851996646590d8c246df5f940b58a308d52c 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu @@ -9,14 +9,30 @@ #include <lbm/BGK.h> -std::shared_ptr<BGKUnified> BGKUnified::getNewInstance(std::shared_ptr<Parameter> para, int level) +namespace vf { - return std::make_shared<BGKUnified>(para, level); +namespace gpu +{ + + +BGKUnified::BGKUnified(std::shared_ptr<Parameter> para, int level) + : KernelImp(para, level) +{ +#ifndef BUILD_CUDA_LTO + throw std::invalid_argument("To use the BKGUnified kernel, pass -DBUILD_CUDA_LTO=ON to cmake. Requires: CUDA 11.2 & cc 5.0"); +#endif + + myPreProcessorTypes.push_back(InitCompSP27); + + myKernelGroup = BasicKernel; + + this->cudaGrid = CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP); } + void BGKUnified::run() { - vf::gpu::GPUKernelParameter kernelParameter{ para->getParD(level)->omega, + GPUKernelParameter kernelParameter{ para->getParD(level)->omega, para->getParD(level)->geoSP, para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, @@ -26,27 +42,15 @@ void BGKUnified::run() nullptr, /* forces not used in bgk kernel */ para->getParD(level)->evenOrOdd }; - auto lambda = [] __device__(vf::lbm::KernelParameter parameter) { - return vf::lbm::bgk(parameter); + auto lambda = [] __device__(lbm::KernelParameter parameter) { + return lbm::bgk(parameter); }; - vf::gpu::runKernel<<<cudaGrid.grid, cudaGrid.threads>>>(lambda, kernelParameter); + runKernel<<<cudaGrid.grid, cudaGrid.threads>>>(lambda, kernelParameter); getLastCudaError("LB_Kernel_BGKUnified execution failed"); } -BGKUnified::BGKUnified(std::shared_ptr<Parameter> para, int level) -{ -#ifndef BUILD_CUDA_LTO - throw std::invalid_argument("To use the BKGUnified kernel, pass -DBUILD_CUDA_LTO=ON to cmake. 
Requires: CUDA 11.2 & cc 5.0"); -#endif - - this->para = para; - this->level = level; - myPreProcessorTypes.push_back(InitCompSP27); - - myKernelGroup = BasicKernel; - - this->cudaGrid = vf::gpu::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP); +} } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h index d8438173afd3dbb7259b110ef4f2939a11b6e196..762eaaa5935bd01fa6ae002521a40e45cd239dfd 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h @@ -3,13 +3,20 @@ #include "Kernel/KernelImp.h" +namespace vf +{ +namespace gpu +{ + class BGKUnified : public KernelImp { public: - static std::shared_ptr<BGKUnified> getNewInstance(std::shared_ptr<Parameter> para, int level); - void run(); - BGKUnified(std::shared_ptr<Parameter> para, int level); + + void run(); }; +} +} + #endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu index 90f6dad8eea81d52957dc854f9cfd0a3282556f1..b6f5d21ccf909f1ce3bcf11a4558f4771d87d021 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu @@ -8,14 +8,29 @@ #include <lbm/CumulantChimera.h> -std::shared_ptr<CumulantK15Unified> CumulantK15Unified::getNewInstance(std::shared_ptr<Parameter> para, int level) +namespace vf { - return std::make_shared<CumulantK15Unified>(para, level); +namespace gpu +{ + +CumulantK15Unified::CumulantK15Unified(std::shared_ptr<Parameter> para, int level) + : KernelImp(para, level) +{ +#ifndef BUILD_CUDA_LTO + throw std::invalid_argument( + "To use the CumulantK15Unified kernel, pass -DBUILD_CUDA_LTO=ON to cmake. 
Requires: CUDA 11.2 & cc 5.0"); +#endif + + myPreProcessorTypes.push_back(InitCompSP27); + + myKernelGroup = BasicKernel; + + this->cudaGrid = CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP); } void CumulantK15Unified::run() { - vf::gpu::GPUKernelParameter kernelParameter{ para->getParD(level)->omega, + GPUKernelParameter kernelParameter{ para->getParD(level)->omega, para->getParD(level)->geoSP, para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, @@ -25,8 +40,8 @@ void CumulantK15Unified::run() para->getParD(level)->forcing, para->getParD(level)->evenOrOdd }; - auto lambda = [] __device__(vf::lbm::KernelParameter parameter) { - return vf::lbm::cumulantChimera(parameter, vf::lbm::setRelaxationRatesK15); + auto lambda = [] __device__(lbm::KernelParameter parameter) { + return lbm::cumulantChimera(parameter, lbm::setRelaxationRatesK15); }; vf::gpu::runKernel<<<cudaGrid.grid, cudaGrid.threads>>>(lambda, kernelParameter); @@ -34,19 +49,6 @@ void CumulantK15Unified::run() getLastCudaError("LB_Kernel_CumulantK15Comp execution failed"); } -CumulantK15Unified::CumulantK15Unified(std::shared_ptr<Parameter> para, int level) -{ -#ifndef BUILD_CUDA_LTO - throw std::invalid_argument( - "To use the CumulantK15Unified kernel, pass -DBUILD_CUDA_LTO=ON to cmake. Requires: CUDA 11.2 & cc 5.0"); -#endif - - this->para = para; - this->level = level; - - myPreProcessorTypes.push_back(InitCompSP27); - - myKernelGroup = BasicKernel; - this->cudaGrid = vf::gpu::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP); } +} \ No newline at end of file diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.h index 666a605c40cef3d359238280f7c0578b056dbe05..8756253950484e00773af89327589c3d8f157729 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.h @@ -3,12 +3,19 @@ #include "Kernel/KernelImp.h" +namespace vf +{ +namespace gpu +{ class CumulantK15Unified : public KernelImp { public: - static std::shared_ptr<CumulantK15Unified> getNewInstance(std::shared_ptr<Parameter> para, int level); - void run(); - CumulantK15Unified(std::shared_ptr<Parameter> para, int level); + + void run(); }; + +} +} + #endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu index 40398fe9cbd4856deb61c33d5ca20d5efddbe3b4..989fce0c5e797ef90d644845f6c502bee700f6e1 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu @@ -8,15 +8,31 @@ #include <lbm/CumulantChimera.h> +namespace vf +{ +namespace gpu +{ -std::shared_ptr<CumulantK17Unified> CumulantK17Unified::getNewInstance(std::shared_ptr<Parameter> para, int level) + +CumulantK17Unified::CumulantK17Unified(std::shared_ptr<Parameter> para, int level) + : KernelImp(para, level) { - return std::make_shared<CumulantK17Unified>(para, level); 
+#ifndef BUILD_CUDA_LTO + throw std::invalid_argument("To use the CumulantK17Unified kernel, pass -DBUILD_CUDA_LTO=ON to cmake. Requires: CUDA 11.2 & cc 5.0"); +#endif + + myPreProcessorTypes.push_back(InitCompSP27); + + myKernelGroup = BasicKernel; + + this->cudaGrid = CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP); } + + void CumulantK17Unified::run() { - vf::gpu::GPUKernelParameter kernelParameter{ para->getParD(level)->omega, + GPUKernelParameter kernelParameter{ para->getParD(level)->omega, para->getParD(level)->geoSP, para->getParD(level)->neighborX_SP, para->getParD(level)->neighborY_SP, @@ -26,27 +42,15 @@ void CumulantK17Unified::run() para->getParD(level)->forcing, para->getParD(level)->evenOrOdd }; - auto lambda = [] __device__(vf::lbm::KernelParameter parameter) { - return vf::lbm::cumulantChimera(parameter, vf::lbm::setRelaxationRatesK17); + auto lambda = [] __device__(lbm::KernelParameter parameter) { + return lbm::cumulantChimera(parameter, lbm::setRelaxationRatesK17); }; - vf::gpu::runKernel<<<cudaGrid.grid, cudaGrid.threads>>>(lambda, kernelParameter); + runKernel<<<cudaGrid.grid, cudaGrid.threads>>>(lambda, kernelParameter); getLastCudaError("LB_Kernel_CumulantK17Unified execution failed"); } -CumulantK17Unified::CumulantK17Unified(std::shared_ptr<Parameter> para, int level) -{ -#ifndef BUILD_CUDA_LTO - throw std::invalid_argument("To use the CumulantK17Unified kernel, pass -DBUILD_CUDA_LTO=ON to cmake. Requires: CUDA 11.2 & cc 5.0"); -#endif - - this->para = para; - this->level = level; - - myPreProcessorTypes.push_back(InitCompSP27); - - myKernelGroup = BasicKernel; - this->cudaGrid = vf::gpu::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP); +} } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h index d466b7696a72976e7b17ab43601189d4f75eaf94..af8470b717ad7a98e7a3fcd507976353d9e8bd41 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h @@ -3,13 +3,21 @@ #include "Kernel/KernelImp.h" +namespace vf +{ +namespace gpu +{ + + class CumulantK17Unified : public KernelImp { public: - static std::shared_ptr<CumulantK17Unified> getNewInstance(std::shared_ptr<Parameter> para, int level); - void run(); - CumulantK17Unified(std::shared_ptr<Parameter> para, int level); + + void run(); }; +} +} + #endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/FluidFlowCompStrategy.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/FluidFlowCompStrategy.h index 5bdf2e9bb0f00e3e2ae3642baa42545079f93c2c..c9a6675bd7a4b82442c55b23bac6f9b49b811938 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/FluidFlowCompStrategy.h +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/FluidFlowCompStrategy.h @@ -15,4 +15,4 @@ private: FluidFlowCompStrategy(); }; -#endif \ No newline at end of file +#endif diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh index 
65305549e6ed3bf41432e70175caef5165aacf8e..b4097851b251b7447f6ce06856d0b9187999a20b 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh +++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh @@ -5,7 +5,6 @@ #include <DataTypes.h> #include <cuda_runtime.h> -#include <lbm/Distribution27.h> #include <lbm/KernelParameter.h> #include "Kernel/Utilities/DistributionHelper.cuh" @@ -32,13 +31,13 @@ struct GPUKernelParameter template<typename KernelFunctor> __global__ void runKernel(KernelFunctor kernel, GPUKernelParameter kernelParameter) { - const uint k = vf::gpu::getNodeIndex(); + const uint k = getNodeIndex(); const uint nodeType = kernelParameter.typeOfGridNode[k]; - if (!vf::gpu::isValidFluidNode(k, kernelParameter.size_Mat, nodeType)) + if (!isValidFluidNode(k, kernelParameter.size_Mat, nodeType)) return; - vf::gpu::DistributionWrapper distributionWrapper { + DistributionWrapper distributionWrapper { kernelParameter.distributions, kernelParameter.size_Mat, kernelParameter.isEvenTimestep, diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu index dd68ff05162f65552764ffda8a08d015a5d4a171..bbb01d95410612d36d55f1e0113175a8741b9ade 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu +++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu @@ -164,19 +164,6 @@ __device__ bool isValidFluidNode(uint k, int size_Mat, uint nodeType) (nodeType == GEO_FLUID || nodeType == GEO_PM_0 || nodeType == GEO_PM_1 || nodeType == GEO_PM_2); } -__device__ void getLevelForce(real fx, real fy, real fz, int level, real *forces) -{ - real fx_t{ 1. }, fy_t{ 1. }, fz_t{ 1. 
}; - for (int i = 0; i < level; i++) { - fx_t *= vf::lbm::constant::c2o1; - fy_t *= vf::lbm::constant::c2o1; - fz_t *= vf::lbm::constant::c2o1; - } - - forces[0] = fx / fx_t; - forces[1] = fy / fy_t; - forces[2] = fz / fz_t; -} } // namespace gpu } // namespace vf diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh index 0e9173d9c39629faa65e4fcd14b976de4b283ffd..935030701924233d959fb69b74a7c3087feb0834 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh +++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh @@ -35,7 +35,7 @@ #include "LBM/LB.h" -#include <lbm/Distribution27.h> +#include <lbm/KernelParameter.h> namespace vf { @@ -92,8 +92,6 @@ __device__ unsigned int getNodeIndex(); __device__ bool isValidFluidNode(uint k, int size_Mat, uint nodeType); -__device__ void getLevelForce(real fx, real fy, real fz, int level, real* forces); - } } diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp index efad7278d3e9d8b06355d9e8d4f230fa2c3c36b0..5f63df1c9afc17a62a9a47ce82401ebba4453872 100644 --- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp +++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp @@ -106,8 +106,8 @@ std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter> if (kernel == "BGKCompSP27") { newKernel = BGKCompSP27::getNewInstance(para, level); // compressible checkStrategy = FluidFlowCompStrategy::getInstance(); // || - } else if (kernel == "BGKUnified") { // \/ - newKernel = BGKUnified::getNewInstance(para, level); + } else if (kernel == "BGKUnified") { // \/ + newKernel = std::make_shared<vf::gpu::BGKUnified>(para, level); checkStrategy = FluidFlowCompStrategy::getInstance(); } else if (kernel == "BGKPlusCompSP27") { newKernel = BGKPlusCompSP27::getNewInstance(para, level); @@ -125,10 +125,10 @@ std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter> newKernel = CumulantK17Comp::getNewInstance(para, level); checkStrategy = FluidFlowCompStrategy::getInstance(); } else if (kernel == "CumulantK15Unified") { - newKernel = CumulantK15Unified::getNewInstance(para, level); + newKernel = std::make_shared<vf::gpu::CumulantK15Unified>(para, level); checkStrategy = FluidFlowCompStrategy::getInstance(); } else if (kernel == "CumulantK17Unified") { - newKernel = CumulantK17Unified::getNewInstance(para, level); + newKernel = std::make_shared<vf::gpu::CumulantK17Unified>(para, level); checkStrategy = FluidFlowCompStrategy::getInstance(); } else if (kernel == "CumulantK17BulkComp") { newKernel = CumulantK17BulkComp::getNewInstance(para, level); diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp index ab904167671f6525d4ea0bd0f459f4b01742b910..7dc916a47cbe08e543cc73d3c6c6cf5f163a16e7 100644 --- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp +++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp @@ -3,6 +3,8 @@ #include <stdio.h> #include <vector> +#include <helper_timer.h> + #include "LBM/LB.h" #include "Communication/Communicator.h" #include "Communication/ExchangeData27.h" diff --git a/src/lbm/BGK.cpp b/src/lbm/BGK.cpp index 407807cf52dae2526e3782c72565852ce64a34d5..fa3af6777a0492687768dd4945cbf1e9b186f514 100644 --- a/src/lbm/BGK.cpp +++ b/src/lbm/BGK.cpp @@ -1,6 +1,5 @@ #include "BGK.h" 
-#include <cmath> #include <basics/Core/DataTypes.h> #include <basics/Core/RealConstants.h> @@ -8,7 +7,6 @@ #include "constants/NumericConstants.h" #include "constants/D3Q27.h" -#include "Chimera.h" #include "MacroscopicQuantities.h" namespace vf @@ -25,7 +23,6 @@ __host__ __device__ void bgk(KernelParameter parameter) auto& distribution = parameter.distribution; const auto omega = parameter.omega; - //////////////////////////////////////////////////////////////////////////////////// //! - Read distributions: style of reading and writing the distributions from/to //! stored arrays dependent on timestep is based on the esoteric twist algorithm @@ -61,53 +58,39 @@ __host__ __device__ void bgk(KernelParameter parameter) //////////////////////////////////////////////////////////////////////////////////// - //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> - //! - const real drho = - ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) + - (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) + - ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb; + //! - Acquire macroscopic quantities + const real drho = getDensity(distribution.f); const real rho = c1o1 + drho; - const real OOrho = c1o1 / rho; - const real vvx = - ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + - (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + - (mfcbb - mfabb)) * OOrho; - const real vvy = - ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + - (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + - (mfbcb - mfbab)) * OOrho; - const real vvz = - ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + - (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + - (mfbbc - mfbba)) * OOrho; - - - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - //BGK comp - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + const real OOrho = constant::c1o1 / (constant::c1o1 + drho); + + const real vvx = getIncompressibleVelocityX1(distribution.f) * OOrho; + const real vvy = getIncompressibleVelocityX2(distribution.f) * OOrho; + const real vvz = getIncompressibleVelocityX3(distribution.f) * OOrho; + + + //////////////////////////////////////////////////////////////////////////////////// + //! 
- BGK computation const real cusq = c3o2*(vvx*vvx + vvy*vvy + vvz*vvz); - ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - mfbbb = mfbbb *(c1o1 + (-omega)) - (-omega)* c8o27* (drho - rho * cusq); - mfcbb = mfcbb *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(vvx)+c9o2*(vvx)*(vvx)-cusq)); - mfabb = mfabb *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(-vvx) + c9o2*(-vvx)*(-vvx) - cusq)); - mfbcb = mfbcb *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(vvy)+c9o2*(vvy)*(vvy)-cusq)); - mfbab = mfbab *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(-vvy) + c9o2*(-vvy)*(-vvy) - cusq)); - mfbbc = mfbbc *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(vvz)+c9o2*(vvz)*(vvz)-cusq)); - mfbba = mfbba *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(-vvz) + c9o2*(-vvz)*(-vvz) - cusq)); - mfccb = mfccb *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvx + vvy) + c9o2*(vvx + vvy)*(vvx + vvy) - cusq)); - mfaab = mfaab *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvx - vvy) + c9o2*(-vvx - vvy)*(-vvx - vvy) - cusq)); - mfcab = mfcab *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvx - vvy) + c9o2*(vvx - vvy)*(vvx - vvy) - cusq)); - mfacb = mfacb *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvx + vvy) + c9o2*(-vvx + vvy)*(-vvx + vvy) - cusq)); - mfcbc = mfcbc *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvx + vvz) + c9o2*(vvx + vvz)*(vvx + vvz) - cusq)); - mfaba = mfaba *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvx - vvz) + c9o2*(-vvx - vvz)*(-vvx - vvz) - cusq)); - mfcba = mfcba *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvx - vvz) + c9o2*(vvx - vvz)*(vvx - vvz) - cusq)); - mfabc = mfabc *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvx + vvz) + c9o2*(-vvx + vvz)*(-vvx + vvz) - cusq)); - mfbcc = mfbcc *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvy + vvz) + c9o2*(vvy + vvz)*(vvy + vvz) - cusq)); - mfbaa = mfbaa *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvy - vvz) + c9o2*(-vvy - vvz)*(-vvy - vvz) - cusq)); - mfbca = mfbca *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvy - vvz) + c9o2*(vvy - vvz)*(vvy - vvz) - cusq)); - mfbac = mfbac *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvy + vvz) + c9o2*(-vvy + vvz)*(-vvy + vvz) - cusq)); + + mfbbb = mfbbb *(c1o1 + (-omega)) - (-omega)* c8o27* (drho - rho * cusq); + mfcbb = mfcbb *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(vvx)+c9o2*(vvx)*(vvx)-cusq)); + mfabb = mfabb *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(-vvx) + c9o2*(-vvx)*(-vvx) - cusq)); + mfbcb = mfbcb *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(vvy)+c9o2*(vvy)*(vvy)-cusq)); + mfbab = mfbab *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(-vvy) + c9o2*(-vvy)*(-vvy) - cusq)); + mfbbc = mfbbc *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(vvz)+c9o2*(vvz)*(vvz)-cusq)); + mfbba = mfbba *(c1o1 + (-omega)) - (-omega)* c2o27* (drho + rho * (c3o1*(-vvz) + c9o2*(-vvz)*(-vvz) - cusq)); + mfccb = mfccb *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvx + vvy) + c9o2*(vvx + vvy)*(vvx + vvy) - cusq)); + mfaab = mfaab *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvx - vvy) + c9o2*(-vvx - vvy)*(-vvx - vvy) - cusq)); + mfcab = mfcab *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvx - 
vvy) + c9o2*(vvx - vvy)*(vvx - vvy) - cusq)); + mfacb = mfacb *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvx + vvy) + c9o2*(-vvx + vvy)*(-vvx + vvy) - cusq)); + mfcbc = mfcbc *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvx + vvz) + c9o2*(vvx + vvz)*(vvx + vvz) - cusq)); + mfaba = mfaba *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvx - vvz) + c9o2*(-vvx - vvz)*(-vvx - vvz) - cusq)); + mfcba = mfcba *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvx - vvz) + c9o2*(vvx - vvz)*(vvx - vvz) - cusq)); + mfabc = mfabc *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvx + vvz) + c9o2*(-vvx + vvz)*(-vvx + vvz) - cusq)); + mfbcc = mfbcc *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvy + vvz) + c9o2*(vvy + vvz)*(vvy + vvz) - cusq)); + mfbaa = mfbaa *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvy - vvz) + c9o2*(-vvy - vvz)*(-vvy - vvz) - cusq)); + mfbca = mfbca *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(vvy - vvz) + c9o2*(vvy - vvz)*(vvy - vvz) - cusq)); + mfbac = mfbac *(c1o1 + (-omega)) - (-omega)* c1o54* (drho + rho * (c3o1*(-vvy + vvz) + c9o2*(-vvy + vvz)*(-vvy + vvz) - cusq)); mfccc = mfccc *(c1o1 + (-omega)) - (-omega)* c1o216*(drho + rho * (c3o1*(vvx + vvy + vvz) + c9o2*(vvx + vvy + vvz)*(vvx + vvy + vvz) - cusq)); mfaaa = mfaaa *(c1o1 + (-omega)) - (-omega)* c1o216*(drho + rho * (c3o1*(-vvx - vvy - vvz) + c9o2*(-vvx - vvy - vvz)*(-vvx - vvy - vvz) - cusq)); mfcca = mfcca *(c1o1 + (-omega)) - (-omega)* c1o216*(drho + rho * (c3o1*(vvx + vvy - vvz) + c9o2*(vvx + vvy - vvz)*(vvx + vvy - vvz) - cusq)); @@ -122,33 +105,33 @@ __host__ __device__ void bgk(KernelParameter parameter) //! stored arrays dependent on timestep is based on the esoteric twist algorithm //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> //! 
- distribution.f[vf::lbm::dir::MZZ] = mfcbb; - distribution.f[vf::lbm::dir::PZZ] = mfabb; - distribution.f[vf::lbm::dir::ZMZ] = mfbcb; - distribution.f[vf::lbm::dir::ZPZ] = mfbab; - distribution.f[vf::lbm::dir::ZZM] = mfbbc; - distribution.f[vf::lbm::dir::ZZP] = mfbba; - distribution.f[vf::lbm::dir::MMZ] = mfccb; - distribution.f[vf::lbm::dir::PPZ] = mfaab; - distribution.f[vf::lbm::dir::MPZ] = mfcab; - distribution.f[vf::lbm::dir::PMZ] = mfacb; - distribution.f[vf::lbm::dir::MZM] = mfcbc; - distribution.f[vf::lbm::dir::PZP] = mfaba; - distribution.f[vf::lbm::dir::MZP] = mfcba; - distribution.f[vf::lbm::dir::PZM] = mfabc; - distribution.f[vf::lbm::dir::ZMM] = mfbcc; - distribution.f[vf::lbm::dir::ZPP] = mfbaa; - distribution.f[vf::lbm::dir::ZMP] = mfbca; - distribution.f[vf::lbm::dir::ZPM] = mfbac; - distribution.f[vf::lbm::dir::MMM] = mfccc; - distribution.f[vf::lbm::dir::PMM] = mfacc; - distribution.f[vf::lbm::dir::MPM] = mfcac; - distribution.f[vf::lbm::dir::PPM] = mfaac; - distribution.f[vf::lbm::dir::MMP] = mfcca; - distribution.f[vf::lbm::dir::PMP] = mfaca; - distribution.f[vf::lbm::dir::MPP] = mfcaa; - distribution.f[vf::lbm::dir::PPP] = mfaaa; - distribution.f[vf::lbm::dir::ZZZ] = mfbbb; + distribution.f[dir::MZZ] = mfcbb; + distribution.f[dir::PZZ] = mfabb; + distribution.f[dir::ZMZ] = mfbcb; + distribution.f[dir::ZPZ] = mfbab; + distribution.f[dir::ZZM] = mfbbc; + distribution.f[dir::ZZP] = mfbba; + distribution.f[dir::MMZ] = mfccb; + distribution.f[dir::PPZ] = mfaab; + distribution.f[dir::MPZ] = mfcab; + distribution.f[dir::PMZ] = mfacb; + distribution.f[dir::MZM] = mfcbc; + distribution.f[dir::PZP] = mfaba; + distribution.f[dir::MZP] = mfcba; + distribution.f[dir::PZM] = mfabc; + distribution.f[dir::ZMM] = mfbcc; + distribution.f[dir::ZPP] = mfbaa; + distribution.f[dir::ZMP] = mfbca; + distribution.f[dir::ZPM] = mfbac; + distribution.f[dir::MMM] = mfccc; + distribution.f[dir::PMM] = mfacc; + distribution.f[dir::MPM] = mfcac; + distribution.f[dir::PPM] = mfaac; + distribution.f[dir::MMP] = mfcca; + distribution.f[dir::PMP] = mfaca; + distribution.f[dir::MPP] = mfcaa; + distribution.f[dir::PPP] = mfaaa; + distribution.f[dir::ZZZ] = mfbbb; } diff --git a/src/lbm/CumulantChimera.cpp b/src/lbm/CumulantChimera.cpp index 77133d1b698767b9569c91ac1b7d68a68dfeb58b..65dc9b1f82f409447fda54fc0bc5f460533aad1b 100644 --- a/src/lbm/CumulantChimera.cpp +++ b/src/lbm/CumulantChimera.cpp @@ -110,48 +110,24 @@ __host__ __device__ void cumulantChimera(KernelParameter parameter, RelaxationRa real mfaaa = distribution.f[dir::MMM]; real mfbbb = distribution.f[dir::ZZZ]; - //////////////////////////////////////////////////////////////////////////////////// - //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref - //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> - //! 
-    const real drho =
-        ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
-         (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
-         ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
-    const real rho = c1o1 + drho;
-    const real OOrho = c1o1 / rho;
-    real vvx =
-        ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
-         (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
-         (mfcbb - mfabb)) * OOrho;
-    real vvy =
-        ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
-         (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
-         (mfbcb - mfbab)) * OOrho;
-    real vvz =
-        ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
-         (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
-         (mfbbc - mfbba)) * OOrho;
+
+    const real drho = getDensity(distribution.f);
+    const real OOrho = c1o1 / (c1o1 + drho);
+
     ////////////////////////////////////////////////////////////////////////////////////
     //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) \ref
     //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a>
     //!
-    vvx += forces[0] * c1o2;
-    vvy += forces[1] * c1o2;
-    vvz += forces[2] * c1o2;
+    const real vvx = getIncompressibleVelocityX1(distribution.f) * OOrho + forces[0] * c1o2;
+    const real vvy = getIncompressibleVelocityX2(distribution.f) * OOrho + forces[1] * c1o2;
+    const real vvz = getIncompressibleVelocityX3(distribution.f) * OOrho + forces[2] * c1o2;
+
     ////////////////////////////////////////////////////////////////////////////////////
     // calculate the square of velocities for this lattice node
-    real vx2 = vvx*vvx;
-    real vy2 = vvy*vvy;
-    real vz2 = vvz*vvz;
-    ////////////////////////////////////////////////////////////////////////////////////
-    //! - Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to section 6 in \ref
-    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a>
-    //!
-    real wadjust;
-    real qudricLimitP = c1o100;
-    real qudricLimitM = c1o100;
-    real qudricLimitD = c1o100;
+    const real vx2 = vvx*vvx;
+    const real vy2 = vvy*vvy;
+    const real vz2 = vvz*vvz;
+
     ////////////////////////////////////////////////////////////////////////////////////
     //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
     //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a>
@@ -209,8 +185,8 @@ __host__ __device__ void cumulantChimera(KernelParameter parameter, RelaxationRa
     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a>
     //! with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for different bulk viscosity).
     //!
- real A = (c4o1 + c2o1*omega - c3o1*omega*omega) / (c2o1 - c7o1*omega + c5o1*omega*omega); - real B = (c4o1 + c28o1*omega - c14o1*omega*omega) / (c6o1 - c21o1*omega + c15o1*omega*omega); + const real A = (c4o1 + c2o1*omega - c3o1*omega*omega) / (c2o1 - c7o1*omega + c5o1*omega*omega); + const real B = (c4o1 + c28o1*omega - c14o1*omega*omega) / (c6o1 - c21o1*omega + c15o1*omega*omega); //////////////////////////////////////////////////////////////////////////////////// //! - Compute cumulants from central moments according to Eq. (20)-(23) in //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> @@ -267,12 +243,12 @@ __host__ __device__ void cumulantChimera(KernelParameter parameter, RelaxationRa //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> //! Note that the division by rho is omitted here as we need rho times the gradients later. //! - real Dxy = -c3o1*omega*mfbba; - real Dxz = -c3o1*omega*mfbab; - real Dyz = -c3o1*omega*mfabb; - real dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz); - real dyuy = dxux + omega * c3o2 * mxxMyy; - real dzuz = dxux + omega * c3o2 * mxxMzz; + const real Dxy = -c3o1*omega*mfbba; + const real Dxz = -c3o1*omega*mfbab; + const real Dyz = -c3o1*omega*mfabb; + const real dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz); + const real dyuy = dxux + omega * c3o2 * mxxMyy; + const real dzuz = dxux + omega * c3o2 * mxxMzz; //////////////////////////////////////////////////////////// //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> @@ -293,10 +269,16 @@ __host__ __device__ void cumulantChimera(KernelParameter parameter, RelaxationRa //relax ////////////////////////////////////////////////////////////////////////// // incl. limiter + //! Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to section 6 in \ref //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123) //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05 040 ]</b></a> //! - wadjust = Oxyz + (c1o1 - Oxyz)*abs_internal(mfbbb) / (abs_internal(mfbbb) + qudricLimitD); + + const real qudricLimitP = c1o100; + const real qudricLimitM = c1o100; + const real qudricLimitD = c1o100; + + real wadjust = Oxyz + (c1o1 - Oxyz)*abs_internal(mfbbb) / (abs_internal(mfbbb) + qudricLimitD); mfbbb += wadjust * (-mfbbb); wadjust = OxyyPxzz + (c1o1 - OxyyPxzz)*abs_internal(mxxyPyzz) / (abs_internal(mxxyPyzz) + qudricLimitP); mxxyPyzz += wadjust * (-mxxyPyzz); @@ -436,33 +418,33 @@ __host__ __device__ void cumulantChimera(KernelParameter parameter, RelaxationRa //! stored arrays dependent on timestep is based on the esoteric twist algorithm //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a> //! 
- distribution.f[vf::lbm::dir::MZZ] = mfcbb; - distribution.f[vf::lbm::dir::PZZ] = mfabb; - distribution.f[vf::lbm::dir::ZMZ] = mfbcb; - distribution.f[vf::lbm::dir::ZPZ] = mfbab; - distribution.f[vf::lbm::dir::ZZM] = mfbbc; - distribution.f[vf::lbm::dir::ZZP] = mfbba; - distribution.f[vf::lbm::dir::MMZ] = mfccb; - distribution.f[vf::lbm::dir::PPZ] = mfaab; - distribution.f[vf::lbm::dir::MPZ] = mfcab; - distribution.f[vf::lbm::dir::PMZ] = mfacb; - distribution.f[vf::lbm::dir::MZM] = mfcbc; - distribution.f[vf::lbm::dir::PZP] = mfaba; - distribution.f[vf::lbm::dir::MZP] = mfcba; - distribution.f[vf::lbm::dir::PZM] = mfabc; - distribution.f[vf::lbm::dir::ZMM] = mfbcc; - distribution.f[vf::lbm::dir::ZPP] = mfbaa; - distribution.f[vf::lbm::dir::ZMP] = mfbca; - distribution.f[vf::lbm::dir::ZPM] = mfbac; - distribution.f[vf::lbm::dir::MMM] = mfccc; - distribution.f[vf::lbm::dir::PMM] = mfacc; - distribution.f[vf::lbm::dir::MPM] = mfcac; - distribution.f[vf::lbm::dir::PPM] = mfaac; - distribution.f[vf::lbm::dir::MMP] = mfcca; - distribution.f[vf::lbm::dir::PMP] = mfaca; - distribution.f[vf::lbm::dir::MPP] = mfcaa; - distribution.f[vf::lbm::dir::PPP] = mfaaa; - distribution.f[vf::lbm::dir::ZZZ] = mfbbb; + distribution.f[dir::MZZ] = mfcbb; + distribution.f[dir::PZZ] = mfabb; + distribution.f[dir::ZMZ] = mfbcb; + distribution.f[dir::ZPZ] = mfbab; + distribution.f[dir::ZZM] = mfbbc; + distribution.f[dir::ZZP] = mfbba; + distribution.f[dir::MMZ] = mfccb; + distribution.f[dir::PPZ] = mfaab; + distribution.f[dir::MPZ] = mfcab; + distribution.f[dir::PMZ] = mfacb; + distribution.f[dir::MZM] = mfcbc; + distribution.f[dir::PZP] = mfaba; + distribution.f[dir::MZP] = mfcba; + distribution.f[dir::PZM] = mfabc; + distribution.f[dir::ZMM] = mfbcc; + distribution.f[dir::ZPP] = mfbaa; + distribution.f[dir::ZMP] = mfbca; + distribution.f[dir::ZPM] = mfbac; + distribution.f[dir::MMM] = mfccc; + distribution.f[dir::PMM] = mfacc; + distribution.f[dir::MPM] = mfcac; + distribution.f[dir::PPM] = mfaac; + distribution.f[dir::MMP] = mfcca; + distribution.f[dir::PMP] = mfaca; + distribution.f[dir::MPP] = mfcaa; + distribution.f[dir::PPP] = mfaaa; + distribution.f[dir::ZZZ] = mfbbb; } diff --git a/src/lbm/Distribution27.h b/src/lbm/Distribution27.h deleted file mode 100644 index cebd2468bf111439d6bf9820ff39b883f4289a1e..0000000000000000000000000000000000000000 --- a/src/lbm/Distribution27.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef LBM_DISTRIBUTION_27_H -#define LBM_DISTRIBUTION_27_H - -#ifndef __host__ -#define __host__ -#endif -#ifndef __device__ -#define __device__ -#endif - - -#include <basics/Core/DataTypes.h> - -namespace vf -{ -namespace lbm -{ - - -struct Distribution27 -{ - real f[27]; - - __host__ __device__ real getDensity_() const; -}; - - -__host__ __device__ real abs_internal(real value); - - -} -} - -#endif diff --git a/src/lbm/Distribution27.cpp b/src/lbm/KernelParameter.cpp similarity index 91% rename from src/lbm/Distribution27.cpp rename to src/lbm/KernelParameter.cpp index a84dc9c7fb355d6d642fb13ec0486bad17f7e20e..15a0f9c445fa4fc7b1fba4b0482da29068b72951 100644 --- a/src/lbm/Distribution27.cpp +++ b/src/lbm/KernelParameter.cpp @@ -1,4 +1,4 @@ -#include "Distribution27.h" +#include "KernelParameter.h" #include "MacroscopicQuantities.h" diff --git a/src/lbm/KernelParameter.h b/src/lbm/KernelParameter.h index b60e43439dda3f2c592b67b6f9491d010891a32c..95226628110637f3794c8a1f7e6f6c1f6dda937b 100644 --- a/src/lbm/KernelParameter.h +++ b/src/lbm/KernelParameter.h @@ -1,5 +1,5 @@ -#ifndef 
LBM_CUMULANT_CHIMERA_PARAMETER_H -#define LBM_CUMULANT_CHIMERA_PARAMETER_H +#ifndef LBM_KERNEL_PARAMETER_H +#define LBM_KERNEL_PARAMETER_H #ifndef __host__ #define __host__ @@ -10,13 +10,22 @@ #include <basics/Core/DataTypes.h> -#include "Distribution27.h" namespace vf { namespace lbm { +struct Distribution27 +{ + real f[27]; + + __host__ __device__ real getDensity_() const; +}; + + +__host__ __device__ real abs_internal(real value); + struct KernelParameter { @@ -27,6 +36,7 @@ struct KernelParameter + } } diff --git a/src/lbm/MacroscopicQuantities.h b/src/lbm/MacroscopicQuantities.h index 1a440b5322ca2b4ac63c68e35b2442551b5003f0..c37791294ff5b4edad21795f4ce0a32a18c5d236 100644 --- a/src/lbm/MacroscopicQuantities.h +++ b/src/lbm/MacroscopicQuantities.h @@ -17,7 +17,11 @@ namespace vf { namespace lbm { - + +//////////////////////////////////////////////////////////////////////////////////// +//! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref +//! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa 2015.05.001 ]</b></a> +//! inline __host__ __device__ real getDensity(const real *const &f /*[27]*/) { diff --git a/src/lbm/cuda/CMakeLists.txt b/src/lbm/cuda/CMakeLists.txt index 7b1741478550f3d297fe1a8254de63c902888125..be16988a480650cbab416652655af9766bcf8ec7 100644 --- a/src/lbm/cuda/CMakeLists.txt +++ b/src/lbm/cuda/CMakeLists.txt @@ -6,6 +6,7 @@ vf_add_library(NAME lbmCuda BUILDTYPE static PUBLIC_LINK basics FOLDER ../../lbm set_target_properties(lbmCuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) -set_source_files_properties(../Distribution27.cpp PROPERTIES LANGUAGE CUDA) +set_source_files_properties(../KernelParameter.cpp PROPERTIES LANGUAGE CUDA) + set_source_files_properties(../CumulantChimera.cpp PROPERTIES LANGUAGE CUDA) set_source_files_properties(../BGK.cpp PROPERTIES LANGUAGE CUDA) \ No newline at end of file
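
Note (not part of the patch): with the static getNewInstance() factories removed, call sites construct the unified kernels directly via std::make_shared, as KernelFactoryImp.cpp now does. Below is a minimal sketch of the new construction path; the helper name makeBgkUnifiedKernel is illustrative only, the include paths are taken as they appear in this diff, and para is assumed to be a fully initialized Parameter object for the given level.

    // Sketch only, under the assumptions stated above.
    #include <memory>
    #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h"
    #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/FluidFlowCompStrategy.h"

    std::shared_ptr<Kernel> makeBgkUnifiedKernel(std::shared_ptr<Parameter> para, int level)
    {
        // Kernels now take (para, level) in their constructor and forward to the new
        // KernelImp(para, level) base constructor, which stores both members.
        auto kernel = std::make_shared<vf::gpu::BGKUnified>(para, level);
        // The parameter-check strategy is still attached separately, as in the factory.
        kernel->setCheckParameterStrategy(FluidFlowCompStrategy::getInstance());
        return kernel;
    }

    // Typical use: makeBgkUnifiedKernel(para, level)->run();

Keeping construction in the constructor (rather than a static factory) lets the base class own para/level and removes the duplicated assignments the old per-kernel constructors carried.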