From ff9502742c6b5024aa1731e1ec8aba6f25c413c3 Mon Sep 17 00:00:00 2001
From: peters <peters@irmb.tu-bs.de>
Date: Thu, 27 May 2021 12:01:57 +0200
Subject: [PATCH] Add generic LBM kernel, which gets a lambda expression as a
 parameter for the concrete calculations

---
 apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp    |  5 +-
 .../LBM/CumulantK17LBMKernelUnified.cpp       |  5 +-
 src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h  |  4 +
 .../CumulantK15/CumulantK15Comp_Device.cu     | 34 ---------
 .../CumulantK15/CumulantK15Comp_Device.cuh    |  2 +-
 .../CumulantK15Unified/CumulantK15Unified.cu  | 52 +++++++++++++
 .../CumulantK15Unified/CumulantK15Unified.h   | 14 ++++
 .../CumulantK17Unified/CumulantK17Unified.cu  | 56 +++++++-------
 .../CumulantK17Unified/CumulantK17Unified.h   |  2 -
 .../CumulantK17Unified_Device.cuh             | 20 -----
 .../CumulantK17Unified_device.cu              | 76 -------------------
 .../FluidFlow/Compressible/CumulantKernel.cu  | 14 ++++
 .../FluidFlow/Compressible/CumulantKernel.cuh | 65 ++++++++++++++++
 .../Kernel/Utilities/CudaGrid.cpp             | 27 +++++++
 .../Kernel/Utilities/CudaGrid.h               | 25 ++++++
 .../KernelFactory/KernelFactoryImp.cpp        |  4 +
 src/lbm/CumulantChimeraK15.cpp                |  6 +-
 src/lbm/CumulantChimeraK15.h                  |  7 +-
 src/lbm/CumulantChimeraK17.cpp                |  6 +-
 src/lbm/CumulantChimeraK17.h                  |  6 +-
 src/lbm/CumulantChimeraParameter.h            | 33 ++++++++
 21 files changed, 287 insertions(+), 176 deletions(-)
 create mode 100644 src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
 create mode 100644 src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.h
 delete mode 100644 src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified_Device.cuh
 delete mode 100644 src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified_device.cu
 create mode 100644 src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantKernel.cu
 create mode 100644 src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantKernel.cuh
 create mode 100644 src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.cpp
 create mode 100644 src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.h
 create mode 100644 src/lbm/CumulantChimeraParameter.h

diff --git a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
index 070d94f6a..fc12e73a1 100644
--- a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
+++ b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
@@ -97,7 +97,8 @@ const real dt = (real)1.0e-3; //0.5e-3;
 const uint nx = 64;
 
 //std::string path("F:/Work/Computations/out/DrivenCavity/"); //LEGOLAS
-std::string path("D:/out/DrivenCavity"); //Mollok
+//std::string path("D:/out/DrivenCavity"); //Mollok
+std::string path("/home/sopeters/Computations/out/DrivenCavity64_unified"); // phoenix
 
 std::string simulationName("DrivenCavityChim");
 
@@ -186,7 +187,7 @@ void multipleLevel(const std::string& configPath)
 
         para->setVelocityRatio(velocity/ velocityLB);
 
-		para->setMainKernel("CumulantK17CompChim");
+		//para->setMainKernel("CumulantK17CompChim");
 
 		para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
             rho = (real)0.0;
diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.cpp b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.cpp
index d36ec956f..1b96b09ea 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.cpp
@@ -40,7 +40,9 @@
 #include "BCArray3D.h"
 
 //#include <lbm/CumulantChimera.h>
+#include <lbm/CumulantChimeraParameter.h>
 #include <lbm/CumulantChimeraK17.h>
+#include <lbm/constants/D3Q27.h>
 
 //#define PROOF_CORRECTNESS
 
@@ -244,7 +246,8 @@ void CumulantK17LBMKernelUnified::calculate(int step)
                     distribution.f[vf::lbm::dir::MMM] = mfaaa;
                     distribution.f[vf::lbm::dir::ZZZ] = mfbbb;
 
-                    vf::lbm::cumulantChimeraK17(distribution, omega, forces);
+                    vf::lbm::CumulantChimeraParameter chimeraParameter {distribution, omega, forces};
+                    vf::lbm::cumulantChimeraK17(chimeraParameter);
 
                     mfcbb = distribution.f[vf::lbm::dir::PZZ];
                     mfabb = distribution.f[vf::lbm::dir::MZZ];
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
index f902a7aae..2795ab94e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
@@ -5,6 +5,8 @@
 
 #include <memory>
 
+#include "Utilities/CudaGrid.h"
+
 class CheckParameterStrategy;
 class Parameter;
 
@@ -28,5 +30,7 @@ protected:
 	std::vector<PreProcessorType> myPreProcessorTypes;
 	KernelGroup myKernelGroup;
 
+	vf::gpu::CudaGrid cudaGrid;
+
 };
 #endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu
index 8011615dd..7349be584 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu
@@ -2,44 +2,10 @@
 #include "LBM/D3Q27.h"
 #include <lbm/constants/NumericConstants.h>
 
-#include <lbm/CumulantChimeraK15.h>
-
-#include "Kernel/Utilities/DistributionHelper.cuh"
-
 using namespace vf::lbm::constant;
 #include "math.h"
 
 extern "C" __global__ void LB_Kernel_CumulantK15Comp(real omega,
-    unsigned int* typeOfGridNode,
-    unsigned int* neighborX,
-    unsigned int* neighborY,
-    unsigned int* neighborZ,
-    real* distributions,
-    int size_Mat,
-    int level,
-    real* forces,
-    bool isEvenTimestep)
-{
-    const uint k = vf::gpu::getNodeIndex();
-    const uint nodeType = typeOfGridNode[k];
-
-    if (!vf::gpu::isValidFluidNode(k, size_Mat, nodeType))
-        return;
-
-    vf::gpu::DistributionWrapper distributionWrapper {
-        distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY, neighborZ
-    };
-
-    real level_forces[3];
-    vf::gpu::getLevelForce(forces[0], forces[1], forces[2], level, level_forces);
-
-    vf::lbm::cumulantChimeraK15(distributionWrapper.distribution, omega, level_forces);
-
-    distributionWrapper.write();
-}
-
-
-extern "C" __global__ void LB_Kernel_CumulantK15Comp_(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cuh
index 6a8eede33..1a3d00e3e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK15Comp(	real omega,
+__global__ void LB_Kernel_CumulantK15Comp(	real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
new file mode 100644
index 000000000..c40b6d837
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
@@ -0,0 +1,52 @@
+#include "CumulantK15Unified.h"
+
+#include "../CumulantKernel.cuh"
+
+#include "Parameter/Parameter.h"
+
+#include <lbm/CumulantChimeraK15.h>
+#include <lbm/Distribution27.h>
+
+// Factory: creates a shared CumulantK15Unified for the given parameter set and grid level.
+std::shared_ptr<CumulantK15Unified> CumulantK15Unified::getNewInstance(std::shared_ptr<Parameter> para, int level)
+{
+    return std::make_shared<CumulantK15Unified>(para, level);
+}
+
+// Launches vf::gpu::cumulantKernel with cumulantChimeraK15 wrapped in a device lambda.
+void CumulantK15Unified::run()
+{
+    vf::gpu::LBMKernelParameter kernelParameter{ para->getParD(level)->omega,
+                                                 para->getParD(level)->geoSP,
+                                                 para->getParD(level)->neighborX_SP,
+                                                 para->getParD(level)->neighborY_SP,
+                                                 para->getParD(level)->neighborZ_SP,
+                                                 para->getParD(level)->d0SP.f[0],
+                                                 (int)para->getParD(level)->size_Mat_SP,
+                                                 level,
+                                                 para->getForcesDev(),
+                                                 para->getParD(level)->evenOrOdd };
+
+    auto lambda = [] __device__(vf::lbm::CumulantChimeraParameter parameter) {
+        return vf::lbm::cumulantChimeraK15(parameter);
+    };
+
+    vf::gpu::cumulantKernel<<<cudaGrid.grid, cudaGrid.threads>>>(lambda, kernelParameter);
+    // Report failures under this kernel's own name, not the one of CumulantK15Comp.
+    getLastCudaError("LB_Kernel_CumulantK15Unified execution failed");
+}
+
+// Stores para/level, registers InitCompSP27 and sets up the launch grid; throws without BUILD_CUDA_LTO.
+CumulantK15Unified::CumulantK15Unified(std::shared_ptr<Parameter> para, int level)
+{
+#ifndef BUILD_CUDA_LTO
+    throw std::invalid_argument(
+        "To use the CumulantK15Unified kernel, pass -DBUILD_CUDA_LTO=ON to cmake. Requires: CUDA 11.2 & cc 5.0");
+#endif
+
+    this->para  = para;
+    this->level = level;
+    myPreProcessorTypes.push_back(InitCompSP27);
+    myKernelGroup = BasicKernel;
+    this->cudaGrid = vf::gpu::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP);
+}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.h
new file mode 100644
index 000000000..666a605c4
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.h
@@ -0,0 +1,14 @@
+#ifndef CUMULANT_K15_UNIFIED_COMP_H
+#define CUMULANT_K15_UNIFIED_COMP_H
+
+#include "Kernel/KernelImp.h"
+
+class CumulantK15Unified : public KernelImp // kernel whose run() launches vf::gpu::cumulantKernel with cumulantChimeraK15
+{
+public:
+    static std::shared_ptr<CumulantK15Unified> getNewInstance(std::shared_ptr<Parameter> para, int level); // factory, uses std::make_shared
+    void run(); // launches the kernel and checks the last CUDA error
+    // Must stay public: getNewInstance constructs the object through std::make_shared.
+    CumulantK15Unified(std::shared_ptr<Parameter> para, int level);
+};
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
index dae9436bf..acff5a6dd 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
@@ -1,43 +1,37 @@
 #include "CumulantK17Unified.h"
 
-#include "CumulantK17Unified_Device.cuh"
 #include "Parameter/Parameter.h"
-
+#include "../CumulantKernel.cuh"
+#include "Kernel/Utilities/CudaGrid.h"
 #include <stdexcept>
 
+#include <lbm/CumulantChimeraK17.h>
+
 std::shared_ptr<CumulantK17Unified> CumulantK17Unified::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
-    return std::shared_ptr<CumulantK17Unified>(new CumulantK17Unified(para, level));
+    return std::make_shared<CumulantK17Unified>(para, level);
 }
 
 void CumulantK17Unified::run()
 {
-    int numberOfThreads = para->getParD(level)->numberofthreads;
-    int size_Mat        = para->getParD(level)->size_Mat_SP;
-
-    int Grid = (size_Mat / numberOfThreads) + 1;
-    int Grid1, Grid2;
-    if (Grid > 512) {
-        Grid1 = 512;
-        Grid2 = (Grid / Grid1) + 1;
-    } else {
-        Grid1 = 1;
-        Grid2 = Grid;
-    }
-    dim3 grid(Grid1, Grid2);
-    dim3 threads(numberOfThreads, 1, 1);
-
-    vf::gpu::LB_Kernel_CumulantK17Unified<<<grid, threads>>>(
-        para->getParD(level)->omega,
-        para->getParD(level)->geoSP,
-        para->getParD(level)->neighborX_SP,
-        para->getParD(level)->neighborY_SP,
-        para->getParD(level)->neighborZ_SP,
-        para->getParD(level)->d0SP.f[0],
-        para->getParD(level)->size_Mat_SP,
-        level,
-        para->getForcesDev(),
-        para->getParD(level)->evenOrOdd);
+    vf::gpu::LBMKernelParameter kernelParameter
+	{	para->getParD(level)->omega,
+		para->getParD(level)->geoSP,
+		para->getParD(level)->neighborX_SP,
+		para->getParD(level)->neighborY_SP,
+		para->getParD(level)->neighborZ_SP,
+		para->getParD(level)->d0SP.f[0],
+		(int)para->getParD(level)->size_Mat_SP,
+		level,
+		para->getForcesDev(),
+		para->getParD(level)->evenOrOdd
+	};
+
+	auto lambda = [] __device__(vf::lbm::CumulantChimeraParameter parameter) {
+		return vf::lbm::cumulantChimeraK17(parameter);
+	};
+
+	vf::gpu::cumulantKernel<<< cudaGrid.grid, cudaGrid.threads >>>(lambda, kernelParameter);
 
     getLastCudaError("LB_Kernel_CumulantK17Unified execution failed");
 }
@@ -54,4 +48,6 @@ CumulantK17Unified::CumulantK17Unified(std::shared_ptr<Parameter> para, int leve
     myPreProcessorTypes.push_back(InitCompSP27);
 
     myKernelGroup = BasicKernel;
-}
\ No newline at end of file
+
+    this->cudaGrid = vf::gpu::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP);
+}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h
index ece7c66ec..d466b7696 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h
@@ -9,8 +9,6 @@ public:
     static std::shared_ptr<CumulantK17Unified> getNewInstance(std::shared_ptr<Parameter> para, int level);
     void run();
 
-private:
-    CumulantK17Unified();
     CumulantK17Unified(std::shared_ptr<Parameter> para, int level);
 };
 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified_Device.cuh
deleted file mode 100644
index ed8cc6e66..000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified_Device.cuh
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef LB_Kernel_CUMULANT_K17_UNIFIED_H
-#define LB_Kernel_CUMULANT_K17_UNIFIED_H
-
-#include <DataTypes.h>
-#include <cuda_runtime.h>
-
-
-namespace vf
-{
-namespace gpu 
-{
-__global__ void LB_Kernel_CumulantK17Unified(real omega, unsigned int *bcMatD, unsigned int *neighborX,
-                                                        unsigned int *neighborY, unsigned int *neighborZ, real *DDStart,
-                                                        int size_Mat, int level, real *forces,
-                                                        bool EvenOrOdd);
-
-}
-}
-
-#endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified_device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified_device.cu
deleted file mode 100644
index ec15a2a76..000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified_device.cu
+++ /dev/null
@@ -1,76 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
-//  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
-//  for more details.
-//  
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file Cumulant27chim.cu
-//! \ingroup GPU
-//! \author Martin Schoenherr, Soeren Peters
-//=======================================================================================
-/* Device code */
-#include <lbm/CumulantChimeraK17.h>
-
-#include "Kernel/Utilities/DistributionHelper.cuh"
-
-
-namespace vf
-{
-namespace gpu 
-{
-
-__global__ void LB_Kernel_CumulantK17Unified(
-    real omega,
-    uint* typeOfGridNode,
-    uint* neighborX,
-    uint* neighborY,
-    uint* neighborZ,
-    real* distributions,
-    int size_Mat,
-    int level,
-    real* forces,
-    bool isEvenTimestep)
-{
-    const uint k = getNodeIndex();
-    const uint nodeType = typeOfGridNode[k];
-
-    if (!isValidFluidNode(k, size_Mat, nodeType))
-        return;
-
-    DistributionWrapper distributionWrapper {
-        distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY, neighborZ
-    };
-
-    real level_forces[3];
-    getLevelForce(forces[0], forces[1], forces[2], level, level_forces);
-
-    vf::lbm::cumulantChimeraK17(distributionWrapper.distribution, omega, level_forces);
-
-    distributionWrapper.write();
-}
-
-
-}
-}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantKernel.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantKernel.cu
new file mode 100644
index 000000000..945ab2474
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantKernel.cu
@@ -0,0 +1,14 @@
+#include "CumulantKernel.cuh"
+
+
+#include "Kernel/Utilities/DistributionHelper.cuh"
+
+
+namespace vf
+{
+namespace gpu
+{
+
+
+}
+}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantKernel.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantKernel.cuh
new file mode 100644
index 000000000..53104c5bd
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantKernel.cuh
@@ -0,0 +1,65 @@
+#ifndef GPU_CUMULANT_KERNEL_H
+#define GPU_CUMULANT_KERNEL_H
+
+
+#include <DataTypes.h>
+#include <cuda_runtime.h>
+
+#include <lbm/Distribution27.h>
+#include <lbm/CumulantChimeraParameter.h>
+
+#include "Kernel/Utilities/DistributionHelper.cuh"
+
+namespace vf
+{
+namespace gpu
+{
+
+
+struct LBMKernelParameter // by-value argument bundle for cumulantKernel; callers aggregate-initialise it in member order
+{
+    real omega;                   // forwarded unchanged into lbm::CumulantChimeraParameter
+    unsigned int* typeOfGridNode; // indexed by the node index; the entry is handed to isValidFluidNode
+    unsigned int* neighborX;      // neighborX/Y/Z are handed to DistributionWrapper
+    unsigned int* neighborY;
+    unsigned int* neighborZ;
+    real* distributions;          // handed to DistributionWrapper; presumably updated by its write() - confirm
+    int size_Mat;                 // passed to isValidFluidNode and DistributionWrapper
+    int level;                    // passed to getLevelForce
+    real* forces;                 // entries [0], [1] and [2] are read and passed to getLevelForce
+    bool isEvenTimestep;          // passed to DistributionWrapper
+};
+
+// Generic cumulant LBM kernel. Each thread whose node index passes the bounds and isValidFluidNode
+// checks builds a DistributionWrapper for its node, applies `kernel` (a device-callable functor
+// taking a lbm::CumulantChimeraParameter) and then calls write() on the wrapper.
+template<typename KernelFunctor>
+__global__ void cumulantKernel(KernelFunctor kernel, LBMKernelParameter kernelParameter)
+{
+    const uint k = vf::gpu::getNodeIndex();
+
+    // The launch grid is rounded up (see CudaGrid), so k may be >= size_Mat: return before indexing.
+    if (k >= static_cast<uint>(kernelParameter.size_Mat))
+        return;
+
+    if (!vf::gpu::isValidFluidNode(k, kernelParameter.size_Mat, kernelParameter.typeOfGridNode[k]))
+        return;
+
+    vf::gpu::DistributionWrapper distributionWrapper {
+        kernelParameter.distributions, kernelParameter.size_Mat, kernelParameter.isEvenTimestep, k,
+        kernelParameter.neighborX, kernelParameter.neighborY, kernelParameter.neighborZ
+    };
+
+    real level_forces[3];
+    vf::gpu::getLevelForce(kernelParameter.forces[0], kernelParameter.forces[1], kernelParameter.forces[2], kernelParameter.level, level_forces);
+
+    lbm::CumulantChimeraParameter chimeraParameter {distributionWrapper.distribution, kernelParameter.omega, level_forces};
+    kernel(chimeraParameter);
+
+    distributionWrapper.write();
+}
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.cpp
new file mode 100644
index 000000000..fa17bf449
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.cpp
@@ -0,0 +1,27 @@
+#include "CudaGrid.h"
+
+
+
+namespace vf
+{
+namespace gpu
+{
+
+// Launch configuration with (size_matrix / numberOfThreads) + 1 blocks of numberOfThreads threads;
+// more than 512 blocks are arranged as a 2D grid that is 512 blocks wide.
+CudaGrid::CudaGrid(unsigned int numberOfThreads, unsigned int size_matrix)
+{
+    const int maxGridWidth = 512;
+    const int blockCount   = (size_matrix / numberOfThreads) + 1;
+
+    // Small launches stay one block wide; large ones get the fixed width plus enough rows.
+    const bool isFolded = blockCount > maxGridWidth;
+    const int gridX     = isFolded ? maxGridWidth : 1;
+    const int gridY     = isFolded ? (blockCount / gridX) + 1 : blockCount;
+
+    threads = dim3(numberOfThreads, 1, 1);
+    grid    = dim3(gridX, gridY);
+}
+
+}
+}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.h b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.h
new file mode 100644
index 000000000..27a18a588
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.h
@@ -0,0 +1,25 @@
+#ifndef GPU_CUDA_GRID_H
+#define GPU_CUDA_GRID_H
+
+
+#include <cuda_runtime.h>
+
+namespace vf
+{
+namespace gpu
+{
+
+
+struct CudaGrid // kernel launch configuration (block and grid dimensions)
+{
+    dim3 threads; // threads per block: (numberOfThreads, 1, 1)
+    dim3 grid;    // blocks per grid, computed in CudaGrid.cpp
+    // Parameter names and order match the definition in CudaGrid.cpp and every caller.
+    CudaGrid(unsigned int numberOfThreads, unsigned int size_matrix);
+    CudaGrid() = default;
+};
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
index b99d44a85..e56462a07 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
@@ -15,6 +15,7 @@
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.h"
+#include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.h"
@@ -119,6 +120,9 @@ std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter>
     } else if (kernel == "CumulantK17Comp") {
         newKernel     = CumulantK17Comp::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
+    } else if (kernel == "CumulantK15Unified") {
+        newKernel     = CumulantK15Unified::getNewInstance(para, level);
+        checkStrategy = FluidFlowCompStrategy::getInstance();
     } else if (kernel == "CumulantK17Unified") {
         newKernel     = CumulantK17Unified::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
diff --git a/src/lbm/CumulantChimeraK15.cpp b/src/lbm/CumulantChimeraK15.cpp
index 66ccf8733..994f239cd 100644
--- a/src/lbm/CumulantChimeraK15.cpp
+++ b/src/lbm/CumulantChimeraK15.cpp
@@ -25,8 +25,12 @@ using namespace constant;
 //! and \ref
 //! <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.07.004 ]</b></a>
 //////////////////////////////////////////////////////////////////////////
-__host__ __device__ void cumulantChimeraK15(Distribution27& distribution, real omega, real* forces)
+__host__ __device__ void cumulantChimeraK15(CumulantChimeraParameter parameter)
 {
+    auto& distribution = parameter.distribution;
+    const auto omega = parameter.omega;
+    const auto* forces = parameter.forces;
+
     ////////////////////////////////////////////////////////////////////////////////////
     //! - Read distributions: style of reading and writing the distributions from/to 
     //! stored arrays dependent on timestep is based on the esoteric twist algorithm
diff --git a/src/lbm/CumulantChimeraK15.h b/src/lbm/CumulantChimeraK15.h
index e32470c64..8c1ffa957 100644
--- a/src/lbm/CumulantChimeraK15.h
+++ b/src/lbm/CumulantChimeraK15.h
@@ -8,9 +8,7 @@
 #define __device__
 #endif
 
-#include <basics/Core/DataTypes.h>
-
-#include "Distribution27.h"
+#include "CumulantChimeraParameter.h"
 
 namespace vf
 {
@@ -23,7 +21,8 @@ namespace lbm
 //! and \ref
 //! <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.07.004 ]</b></a>
 //////////////////////////////////////////////////////////////////////////
-__host__ __device__ void cumulantChimeraK15(Distribution27& distribution, real omega, real* forces);
+__host__ __device__ void cumulantChimeraK15(CumulantChimeraParameter parameter);
+
 
 }
 }
diff --git a/src/lbm/CumulantChimeraK17.cpp b/src/lbm/CumulantChimeraK17.cpp
index 17a30eaa5..ecd2f4540 100644
--- a/src/lbm/CumulantChimeraK17.cpp
+++ b/src/lbm/CumulantChimeraK17.cpp
@@ -25,8 +25,12 @@ using namespace constant;
 //! and \ref
 //! <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.07.004 ]</b></a>
 //////////////////////////////////////////////////////////////////////////
-__host__ __device__ void cumulantChimeraK17(Distribution27& distribution, real omega, real* forces)
+__host__ __device__ void cumulantChimeraK17(CumulantChimeraParameter parameter)
 {
+    auto& distribution = parameter.distribution;
+    const auto omega = parameter.omega;
+    const auto* forces = parameter.forces;
+
     ////////////////////////////////////////////////////////////////////////////////////
     //! - Read distributions: style of reading and writing the distributions from/to 
     //! stored arrays dependent on timestep is based on the esoteric twist algorithm
diff --git a/src/lbm/CumulantChimeraK17.h b/src/lbm/CumulantChimeraK17.h
index 3feca5d67..94e5f8bfa 100644
--- a/src/lbm/CumulantChimeraK17.h
+++ b/src/lbm/CumulantChimeraK17.h
@@ -8,9 +8,7 @@
 #define __device__
 #endif
 
-#include <basics/Core/DataTypes.h>
-
-#include "Distribution27.h"
+#include "CumulantChimeraParameter.h"
 
 namespace vf
 {
@@ -23,7 +21,7 @@ namespace lbm
 //! and \ref
 //! <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.07.004 ]</b></a>
 //////////////////////////////////////////////////////////////////////////
-__host__ __device__ void cumulantChimeraK17(Distribution27& distribution, real omega, real* forces);
+__host__ __device__ void cumulantChimeraK17(CumulantChimeraParameter parameter);
 
 }
 }
diff --git a/src/lbm/CumulantChimeraParameter.h b/src/lbm/CumulantChimeraParameter.h
new file mode 100644
index 000000000..6cb820c57
--- /dev/null
+++ b/src/lbm/CumulantChimeraParameter.h
@@ -0,0 +1,33 @@
+#ifndef LBM_CUMULANT_CHIMERA_PARAMETER_H
+#define LBM_CUMULANT_CHIMERA_PARAMETER_H
+
+#ifndef __host__
+#define __host__
+#endif
+#ifndef __device__
+#define __device__
+#endif
+
+#include <basics/Core/DataTypes.h>
+
+#include "Distribution27.h"
+
+namespace vf
+{
+namespace lbm
+{
+
+
+struct CumulantChimeraParameter // argument bundle for cumulantChimeraK15 / cumulantChimeraK17
+{
+    Distribution27& distribution; // reference member: the callee works on the caller's distribution, not on a copy
+    real omega;                   // copied into a local const in the chimera functions
+    real* forces;                 // read through a const pointer there; the GPU kernel passes a 3-element array
+};
+
+
+
+}
+}
+
+#endif
-- 
GitLab