diff --git a/.gitignore b/.gitignore
index 578d3f91a2de6cac430328063dc90e85611961e1..d16238c4ac7d45440117af9bc7ac0479a90dae2d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,12 +21,6 @@ __pycache__/
 output/
 logs/
 
-# Input files
-stl/
-
-# Scripts
-scripts/
-
 # scripts
 scripts/
 
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/CMakeLists.txt b/apps/gpu/LBM/DrivenCavityMultiGPU/CMakeLists.txt
index dba88229da2d2fb66dd893f058ece95005fa60e1..51b8db1edf126ebe7e2f3d5808496121270433c5 100644
--- a/apps/gpu/LBM/DrivenCavityMultiGPU/CMakeLists.txt
+++ b/apps/gpu/LBM/DrivenCavityMultiGPU/CMakeLists.txt
@@ -5,6 +5,4 @@ vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenera
 set_source_files_properties(DrivenCavityMultiGPU.cpp PROPERTIES LANGUAGE CUDA)
 
 set_target_properties(DrivenCavityMultiGPU PROPERTIES 
-	CUDA_SEPARABLE_COMPILATION ON
-	VS_DEBUGGER_COMMAND "C:/Program Files/Microsoft MPI/Bin/mpiexec.exe"
-    VS_DEBUGGER_COMMAND_ARGUMENTS "-n 2 \"$<TARGET_FILE:DrivenCavityMultiGPU>\"")
\ No newline at end of file
+	CUDA_SEPARABLE_COMPILATION ON)
\ No newline at end of file
diff --git a/apps/gpu/LBM/MusselOyster/CMakeLists.txt b/apps/gpu/LBM/MusselOyster/CMakeLists.txt
index 2dbbd7a7fcb11b8f619fdcc6bcf5da1b716950b7..595d9ff7250d984f80e8d0d54dad0b11ae7e71e2 100644
--- a/apps/gpu/LBM/MusselOyster/CMakeLists.txt
+++ b/apps/gpu/LBM/MusselOyster/CMakeLists.txt
@@ -5,6 +5,4 @@ vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenera
 set_source_files_properties(MusselOyster.cpp PROPERTIES LANGUAGE CUDA)
 
 set_target_properties(MusselOyster PROPERTIES 
-	CUDA_SEPARABLE_COMPILATION ON
-	VS_DEBUGGER_COMMAND "C:/Program Files/Microsoft MPI/Bin/mpiexec.exe"
-    VS_DEBUGGER_COMMAND_ARGUMENTS "-n 2 \"$<TARGET_FILE:MusselOyster>\"")
\ No newline at end of file
+	CUDA_SEPARABLE_COMPILATION ON)
\ No newline at end of file
diff --git a/apps/gpu/LBM/MusselOyster/MusselOysterMPI.bat b/apps/gpu/LBM/MusselOyster/MusselOysterMPI.bat
deleted file mode 100644
index c574cd5217bc46eb687d81ad2bedb2df53d295fc..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/MusselOyster/MusselOysterMPI.bat
+++ /dev/null
@@ -1,4 +0,0 @@
-:: don't close cmd on error
-if not defined in_subprocess (cmd /k set in_subprocess=y ^& %0 %*) & exit )
-:: @ECHO OFF
-mpiexec -n 2 C:\Users\Master\Documents\MasterAnna\VirtualFluids_dev\build\bin\Release\MusselOyster.exe
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/CMakeLists.txt b/apps/gpu/LBM/SphereScaling/CMakeLists.txt
index 49bee20f7cfc3561c62cf1b36c2f2992e7baada8..db3747f2b620cab1efc5cf50f02aee1a8fee4a54 100644
--- a/apps/gpu/LBM/SphereScaling/CMakeLists.txt
+++ b/apps/gpu/LBM/SphereScaling/CMakeLists.txt
@@ -5,6 +5,4 @@ vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenera
 set_source_files_properties(SphereScaling.cpp PROPERTIES LANGUAGE CUDA)
 
 set_target_properties(SphereScaling PROPERTIES 
-	CUDA_SEPARABLE_COMPILATION ON)
-	# VS_DEBUGGER_COMMAND "C:/Program Files/Microsoft MPI/Bin/mpiexec.exe"
-    # VS_DEBUGGER_COMMAND_ARGUMENTS "-n 2 \"$<TARGET_FILE:SphereScaling>\"")
\ No newline at end of file
+	CUDA_SEPARABLE_COMPILATION ON)
\ No newline at end of file
diff --git a/gpu.cmake b/gpu.cmake
index 2bf803ab7e3725dc648074fe1c250cdc49c1f662..5b70ed9da9cb5f0ac56d09a0f91f0a6b6d13b89a 100644
--- a/gpu.cmake
+++ b/gpu.cmake
@@ -31,14 +31,14 @@ IF (BUILD_VF_GPU)
     #add_subdirectory(targets/apps/LBM/BaselNU)
     #add_subdirectory(targets/apps/LBM/BaselMultiGPU)
 
-    #add_subdirectory(apps/gpu/LBM/DrivenCavity)
+    add_subdirectory(apps/gpu/LBM/DrivenCavity)
     #add_subdirectory(apps/gpu/LBM/WTG_RUB)
     #add_subdirectory(apps/gpu/LBM/gridGeneratorTest)
     #add_subdirectory(apps/gpu/LBM/TGV_3D)
     #add_subdirectory(apps/gpu/LBM/TGV_3D_MultiGPU)
-	add_subdirectory(apps/gpu/LBM/SphereScaling)
-    add_subdirectory(apps/gpu/LBM/DrivenCavityMultiGPU)
-	add_subdirectory(apps/gpu/LBM/MusselOyster)
+	#add_subdirectory(apps/gpu/LBM/SphereScaling)
+    #add_subdirectory(apps/gpu/LBM/DrivenCavityMultiGPU)
+	#add_subdirectory(apps/gpu/LBM/MusselOyster)
     #add_subdirectory(apps/gpu/LBM/Poiseuille)
     #add_subdirectory(apps/gpu/LBM/ActuatorLine)
     #add_subdirectory(apps/gpu/LBM/BoundaryLayer)
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
index c6116ea37e6a6b17c7c3ded73d3e8478f07c41da..c8f281702f1b8121dfb9f7d3e0d0343f9a0374c4 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
@@ -10,7 +10,7 @@
 //! \file ExchangeData27.h
 //! \ingroup GPU
 //! \author Martin Schoenherr, Anna Wellmann
-//! \brief routines for data exchange when running simulations on multiple GPUs
+//! \brief Routines for data exchange when running simulations on multiple GPUs
 
 //////////////////////////////////////////////////////////////////////////
 // 1D domain decomposition
@@ -23,19 +23,19 @@ extern "C" void exchangePostCollDataGPU27(Parameter *para, vf::gpu::Communicator
 
 // functions used for all directions
 
-//! \brief collect the send nodes in a buffer on the gpu
+//! \brief Collect the send nodes in a buffer on the gpu
 extern "C" void collectNodesInSendBufferGPU(Parameter *para, int level, int streamIndex,
                                             std::vector<ProcessNeighbor27> *sendProcessNeighbor,
                                             unsigned int numberOfSendProcessNeighbors);
-//! \brief distribute the receive nodes from the buffer on the gpu
+//! \brief Distribute the receive nodes from the buffer on the gpu
 extern "C" void scatterNodesFromRecvBufferGPU(Parameter *para, int level, int streamIndex,
                                               std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                                               unsigned int numberOfRecvProcessNeighbors);
-//! \brief copy nodes which are part of the communication in multiple directions
+//! \brief Copy nodes which are part of the communication in multiple directions
 //! \details The nodes are copied from the receive buffer in one direction to the send buffer in another direction. The
 //! copy operation is conducted on the cpu. 
-//! \ref see master thesis of Anna Wellmann (p. 56f: "Communication Hiding bei
-//! der Verwendung eines uniformen Simulationsgitters") 
+//! See [master thesis of Anna Wellmann (p. 56f: "Communication Hiding bei
+//! der Verwendung eines uniformen Simulationsgitters")]
 //! \param edgeNodes determines from where to where the nodes are
 //! copied 
 //! \param recvProcessNeighborHost is a reference to the receive buffer on the host, nodes are copied from here
@@ -47,16 +47,16 @@ extern "C" void copyEdgeNodes(std::vector<LBMSimulationParameter::EdgeNodePositi
 //////////////////////////////////////////////////////////////////////////
 // x
 
-//! \brief collect the send nodes for communication in the x direction in a buffer on the gpu
-//! \details needed to exchange all nodes, used in the communication after collision step
+//! \brief Collect the send nodes for communication in the x direction in a buffer on the gpu
+//! \details Needed to exchange all nodes, used in the communication after the collision step
 extern "C" void prepareExchangeCollDataXGPU27AllNodes(Parameter *para, int level, int streamIndex);
-//! \brief collect the send nodes for communication in the x direction in a buffer on the gpu
+//! \brief Collect the send nodes for communication in the x direction in a buffer on the gpu
 //! \details Only exchange nodes which are part of the interpolation process on refined grids. This function is used in
 //! the exchange which takes place after the interpolation fine to coarse and before the interpolation coarse to fine.
-//! \ref see master thesis of Anna Wellmann
+//! See [master thesis of Anna Wellmann]
 extern "C" void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
-//! \brief exchange routine in x direction for simulations on multiple gpus
-//! \details send and receive the nodes from the communication buffers on the gpus
+//! \brief Exchange routine in x direction for simulations on multiple gpus
+//! \details Send and receive the nodes from the communication buffers on the gpus.
 //! \param Communicator is needed for the communication between the processes with mpi
 //! \param CudaMemoryManager is needed for moving the data between host and device
 //! \param streamIndex is the index of a CUDA Stream, which is needed for communication hiding
@@ -68,23 +68,24 @@ extern "C" void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &c
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborHost,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborHost);
-//! \brief calls exchangeCollDataXGPU27() for exchanging all nodes
-//! \details used in the communication after collision step
+//! \brief Calls exchangeCollDataXGPU27() for exchanging all nodes
+//! \details Used in the communication after the collision step
 extern "C" void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
                                                CudaMemoryManager *cudaManager, int level, int streamIndex);
-//! \brief calls exchangeCollDataXGPU27() for exchanging the nodes, which are part of the communication between the two
-//! interpolation processes on refined grids \details Only exchange nodes which are part of the interpolation process on
+//! \brief Calls exchangeCollDataXGPU27() for exchanging the nodes which are part of the communication between the two
+//! interpolation processes on refined grids 
+//! \details Only exchange nodes which are part of the interpolation process on
 //! refined grids. This function is used in the exchange which takes place after the interpolation fine to coarse and
-//! before the interpolation coarse to fine. \ref see master thesis of Anna Wellmann
+//! before the interpolation coarse to fine. See [master thesis of Anna Wellmann]
 extern "C" void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
                                                 CudaMemoryManager *cudaManager, int level, int streamIndex);
-//! \brief distribute the receive nodes (x direction) from the buffer on the gpu
-//! \details needed to exchange all nodes, used in the communication after collision step
+//! \brief Distribute the receive nodes (x direction) from the buffer on the gpu
+//! \details Needed to exchange all nodes, used in the communication after the collision step
 extern "C" void scatterNodesFromRecvBufferXGPU27AllNodes(Parameter *para, int level, int streamIndex);
-//! \brief distribute the receive nodes (x direction) from the buffer on the gpu
+//! \brief Distribute the receive nodes (x direction) from the buffer on the gpu
 //! \details Only exchange nodes which are part of the interpolation process on refined grids. This function is used in
 //! the exchange which takes place after the interpolation fine to coarse and before the interpolation coarse to fine.
-//! \ref see master thesis of Anna Wellmann
+//! See [master thesis of Anna Wellmann]
 extern "C" void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
 
 //////////////////////////////////////////////////////////////////////////
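For orientation, the functions documented above form a three-phase pattern per direction and per exchange: collect the send nodes into a buffer on the GPU, exchange the buffers between the processes via MPI, then scatter the received nodes back on the GPU. A minimal sketch of that call order for the x direction follows; the wrapper name is hypothetical, the surrounding update logic is not part of this diff, and para, comm, cudaManager and streamIndex are assumed to be set up elsewhere.

// Illustrative only: exchange of all nodes in x direction after the collision step.
// The *AfterFtoC variants follow the same pattern for the reduced exchange on refined grids.
void exchangeXAfterCollision(Parameter *para, vf::gpu::Communicator &comm,
                             CudaMemoryManager *cudaManager, int level, int streamIndex)
{
    prepareExchangeCollDataXGPU27AllNodes(para, level, streamIndex);              // 1) collect send nodes into the GPU buffer
    exchangeCollDataXGPU27AllNodes(para, comm, cudaManager, level, streamIndex);  // 2) send/receive the buffers (MPI)
    scatterNodesFromRecvBufferXGPU27AllNodes(para, level, streamIndex);           // 3) distribute the received nodes on the GPU
}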
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h
index a58f320a0e72e1ac5b14eb8b3d1ccb3a7766d2a3..3c68ef7c57bc37b9c1f713df1052d483bee3bda1 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h
@@ -1,7 +1,7 @@
 //! \file IndexRearrangementForStreams.h
 //! \ingroup GPU
 //! \author Anna Wellmann
-//! \ref master thesis of Anna Wellmann
+//! \details See [master thesis of Anna Wellmann]
 
 #ifndef IndexRearrangementForStreams_H
 #define IndexRearrangementForStreams_H
@@ -16,39 +16,30 @@
 
 class Parameter;
 class GridBuilder;
-namespace vf
-{
-namespace gpu
+namespace vf::gpu
 {
 class Communicator;
 }
-} // namespace vf
 
 class IndexRearrangementForStreams
 {
-private:
-    std::shared_ptr<GridBuilder> builder;
-    std::shared_ptr<Parameter> para;
-    vf::gpu::Communicator& communicator;
 public:
-    //! \brief construct IndexRearrangementForStreams object
+    //! \brief Construct IndexRearrangementForStreams object
     IndexRearrangementForStreams(std::shared_ptr<Parameter> para, std::shared_ptr<GridBuilder> builder, vf::gpu::Communicator& communicator);
 
     //////////////////////////////////////////////////////////////////////////
     // communication after coarse to fine
     //////////////////////////////////////////////////////////////////////////
 
-    //! \brief initialize the arrays for the communication after the interpolation from fine to coarse in x direction
+    //! \brief Initialize the arrays for the communication after the interpolation from fine to coarse in x direction
     //! \details Only the nodes involved in the interpolation need to be exchanged. Therefore in this method all nodes,
     //! which are part of the interpolation as well as the communication, are identified.
-    //!
-    //! \ref see master thesis of Anna
-    //! Wellmann (p. 59-62: "Reduzieren der auszutauschenden Knoten")
+    //! See [master thesis of Anna Wellmann (p. 59-62: "Reduzieren der auszutauschenden Knoten")]
     void initCommunicationArraysForCommAfterFinetoCoarseX(const uint &level, int j, int direction);
-    //! \brief initialize the arrays for the communication after the interpolation from fine to coarse in y direction
+    //! \brief Initialize the arrays for the communication after the interpolation from fine to coarse in y direction
     //! \details --> see x direction
     void initCommunicationArraysForCommAfterFinetoCoarseY(const uint &level, int j, int direction);
-    //! \brief initialize the arrays for the communication after the interpolation from fine to coarse in z direction
+    //! \brief Initialize the arrays for the communication after the interpolation from fine to coarse in z direction
     //! \details --> see x direction
     void initCommunicationArraysForCommAfterFinetoCoarseZ(const uint &level, int j, int direction);
 
@@ -57,7 +48,7 @@ public:
     // split interpolation cells
     //////////////////////////////////////////////////////////////////////////
 
-    //! \brief split the interpolation cells from coarse to fine into border an bulk
+    //! \brief Split the interpolation cells from coarse to fine into border and bulk
     //! \details For communication hiding, the interpolation cells from the coarse to the fine grid need to be split
     //! into two groups:
     //!
@@ -65,10 +56,10 @@ public:
     //!
     //! - the other cells which are not directly related to the communication between the two gpus --> "bulk"
     //!
-    //! \ref see master thesis of Anna Wellmann (p. 62-68: "Überdeckung der reduzierten Kommunikation")
+    //! See [master thesis of Anna Wellmann (p. 62-68: "Überdeckung der reduzierten Kommunikation")]
     void splitCoarseToFineIntoBorderAndBulk(const uint &level);
 
-    //! \brief split the interpolation cells from fine to coarse into border an bulk
+    //! \brief Split the interpolation cells from fine to coarse into border and bulk
     //! \details For communication hiding, the interpolation cells from the fine to the coarse grid need to be split
     //! into two groups:
     //!
@@ -76,7 +67,7 @@ public:
     //!
     //! - the other cells which are not directly related to the communication between the two gpus --> "bulk"
     //!
-    //! \ref see master thesis of Anna Wellmann (p. 62-68: "Überdeckung der reduzierten Kommunikation")
+    //! See [master thesis of Anna Wellmann (p. 62-68: "Überdeckung der reduzierten Kommunikation")]
     void splitFineToCoarseIntoBorderAndBulk(const uint &level);
 
 private:
@@ -84,7 +75,7 @@ private:
     // communication after coarse to fine
     //////////////////////////////////////////////////////////////////////////
 
-    //! \brief inits pointers for reduced communication after interpolation fine to coarse by copying them from "normal"
+    //! \brief Initialize pointers for reduced communication after interpolation fine to coarse by copying them from "normal"
     //! communication
     void copyProcessNeighborToCommAfterFtoCX(const uint &level, int indexOfProcessNeighbor);
     void copyProcessNeighborToCommAfterFtoCY(const uint &level, int indexOfProcessNeighbor);
@@ -97,7 +88,7 @@ private:
     void reorderSendIndicesForCommAfterFtoCZ(int direction, int level, int indexOfProcessNeighbor,
                                              std::vector<uint> &sendIndicesForCommAfterFtoCPositions);
 
-    //! \brief the send indices are reordered for the communication after the interpolation from fine to coarse
+    //! \brief The send indices are reordered for the communication after the interpolation from fine to coarse
     //! \details The indices of nodes which are part of the interpolation are moved to the front of the vector with the send
     //! indices. 
     //! \pre para->getParH(level)->intCF needs to be initialized
@@ -106,22 +97,22 @@ private:
     //! \param sendIndicesForCommAfterFtoCPositions stores each sendIndex's positions before reordering
     void reorderSendIndicesForCommAfterFtoC(int *sendIndices, int &numberOfSendNodesAfterFtoC, int direction,
                                             int level, std::vector<uint> &sendIndicesForCommAfterFtoCPositions);
-    //! \brief check if a sparse index occurs in the ICellFCC
+    //! \brief Check if a sparse index occurs in the ICellFCC
     bool isSparseIndexInICellFCC(uint sizeOfICellFCC, int sparseIndexSend, int level);
-    //! \brief aggregate all nodes in the coarse cells for the interpolation in coarse to fine
+    //! \brief Aggregate all nodes in the coarse cells for the interpolation in coarse to fine
     //! \details For the coarse cells in the interpolation from coarse to fine only one node is stored. This method
     //! looks for the other nodes of each cell and puts them into a vector. Duplicate nodes are only stored once.
     void aggregateNodesInICellCFC(int level, std::vector<uint> &nodesCFC);
-    //! \brief add index to sendIndicesAfterFtoC and sendIndicesForCommAfterFtoCPositions, but omit indices which are already in sendIndicesAfterFtoC
+    //! \brief Add index to sendIndicesAfterFtoC and sendIndicesForCommAfterFtoCPositions, but omit indices which are already in sendIndicesAfterFtoC
     void addUniqueIndexToCommunicationVectors(std::vector<int> &sendIndicesAfterFtoC, int &sparseIndexSend,
                                               std::vector<unsigned int> &sendIndicesForCommAfterFtoCPositions,
                                               uint &posInSendIndices) const;
-    //! \brief find if a sparse index is a send index. If true, call addUniqueIndexToCommunicationVectors()
+    //! \brief Find if a sparse index is a send index. If true, call addUniqueIndexToCommunicationVectors()
     void
     findIfSparseIndexIsInSendIndicesAndAddToCommVectors(int sparseIndex, int *sendIndices, uint numberOfSendIndices,
                                                         std::vector<int> &sendIndicesAfterFtoC,
                                                         std::vector<uint> &sendIndicesForCommAfterFtoCPositions) const;
-    //! \brief find all indices which are not part of the communication after the interpolation from fine to coarse
+    //! \brief Find all indices which are not part of the communication after the interpolation from fine to coarse
     void findIndicesNotInCommAfterFtoC(const uint &numberOfSendOrRecvIndices, int *sendOrReceiveIndices,
                                        std::vector<int> &sendOrReceiveIndicesAfterFtoC,
                                        std::vector<int> &sendOrIndicesOther);
@@ -133,7 +124,7 @@ private:
     void reorderRecvIndicesForCommAfterFtoCZ(int direction, int level, int indexOfProcessNeighbor,
                                              std::vector<uint> &sendIndicesForCommAfterFtoCPositions);
                                              
-    //! \brief reorder the receive indices in the same way that the send indices were reordered.
+    //! \brief Reorder the receive indices in the same way that the send indices were reordered.
     //! \details When the send indices are reordered, the receive indices need to be reordered accordingly.
     //! \pre sendIndicesForCommAfterFtoCPositions should not be empty
     //! \param recvIndices is the pointer to the vector with the receive indices, which will be reordered in this function
@@ -159,6 +150,11 @@ private:
     void getGridInterfaceIndicesBorderBulkFC(int level);
 
 
+private:
+    std::shared_ptr<GridBuilder> builder;
+    std::shared_ptr<Parameter> para;
+    vf::gpu::Communicator& communicator;
+
     // used for tests
     friend class IndexRearrangementForStreamsTest_reorderSendIndices;
 };
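The reordering described in the comments above is essentially a stable partition of the send indices: indices that are needed for the communication after the fine-to-coarse interpolation are moved to the front (their former positions are recorded so the receive indices can be reordered the same way), while all other indices keep their relative order behind them. A simplified sketch of that idea is shown below, with hypothetical names and a plain std::set standing in for the ICellFCC lookup; the real implementation works on raw index arrays and sparse indices.

#include <set>
#include <vector>

// Sketch: move the indices contained in indicesAfterFtoC to the front of sendIndices,
// remember their original positions, and report how many of them there are.
void reorderSendIndicesSketch(std::vector<int> &sendIndices, const std::set<int> &indicesAfterFtoC,
                              int &numberOfSendNodesAfterFtoC, std::vector<unsigned int> &positionsBeforeReordering)
{
    std::vector<int> border; // indices involved in the interpolation ("border")
    std::vector<int> bulk;   // all other send indices ("bulk")
    for (unsigned int pos = 0; pos < sendIndices.size(); pos++) {
        if (indicesAfterFtoC.count(sendIndices[pos]) > 0) {
            border.push_back(sendIndices[pos]);
            positionsBeforeReordering.push_back(pos); // needed to reorder the receive indices identically
        } else {
            bulk.push_back(sendIndices[pos]);
        }
    }
    numberOfSendNodesAfterFtoC = static_cast<int>(border.size());
    border.insert(border.end(), bulk.begin(), bulk.end());
    sendIndices = border;
}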
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
index e6960cc18ff7905fdcc351f3396a7a0948243dbf..3151e6bedeb6a96666f11f0040de2c95b20cc42c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
+++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
@@ -27,25 +27,6 @@ void KernelImp::setCheckParameterStrategy(std::shared_ptr<CheckParameterStrategy
     this->checkStrategy = strategy;
 }
 
-
-
 KernelImp::KernelImp(std::shared_ptr<Parameter> para, int level) : para(para), level(level) {}
 
-KernelImp::KernelImp() {}
-
-std::unique_ptr<std::pair<dim3, dim3>> KernelImp::calcGridDimensions(unsigned int size_Mat, int numberOfThreads)
-{
-    int Grid = (size_Mat / numberOfThreads) + 1;
-    int Grid1, Grid2;
-    if (Grid > 512) {
-        Grid1 = 512;
-        Grid2 = (Grid / Grid1) + 1;
-    } else {
-        Grid1 = 1;
-        Grid2 = Grid;
-    }
-    dim3 grid(Grid1, Grid2);
-    dim3 threads(numberOfThreads, 1, 1);
-    std::pair<dim3, dim3> dimensions(grid, threads);
-    return std::make_unique<std::pair<dim3, dim3>>(dimensions);
-}
+KernelImp::KernelImp() {}
\ No newline at end of file
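The removed calcGridDimensions() helper is superseded by vf::cuda::CudaGrid, which the CumulantK17* kernels below now construct once in their constructors and reuse at launch time via cudaGrid.grid and cudaGrid.threads. For reference, the computation the removed helper performed (and which CudaGrid is assumed to encapsulate equivalently) is restated in the sketch below; the function name is hypothetical.

#include <cuda_runtime.h>
#include <utility>

// Sketch of the deleted grid-dimension logic: one block dimension of numberOfThreads threads,
// and a 2D grid that is split at 512 blocks in x.
static std::pair<dim3, dim3> makeLaunchDimensions(unsigned int size_Mat, unsigned int numberOfThreads)
{
    unsigned int numberOfBlocks = size_Mat / numberOfThreads + 1;
    dim3 grid = (numberOfBlocks > 512) ? dim3(512, numberOfBlocks / 512 + 1)
                                       : dim3(1, numberOfBlocks);
    dim3 threads(numberOfThreads, 1, 1);
    return { grid, threads };
}
// A launch site would then use, e.g.:
//   auto dims = makeLaunchDimensions(size_Mat_SP, numberofthreads);
//   SomeKernel<<<dims.first, dims.second>>>(...);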
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
index 1fb3d51a293b98af5f6f7880b46195c060f1fc7e..cba3540905df0314d6ce1eb6f0a1eab8d4a5a4c4 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
@@ -33,8 +33,6 @@ protected:
     KernelGroup myKernelGroup;
 
     vf::cuda::CudaGrid cudaGrid;
-    
-    std::unique_ptr<std::pair<dim3, dim3>> calcGridDimensions(unsigned int size_Mat, int numberOfThreads);
 };
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.cu
index 1caf0340c11cfc86214781ad89e2c6318730b5a6..2b3b72a6888e62ccac1009d2f1ece14b96bf93be 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.cu
@@ -2,6 +2,7 @@
 
 #include "Parameter/Parameter.h"
 #include "CumulantK17Comp_Device.cuh"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantK17Comp> CumulantK17Comp::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,11 +11,7 @@ std::shared_ptr<CumulantK17Comp> CumulantK17Comp::getNewInstance(std::shared_ptr
 
 void CumulantK17Comp::run()
 {
-    dim3 grid, threads;
-    std::tie(grid, threads) =
-        *calcGridDimensions(para->getParD(level)->size_Mat_SP, para->getParD(level)->numberofthreads);
-
-	LB_Kernel_CumulantK17Comp <<< grid, threads >>>(para->getParD(level)->omega,
+	LB_Kernel_CumulantK17Comp <<< cudaGrid.grid, cudaGrid.threads >>>(para->getParD(level)->omega,
 													para->getParD(level)->geoSP,
 													para->getParD(level)->neighborX_SP,
 													para->getParD(level)->neighborY_SP,
@@ -28,12 +25,9 @@ void CumulantK17Comp::run()
 	getLastCudaError("LB_Kernel_CumulantK17Comp execution failed");
 }
 
-CumulantK17Comp::CumulantK17Comp(std::shared_ptr<Parameter> para, int level)
+CumulantK17Comp::CumulantK17Comp(std::shared_ptr<Parameter> para, int level): KernelImp(para, level)
 {
-	this->para = para;
-	this->level = level;
-
 	myPreProcessorTypes.push_back(InitCompSP27);
-
 	myKernelGroup = BasicKernel;
+	this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP);
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu
index a91008421d37643869dde3c7b398e12fd8778b63..09db3da401edbb4be2a3e3409dac9138c6fad4ad 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu
@@ -2,6 +2,7 @@
 
 #include "Parameter/Parameter.h"
 #include "CumulantK17CompChim_Device.cuh"
+#include "cuda/CudaGrid.h"
 
 std::shared_ptr<CumulantK17CompChim> CumulantK17CompChim::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
@@ -10,11 +11,7 @@ std::shared_ptr<CumulantK17CompChim> CumulantK17CompChim::getNewInstance(std::sh
 
 void CumulantK17CompChim::run()
 {
-    dim3 grid, threads;
-    std::tie(grid, threads) =
-        *calcGridDimensions(para->getParD(level)->size_Mat_SP, para->getParD(level)->numberofthreads);
-
-	LB_Kernel_CumulantK17CompChim <<< grid, threads >>>(
+	LB_Kernel_CumulantK17CompChim <<< cudaGrid.grid, cudaGrid.threads >>>(
 		para->getParD(level)->omega,
 		para->getParD(level)->geoSP,
 		para->getParD(level)->neighborX_SP,
@@ -33,12 +30,9 @@ void CumulantK17CompChim::run()
 	getLastCudaError("LB_Kernel_CumulantK17CompChim execution failed");
 }
 
-CumulantK17CompChim::CumulantK17CompChim(std::shared_ptr<Parameter> para, int level)
+CumulantK17CompChim::CumulantK17CompChim(std::shared_ptr<Parameter> para, int level): KernelImp(para, level)
 {
-	this->para = para;
-	this->level = level;
-
 	myPreProcessorTypes.push_back(InitCompSP27);
-
 	myKernelGroup = BasicKernel;
+	this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP);
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu
index 1f8538d5c39df3c7f40c77a90c664da0ae14af32..255452dbc016fdc732277e17f9736d3713db719a 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu
@@ -14,11 +14,7 @@ std::shared_ptr<CumulantK17CompChimStream> CumulantK17CompChimStream::getNewInst
 
 void CumulantK17CompChimStream::run()
 {
-    dim3 grid, threads;
-    std::tie(grid, threads) =
-        *calcGridDimensions(para->getParD(level)->numberOfFluidNodes, para->getParD(level)->numberofthreads);
-
-	LB_Kernel_CumulantK17CompChimStream <<< grid, threads >>>(
+	LB_Kernel_CumulantK17CompChimStream <<< cudaGrid.grid, cudaGrid.threads >>>(
 		para->getParD(level)->omega,
 		para->getParD(level)->neighborX_SP,
 		para->getParD(level)->neighborY_SP,
@@ -36,13 +32,9 @@ void CumulantK17CompChimStream::run()
 
 void CumulantK17CompChimStream::runOnIndices(const unsigned int *indices, unsigned int size_indices, int streamIndex)
 {
-    dim3 grid, threads;
-    std::tie(grid, threads) =
-        *calcGridDimensions(para->getParD(level)->numberOfFluidNodes, para->getParD(level)->numberofthreads);
-
     cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
 
-    LB_Kernel_CumulantK17CompChimStream<<<grid, threads, 0, stream>>>(
+    LB_Kernel_CumulantK17CompChimStream<<< cudaGrid.grid, cudaGrid.threads, 0, stream>>>(
         para->getParD(level)->omega, 
 	    para->getParD(level)->neighborX_SP, 
 	    para->getParD(level)->neighborY_SP,
@@ -59,13 +51,10 @@ void CumulantK17CompChimStream::runOnIndices(const unsigned int *indices, unsign
     
 }
 
-CumulantK17CompChimStream::CumulantK17CompChimStream(std::shared_ptr<Parameter> para, int level)
+CumulantK17CompChimStream::CumulantK17CompChimStream(std::shared_ptr<Parameter> para, int level): KernelImp(para, level)
 {
-	this->para = para;
-	this->level = level;
-
 	myPreProcessorTypes.push_back(InitCompSP27);
-
 	myKernelGroup = BasicKernel;
+	this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP);
 }
 
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
index 4ed2118198466438d734ba7877db0b55447f4f3e..3731836f336d91c1bc4cc5f1a8f5ea0a10bee0a6 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
@@ -31,10 +31,6 @@
 #include <helper_cuda.h>
 #include <iostream>
 
-CudaStreamManager::CudaStreamManager() {}
-
-CudaStreamManager::~CudaStreamManager() {}
-
 void CudaStreamManager::launchStreams(uint numberOfStreams)
 {
     cudaStreams.resize(numberOfStreams);
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
index d78bd05a2d7e2a474a40ab3368d8a6b7c73d1bde..c2d515ab5fe9c24388632a7ca9e1e4c78b7f1467 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
@@ -44,9 +44,6 @@ private:
     const int bulkStreamIndex         = 0;
 
 public:
-    CudaStreamManager();
-    ~CudaStreamManager();
-
     void launchStreams(uint numberOfStreams);
     void terminateStreams();
     cudaStream_t &getStream(uint streamIndex);
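With the empty constructor and destructor removed, CudaStreamManager relies on the compiler-generated defaults and is used purely through the remaining interface. A small, hypothetical usage sketch based only on the declarations visible above (the number of streams and the index are illustrative):

// Illustrative usage of CudaStreamManager after this change.
CudaStreamManager streamManager;         // compiler-generated default construction now suffices
streamManager.launchStreams(2);          // create the CUDA streams, e.g. one for border and one for bulk work
cudaStream_t &stream = streamManager.getStream(0);
// ... enqueue kernels and copies on 'stream' ...
streamManager.terminateStreams();        // tear the streams down explicitly when the simulation is done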