diff --git a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
index 7d2d79d8c21f1966a5b2369f2649df2efc3b4bcb..7ec203e78ebfe8958c87882b91d91049527b45c5 100644
--- a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
+++ b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
@@ -150,10 +150,10 @@ void multipleLevel(const std::string& configPath)
     }
 
     const bool readPrecursor = config.getValue("readPrecursor", false);
-    int nTReadPrecursor;
+    int timestepsBetweenReadsPrecursor;
     if(readPrecursor)
     {
-        nTReadPrecursor = config.getValue<int>("nTimestepsReadPrecursor");
+        timestepsBetweenReadsPrecursor = config.getValue<int>("nTimestepsReadPrecursor");
         precursorDirectory = config.getValue<std::string>("precursorDirectory");
         useDistributions     = config.getValue<bool>("useDistributions", false);
     }
@@ -235,14 +235,19 @@ void multipleLevel(const std::string& configPath)
     bool isLastSubDomain  = (procID == nProcs-1 && nProcs > 1)?                    true: false;
     bool isMidSubDomain   = (!isFirstSubDomain && !isLastSubDomain && nProcs > 1)? true: false;
     
-    if(isFirstSubDomain || isMidSubDomain)
+    if(isFirstSubDomain)
     {
         xGridMax += overlap;
-        // xGridMin -= overlap;
+        if(!readPrecursor) xGridMin -= overlap;
     }
-    if(isLastSubDomain || isMidSubDomain)
+    if(isLastSubDomain)
     {
-        // xGridMax += overlap;
+        xGridMin -= overlap;
+        if(!readPrecursor) xGridMax += overlap;
+    }
+    if(isMidSubDomain)
+    {
+        xGridMax += overlap;
         xGridMin -= overlap;
     }
 
@@ -252,8 +257,7 @@ void multipleLevel(const std::string& configPath)
     {
         gridBuilder->setNumberOfLayers(4,0);
         real xMaxRefinement = readPrecursor? xGridMax-H: xGridMax;   //Stop refinement some distance before outlet if domain ist not periodic
-        // gridBuilder->addGrid( new Cuboid( xGridMin+dx, 0.f, 0.f, xMaxRefinement, L_y,  0.5*L_z) , 1 );
-        gridBuilder->addGrid( new Cuboid( 0.f, 0.f, 0.f, 5000.0, L_y,  0.5*L_z) , 1 );
+        gridBuilder->addGrid( new Cuboid( xGridMin, 0.f, 0.f, xMaxRefinement, L_y,  0.5*L_z) , 1 );
         para->setMaxLevel(2);
         scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
     }
@@ -302,9 +306,9 @@ void multipleLevel(const std::string& configPath)
     {
         if(isFirstSubDomain || nProcs == 1)
         {   
-            // auto precursor = createFileCollection(precursorDirectory + "/precursor", FileType::VTK);
-            // gridBuilder->setPrecursorBoundaryCondition(SideType::MX, precursor, nTReadPrecursor);
-            gridBuilder->setVelocityBoundaryCondition(SideType::MX, velocityLB, 0.0, 0.0);
+            auto precursor = createFileCollection(precursorDirectory + "/precursor", FileType::VTK);
+            gridBuilder->setPrecursorBoundaryCondition(SideType::MX, precursor, timestepsBetweenReadsPrecursor);
+            // gridBuilder->setVelocityBoundaryCondition(SideType::MX, velocityLB, 0.0, 0.0);
         }
 
         if(isLastSubDomain || nProcs == 1)
diff --git a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp
index 7c2c41bb8af72422ac0289a9c2ab3784adb5772d..8b3fca3e374f4ca08ab3a3291a49a242b0b2ff70 100644
--- a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp
+++ b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp
@@ -156,7 +156,7 @@ void VTKFile::unloadFile()
     this->loaded = false;
 }
 
-void VTKFile::getData(real* data, uint numberOfNodes, std::vector<uint> readIndeces, std::vector<uint> writeIndices, uint offsetRead, uint offsetWrite)
+void VTKFile::getData(real* data, uint numberOfNodes, const std::vector<uint>& readIndices, const std::vector<uint>& writeIndices, uint offsetRead, uint offsetWrite)
 {
     if(!this->loaded) loadFile();
 
@@ -167,7 +167,7 @@ void VTKFile::getData(real* data, uint numberOfNodes, std::vector<uint> readInde
         real* quant = &data[j*numberOfNodes];
         for(size_t i=0; i<nPoints; i++)
         {
-            quant[offsetWrite+writeIndices[i]] = this->quantities[j].values[readIndeces[i]+offsetRead];
+            quant[offsetWrite+writeIndices[i]] = this->quantities[j].values[readIndices[i]+offsetRead];
         }
     }
 }
diff --git a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h
index 7b1145185fe0ea1d515127d46cbd66bdbad7f38d..49b085973a39181cf0cfe23d44e818d3061ed4ca 100644
--- a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h
+++ b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h
@@ -40,7 +40,7 @@ public:
         // printFileInfo();
     };
 
-    void getData(real* data, uint numberOfNodes, std::vector<uint> readIndeces, std::vector<uint> writeIndices, uint offsetRead, uint offsetWrite);
+    void getData(real* data, uint numberOfNodes, const std::vector<uint>& readIndices, const std::vector<uint>& writeIndices, uint offsetRead, uint offsetWrite);
     bool markNANs(std::vector<uint> readIndices);
     bool inBoundingBox(real posX, real posY, real posZ){return  inXBounds(posX) && inYBounds(posY) && inZBounds(posZ); };
     bool inXBounds(real posX){ return posX<=maxX && posX>=minX; };
diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h
index 12e1fb61c81948c88febed8960f0a3a9548fbfb4..22342aec9839afad9bb37b1b11812f6d1750ed7b 100644
--- a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h
+++ b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h
@@ -337,9 +337,9 @@ public:
 class PrecursorBoundaryCondition : public gg::BoundaryCondition
 {
 public:
-    static SPtr<PrecursorBoundaryCondition> make(SPtr<TransientBCInputFileReader> reader, int nTRead, real velocityX, real velocityY, real velocityZ)
+    static SPtr<PrecursorBoundaryCondition> make(SPtr<TransientBCInputFileReader> reader, int timeStepsBetweenReads, real velocityX, real velocityY, real velocityZ)
     {
-        return SPtr<PrecursorBoundaryCondition>(new PrecursorBoundaryCondition(reader, nTRead, velocityX, velocityY, velocityZ));
+        return SPtr<PrecursorBoundaryCondition>(new PrecursorBoundaryCondition(reader, timeStepsBetweenReads, velocityX, velocityY, velocityZ));
     }
 
     SPtr<TransientBCInputFileReader> getReader(){ return reader; }
@@ -348,13 +348,13 @@ public:
     real getVelocityZ() { return velocityZ; }
 
 private:
-    PrecursorBoundaryCondition(SPtr<TransientBCInputFileReader> _reader, uint _nTRead, real vx, real vy, real vz) : reader(_reader), nTRead(_nTRead), velocityX(vx), velocityY(vy), velocityZ(vz) { };
+    PrecursorBoundaryCondition(SPtr<TransientBCInputFileReader> _reader, uint _timeStepsBetweenReads, real vx, real vy, real vz) : reader(_reader), timeStepsBetweenReads(_timeStepsBetweenReads), velocityX(vx), velocityY(vy), velocityZ(vz) { };
     virtual char getType() const override
     {
         return vf::gpu::BC_VELOCITY;
     }
 public:
-    uint nTRead; //!> read data every nth timestep
+    uint timeStepsBetweenReads; //!> read data every nth timestep
 
 private:
     real velocityX = 0.0;
diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
index 270aff9d8ff9639b8ae0c19451ca90990eba9c63..9b430188235845d416e085f55a4586ee4b7c8f20 100644
--- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
+++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
@@ -85,25 +85,6 @@ void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition
 
                 boundaryCondition->patches.push_back(0);
             }
-            // else if(boundaryCondition->getType()==vf::gpu::BC_STRESS && (index != INVALID_INDEX) && (   grid->getFieldEntry(index) == vf::gpu::FLUID
-            //                                 ||  grid->getFieldEntry(index) == vf::gpu::FLUID_CFC
-            //                                 ||  grid->getFieldEntry(index) == vf::gpu::FLUID_CFF
-            //                                 ||  grid->getFieldEntry(index) == vf::gpu::FLUID_FCC
-            //                                 ||  grid->getFieldEntry(index) == vf::gpu::FLUID_FCF 
-            //                                 ||  grid->getFieldEntry(index) == vf::gpu::FLUID_FCF
-            //                                 ||  grid->getFieldEntry(index)  == vf::gpu::BC_PRESSURE
-            //                                 ))
-            // {
-            //     grid->setFieldEntry(index, boundaryCondition->getType());
-            //     boundaryCondition->indices.push_back(index);
-            //     setPressureNeighborIndices(boundaryCondition, grid, index);
-            //     setStressSamplingIndices(boundaryCondition, grid, index);
-
-            //     setQs(grid, boundaryCondition, index);
-
-            //     boundaryCondition->patches.push_back(0);
-            // }
-
         }
     }
 }
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
index c9aa7678b9d4391a47707880a2fcb9eb90547864..8c3db0afdf89ceb460b87d6b0b0ccd91db56f428 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
@@ -118,7 +118,7 @@ public:
     virtual void getPrecursorValues(uint* neighborNT, uint* neighborNB, uint* neighborST, uint* neighborSB, 
                                     real* weightsNT, real* weightsNB, real* weightsST, real* weightsSB, 
                                     int* indices, std::vector<SPtr<TransientBCInputFileReader>>& reader, 
-                                    int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& nTRead, 
+                                    int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& timeStepsBetweenReads, 
                                     real& velocityX, real& velocityY, real& velocityZ, int level) const = 0;
 
     virtual void getPrecursorQs(real* qs[27], int level) const  = 0;
@@ -147,18 +147,18 @@ public:
 
     virtual void findFluidNodes(bool splitDomain) = 0;
 
-    virtual void addFluidNodeIndicesMacroVars(std::vector<uint> fluidNodeIndicesMacroVars, uint level)           = 0;
-    virtual void addFluidNodeIndicesApplyBodyForce(std::vector<uint> fluidNodeIndicesApplyBodyForce, uint level) = 0;
-    virtual void addFluidNodeIndicesAllFeatures(std::vector<uint> fluidNodeIndicesAllFeatures, uint level)       = 0;
+    virtual void addFluidNodeIndicesMacroVars(const std::vector<uint>& fluidNodeIndicesMacroVars, uint level)           = 0;
+    virtual void addFluidNodeIndicesApplyBodyForce(const std::vector<uint>& fluidNodeIndicesApplyBodyForce, uint level) = 0;
+    virtual void addFluidNodeIndicesAllFeatures(const std::vector<uint>& fluidNodeIndicesAllFeatures, uint level)       = 0;
     virtual void sortFluidNodeIndicesMacroVars(uint level) = 0;
     virtual void sortFluidNodeIndicesApplyBodyForce(uint level) = 0;
     virtual void sortFluidNodeIndicesAllFeatures(uint level) = 0;
-    virtual uint getNumberOfFluidNodesMacroVars(unsigned int level) const = 0;
-    virtual void getFluidNodeIndicesMacroVars(uint *fluidNodeIndicesMacroVars, const int level) const = 0;
-    virtual uint getNumberOfFluidNodesApplyBodyForce(unsigned int level) const = 0;
-    virtual void getFluidNodeIndicesApplyBodyForce(uint *fluidNodeIndicesApplyBodyForce, const int level) const = 0;
-    virtual uint getNumberOfFluidNodesAllFeatures(unsigned int level) const = 0;
-    virtual void getFluidNodeIndicesAllFeatures(uint *fluidNodeIndicesAllFeatures, const int level) const = 0;
+    virtual uint getNumberOfFluidNodesMacroVars(uint level) const = 0;
+    virtual void getFluidNodeIndicesMacroVars(uint *fluidNodeIndicesMacroVars, int level) const = 0;
+    virtual uint getNumberOfFluidNodesApplyBodyForce(uint level) const = 0;
+    virtual void getFluidNodeIndicesApplyBodyForce(uint *fluidNodeIndicesApplyBodyForce, int level) const = 0;
+    virtual uint getNumberOfFluidNodesAllFeatures(uint level) const = 0;
+    virtual void getFluidNodeIndicesAllFeatures(uint *fluidNodeIndicesAllFeatures, int level) const = 0;
 
 
 };
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
index 267f03c5aefa809f68af6d9750cf4afbf3cfe3e2..0a31b6f910b9d4cc2449cd422a999f70c4485468 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
+++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
@@ -122,7 +122,7 @@ void LevelGridBuilder::setSlipGeometryBoundaryCondition(real normalX, real norma
 //=======================================================================================
 //! \brief Set stress boundary concdition using iMEM
 //! \param samplingOffset number of grid points above boundary where velocity for wall model is sampled
-//! \param z0 roghness length [m]
+//! \param z0 roughness length [m]
 //! \param dx dx of level 0 [m] 
 //!
 void LevelGridBuilder::setStressBoundaryCondition(  SideType sideType, 
@@ -251,7 +251,7 @@ void LevelGridBuilder::setNoSlipGeometryBoundaryCondition()
     }
 }
 
-void LevelGridBuilder::setPrecursorBoundaryCondition(SideType sideType, SPtr<FileCollection> fileCollection, int nTRead, 
+void LevelGridBuilder::setPrecursorBoundaryCondition(SideType sideType, SPtr<FileCollection> fileCollection, int timeStepsBetweenReads, 
                                                         real velocityX, real velocityY, real velocityZ, std::vector<uint> fileLevelToGridLevelMap)
 {
     if(fileLevelToGridLevelMap.empty())                         
@@ -271,7 +271,7 @@ void LevelGridBuilder::setPrecursorBoundaryCondition(SideType sideType, SPtr<Fil
     for (uint level = 0; level < getNumberOfGridLevels(); level++)
     {
         auto reader = createReaderForCollection(fileCollection, fileLevelToGridLevelMap[level]);
-        SPtr<PrecursorBoundaryCondition> precursorBoundaryCondition = PrecursorBoundaryCondition::make( reader, nTRead, velocityX, velocityY, velocityZ);
+        SPtr<PrecursorBoundaryCondition> precursorBoundaryCondition = PrecursorBoundaryCondition::make( reader, timeStepsBetweenReads, velocityX, velocityY, velocityZ);
 
         auto side = SideFactory::make(sideType);
 
@@ -648,20 +648,20 @@ uint LevelGridBuilder::getPrecursorSize(int level) const
 void LevelGridBuilder::getPrecursorValues(  uint* neighborNT, uint* neighborNB, uint* neighborST, uint* neighborSB, 
                                             real* weightsNT, real* weightsNB, real* weightsST, real* weightsSB, 
                                             int* indices, std::vector<SPtr<TransientBCInputFileReader>>& reader, 
-                                            int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& nTRead, 
+                                            int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& timeStepsBetweenReads, 
                                             real& velocityX, real& velocityY, real& velocityZ, int level) const
 {
     int allIndicesCounter = 0;
     int allNodesCounter = 0;
-    uint tmpNTRead = 0;
+    uint tmpTimeStepsBetweenReads = 0;
     size_t tmpNQuantities = 0;
     
     for (auto boundaryCondition : boundaryConditions[level]->precursorBoundaryConditions)
     {
-        if( tmpNTRead == 0 )
-            tmpNTRead = boundaryCondition->nTRead;
-        if( tmpNTRead != boundaryCondition->nTRead )
-            throw std::runtime_error("All precursor boundary conditions must have the same NTRead value");
+        if( tmpTimeStepsBetweenReads == 0 )
+            tmpTimeStepsBetweenReads = boundaryCondition->timeStepsBetweenReads;
+        if( tmpTimeStepsBetweenReads != boundaryCondition->timeStepsBetweenReads )
+            throw std::runtime_error("All precursor boundary conditions must have the same timeStepsBetweenReads value");
         auto BCreader = boundaryCondition->getReader();
         BCreader->setWritingOffset(allIndicesCounter);
         reader.push_back(BCreader);
@@ -690,9 +690,9 @@ void LevelGridBuilder::getPrecursorValues(  uint* neighborNT, uint* neighborNB,
     }
     numberOfPrecursorNodes = allNodesCounter;
 
-    if (tmpNTRead == 0)
-        throw std::runtime_error("NTRead of precursor needs to be larger than 0.");
-    nTRead = tmpNTRead;
+    if (tmpTimeStepsBetweenReads == 0)
+        throw std::runtime_error("timeStepsBetweenReads of precursor needs to be larger than 0.");
+    timeStepsBetweenReads = tmpTimeStepsBetweenReads;
     
     if (tmpNQuantities == 0)
         throw std::runtime_error("Number of quantities in precursor needs to be larger than 0.");
@@ -798,17 +798,17 @@ void LevelGridBuilder::findFluidNodes(bool splitDomain)
 }
 
 
-void LevelGridBuilder::addFluidNodeIndicesMacroVars(std::vector<uint> fluidNodeIndicesMacroVars, uint level) 
+void LevelGridBuilder::addFluidNodeIndicesMacroVars(const std::vector<uint>& fluidNodeIndicesMacroVars, uint level) 
 {
     grids[level]->addFluidNodeIndicesMacroVars(fluidNodeIndicesMacroVars);
 }
 
-void LevelGridBuilder::addFluidNodeIndicesApplyBodyForce(std::vector<uint> fluidNodeIndicesApplyBodyForce, uint level)
+void LevelGridBuilder::addFluidNodeIndicesApplyBodyForce(const std::vector<uint>& fluidNodeIndicesApplyBodyForce, uint level)
 {
     grids[level]->addFluidNodeIndicesApplyBodyForce(fluidNodeIndicesApplyBodyForce);
 }
 
-void LevelGridBuilder::addFluidNodeIndicesAllFeatures(std::vector<uint> fluidNodeIndicesAllFeatures, uint level)
+void LevelGridBuilder::addFluidNodeIndicesAllFeatures(const std::vector<uint>& fluidNodeIndicesAllFeatures, uint level)
 {
     grids[level]->addFluidNodeIndicesAllFeatures(fluidNodeIndicesAllFeatures);
 }
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
index cc6f31800c2e55ca604ff2f223475ba54d3afd56..afaf4118469a3cc935cd862d311629fabd1c0379 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
@@ -84,8 +84,9 @@ public:
     GRIDGENERATOR_EXPORT void setPressureBoundaryCondition(SideType sideType, real rho);
     GRIDGENERATOR_EXPORT void setPeriodicBoundaryCondition(bool periodic_X, bool periodic_Y, bool periodic_Z);
     GRIDGENERATOR_EXPORT void setNoSlipBoundaryCondition(SideType sideType);
-    GRIDGENERATOR_EXPORT void setPrecursorBoundaryCondition(SideType sideType, SPtr<FileCollection> fileCollection, int nTRead, real velocityX=0.0f, real velocityY=0.0f, real velocityZ=0.0f,          
-                                                                std::vector<uint> fileLevelToGridLevelMap = {});
+    GRIDGENERATOR_EXPORT void setPrecursorBoundaryCondition(SideType sideType, SPtr<FileCollection> fileCollection, int timeStepsBetweenReads, 
+                                                            real velocityX=0.0f, real velocityY=0.0f, real velocityZ=0.0f,     
+                                                            std::vector<uint> fileLevelToGridLevelMap = {});
 
     GRIDGENERATOR_EXPORT void setEnableFixRefinementIntoTheWall(bool enableFixRefinementIntoTheWall);
 
@@ -131,7 +132,7 @@ public:
     GRIDGENERATOR_EXPORT void getPrecursorValues(   uint* neighborNT, uint* neighborNB, uint* neighborST, uint* neighborSB, 
                                                     real* weightsNT, real* weightsNB, real* weightsST, real* weightsSB, 
                                                     int* indices, std::vector<SPtr<TransientBCInputFileReader>>& reader, 
-                                                    int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& nTRead,
+                                                    int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& timeStepsBetweenReads,
                                                     real& velocityX, real& velocityY, real& velocityZ, int level) const override;
     GRIDGENERATOR_EXPORT virtual void getPrecursorQs(real* qs[27], int level) const override;
 
@@ -211,9 +212,9 @@ public:
     // needed for CUDA Streams MultiGPU (Communication Hiding)
     void findFluidNodes(bool splitDomain) override;
 
-    void addFluidNodeIndicesMacroVars(std::vector<uint> fluidNodeIndicesMacroVars, uint level) override;
-    void addFluidNodeIndicesApplyBodyForce(std::vector<uint> fluidNodeIndicesApplyBodyForce, uint level) override;
-    void addFluidNodeIndicesAllFeatures(std::vector<uint> fluidNodeIndicesAllFeatures, uint level) override;
+    void addFluidNodeIndicesMacroVars(const std::vector<uint>& fluidNodeIndicesMacroVars, uint level) override;
+    void addFluidNodeIndicesApplyBodyForce(const std::vector<uint>& fluidNodeIndicesApplyBodyForce, uint level) override;
+    void addFluidNodeIndicesAllFeatures(const std::vector<uint>& fluidNodeIndicesAllFeatures, uint level) override;
     
     void sortFluidNodeIndicesMacroVars(uint level) override;
     void sortFluidNodeIndicesApplyBodyForce(uint level) override;
diff --git a/src/gpu/GridGenerator/grid/GridImp.h b/src/gpu/GridGenerator/grid/GridImp.h
index 981245c7098d960ebf6f975bdab8cdf1ecbb6e38..8283bf569e266b84f020334a306d93756b01c394 100644
--- a/src/gpu/GridGenerator/grid/GridImp.h
+++ b/src/gpu/GridGenerator/grid/GridImp.h
@@ -116,7 +116,7 @@ private:
     int *sparseIndices;
 
     std::vector<uint> fluidNodeIndices;                 // run on CollisionTemplate::Default
-    std::vector<uint> fluidNodeIndicesBorder;           // run on border nodes
+    std::vector<uint> fluidNodeIndicesBorder;           // run on subdomain border nodes (CollisionTemplate::SubDomainBorder)
     std::vector<uint> fluidNodeIndicesMacroVars;        // run on CollisionTemplate::MacroVars
     std::vector<uint> fluidNodeIndicesApplyBodyForce;   // run on CollisionTemplate::ApplyBodyForce
     std::vector<uint> fluidNodeIndicesAllFeatures;      // run on CollisionTemplate::AllFeatures
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp b/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp
index 1c279ad523e8442bc68af079f1c789a1b130cd35..49543f37df7fb54290f4ab6c09edb8d10c0b67be 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp
@@ -72,16 +72,16 @@ void CollisionAndExchange_streams::operator()(UpdateGrid27 *updateGrid, Paramete
     //! 1. run collision for nodes which are at the border of the gpus/processes, running with WriteMacroVars in case probes sample on these nodes
     //!    
     updateGrid->collisionUsingIndices(  level, t, 
-                                        para->getParD(level)->taggedFluidNodeIndices[CollisionTemplate::Border],
-                                        para->getParD(level)->numberOfTaggedFluidNodes[CollisionTemplate::Border], 
+                                        para->getParD(level)->taggedFluidNodeIndices[CollisionTemplate::SubDomainBorder],
+                                        para->getParD(level)->numberOfTaggedFluidNodes[CollisionTemplate::SubDomainBorder], 
                                         CollisionTemplate::WriteMacroVars,  
-                                        CudaStreamIndex::Border);
+                                        CudaStreamIndex::SubDomainBorder);
 
     //! 2. prepare the exchange between gpus (collect the send nodes for communication in a buffer on the gpu) and trigger bulk kernel execution when finished
     //!
-    updateGrid->prepareExchangeMultiGPU(level, CudaStreamIndex::Border);
+    updateGrid->prepareExchangeMultiGPU(level, CudaStreamIndex::SubDomainBorder);
     if (para->getUseStreams())
-        para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::Border);
+        para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::SubDomainBorder);
 
     //! 3. launch the collision kernel for bulk nodes. This includes nodes with \param tag Default, WriteMacroVars, ApplyBodyForce, 
     //!    or AllFeatures. All assigned tags are listed in \param allocatedBulkFluidNodeTags during initialization in Simulation::init
@@ -97,5 +97,5 @@ void CollisionAndExchange_streams::operator()(UpdateGrid27 *updateGrid, Paramete
                                             CudaStreamIndex::Bulk);
     }
     //! 4. exchange information between GPUs
-    updateGrid->exchangeMultiGPU(level, CudaStreamIndex::Border);
+    updateGrid->exchangeMultiGPU(level, CudaStreamIndex::SubDomainBorder);
 }
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp b/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp
index fe7da04d4074a37321f4340f1d76f0606963b89e..b8ca4e9c2020e17cd0192267ac5d931b510afc3a 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp
@@ -42,27 +42,27 @@ void RefinementAndExchange_streams_exchangeInterface::operator()(UpdateGrid27 *u
     //!
     //! 1. Interpolation fine to coarse for nodes which are at the border of the gpus/processes
     //!
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBorder, para->getParD(level)->offFC, CudaStreamIndex::Border);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBorder, para->getParD(level)->offFC, CudaStreamIndex::SubDomainBorder);
 
     //! 2. prepare the exchange between gpus (collect the send nodes for communication in a buffer on the gpu) and trigger bulk kernel execution when finished
     //!
-    updateGrid->prepareExchangeMultiGPUAfterFtoC(level, CudaStreamIndex::Border);
+    updateGrid->prepareExchangeMultiGPUAfterFtoC(level, CudaStreamIndex::SubDomainBorder);
     if (para->getUseStreams())
-        para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::Border);
+        para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::SubDomainBorder);
 
     //! 3. launch the bulk kernels for both interpolation processes (fine to coarse and coarse to fine)
     //!
     para->getStreamManager()->waitOnStartBulkKernelEvent(CudaStreamIndex::Bulk);
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, CudaStreamIndex::Border);
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, CudaStreamIndex::Border);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, CudaStreamIndex::SubDomainBorder);
+    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, CudaStreamIndex::SubDomainBorder);
 
     //! 4. exchange information between GPUs (only nodes which are part of the interpolation)
     //!
-    updateGrid->exchangeMultiGPUAfterFtoC(level, CudaStreamIndex::Border);
+    updateGrid->exchangeMultiGPUAfterFtoC(level, CudaStreamIndex::SubDomainBorder);
 
     // 5. interpolation fine to coarse for nodes which are at the border of the gpus/processes
     //!
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, CudaStreamIndex::Border);
+    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, CudaStreamIndex::SubDomainBorder);
 
     cudaDeviceSynchronize();
 }
@@ -73,27 +73,27 @@ void RefinementAndExchange_streams_exchangeAllNodes::operator()(UpdateGrid27 *up
     //!
     //! 1. interpolation fine to coarse for nodes which are at the border of the gpus/processes
     //!
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBorder, para->getParD(level)->offFC, CudaStreamIndex::Border);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBorder, para->getParD(level)->offFC, CudaStreamIndex::SubDomainBorder);
 
     //! 2. prepare the exchange between gpus (collect the send nodes for communication in a buffer on the gpu) and trigger bulk kernel execution when finished
     //!
-    updateGrid->prepareExchangeMultiGPU(level, CudaStreamIndex::Border);
+    updateGrid->prepareExchangeMultiGPU(level, CudaStreamIndex::SubDomainBorder);
     if (para->getUseStreams())
-        para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::Border);
+        para->getStreamManager()->triggerStartBulkKernel(CudaStreamIndex::SubDomainBorder);
 
     //! 3. launch the bulk kernels for both interpolation processes (fine to coarse and coarse to fine)
     //!
     para->getStreamManager()->waitOnStartBulkKernelEvent(CudaStreamIndex::Bulk);
-    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, CudaStreamIndex::Border);
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, CudaStreamIndex::Border);
+    updateGrid->fineToCoarse(level, &para->getParD(level)->intFCBulk, para->getParD(level)->offFCBulk, CudaStreamIndex::SubDomainBorder);
+    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBulk, para->getParD(level)->offCFBulk, CudaStreamIndex::SubDomainBorder);
 
     //! 4. exchange information between GPUs (all nodes)
     //!
-    updateGrid->exchangeMultiGPU(level, CudaStreamIndex::Border);
+    updateGrid->exchangeMultiGPU(level, CudaStreamIndex::SubDomainBorder);
 
     // 5. interpolation fine to coarse for nodes which are at the border of the gpus/processes
     //!
-    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, CudaStreamIndex::Border);
+    updateGrid->coarseToFine(level, &para->getParD(level)->intCFBorder, para->getParD(level)->offCF, CudaStreamIndex::SubDomainBorder);
 
     cudaDeviceSynchronize();
 }
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
index 44202b71686df31ba9d6e2dd6f6cf18453fd3408..00a7b45668e2050467f3d1122455dc74d0ad4f1c 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
@@ -240,7 +240,7 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe
                             std::vector<ProcessNeighbor27> *sendProcessNeighborHost,
                             std::vector<ProcessNeighbor27> *recvProcessNeighborHost)
 {
-    cudaStream_t stream = para->getStreamManager()->getStream(CudaStreamIndex::Border);
+    cudaStream_t stream = para->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder);
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // copy Device to Host
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
index a89b5a1ca821ce794263f77fcc6772d169879401..c3dd9e41495645d06e54514cc25eb336ed6fc9c5 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
@@ -120,11 +120,11 @@ void GridGenerator::allocArrays_taggedFluidNodes() {
                     if(para->getParH(level)->numberOfTaggedFluidNodes[tag]>0)
                         para->getParH(level)->allocatedBulkFluidNodeTags.push_back(tag);
                     break;
-                case CollisionTemplate::Border:
-                    this->setNumberOfTaggedFluidNodes(builder->getNumberOfFluidNodesBorder(level), CollisionTemplate::Border, level);
-                    cudaMemoryManager->cudaAllocTaggedFluidNodeIndices(CollisionTemplate::Border, level);
-                    builder->getFluidNodeIndicesBorder(para->getParH(level)->taggedFluidNodeIndices[CollisionTemplate::Border], level);
-                    cudaMemoryManager->cudaCopyTaggedFluidNodeIndices(CollisionTemplate::Border, level);
+                case CollisionTemplate::SubDomainBorder:
+                    this->setNumberOfTaggedFluidNodes(builder->getNumberOfFluidNodesBorder(level), CollisionTemplate::SubDomainBorder, level);
+                    cudaMemoryManager->cudaAllocTaggedFluidNodeIndices(CollisionTemplate::SubDomainBorder, level);
+                    builder->getFluidNodeIndicesBorder(para->getParH(level)->taggedFluidNodeIndices[CollisionTemplate::SubDomainBorder], level);
+                    cudaMemoryManager->cudaCopyTaggedFluidNodeIndices(CollisionTemplate::SubDomainBorder, level);
                     break;
                 case CollisionTemplate::WriteMacroVars:
                     this->setNumberOfTaggedFluidNodes(builder->getNumberOfFluidNodesMacroVars(level), CollisionTemplate::WriteMacroVars, level);
@@ -157,7 +157,7 @@ void GridGenerator::allocArrays_taggedFluidNodes() {
         VF_LOG_INFO("Number of tagged nodes on level {}:", level);
         VF_LOG_INFO("Default: {}, Border: {}, WriteMacroVars: {}, ApplyBodyForce: {}, AllFeatures: {}", 
                     para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::Default],
-                    para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::Border],
+                    para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::SubDomainBorder],
                     para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::WriteMacroVars],
                     para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::ApplyBodyForce],
                     para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::AllFeatures]    );        
@@ -177,8 +177,8 @@ void GridGenerator::tagFluidNodeIndices(std::vector<uint> taggedFluidNodeIndices
             builder->addFluidNodeIndicesAllFeatures( taggedFluidNodeIndices, level );
             break;
         case CollisionTemplate::Default:
-        case CollisionTemplate::Border:
-            throw std::runtime_error("Cannot tag fluid nodes as Default or Border!");
+        case CollisionTemplate::SubDomainBorder:
+            throw std::runtime_error("Cannot tag fluid nodes as Default or SubDomainBorder!");
         default:
             throw std::runtime_error("Tagging fluid nodes with invald tag!");
             break;
@@ -347,13 +347,13 @@ void GridGenerator::allocArrays_BoundaryValues()
                     para->getParH(level)->precursorBC.weightsNT, para->getParH(level)->precursorBC.weightsNB, 
                     para->getParH(level)->precursorBC.weightsST, para->getParH(level)->precursorBC.weightsSB, 
                     para->getParH(level)->precursorBC.k, para->getParH(level)->transientBCInputFileReader, para->getParH(level)->precursorBC.numberOfPrecursorNodes, 
-                    para->getParH(level)->precursorBC.numberOfQuantities, para->getParH(level)->precursorBC.nTRead, 
+                    para->getParH(level)->precursorBC.numberOfQuantities, para->getParH(level)->precursorBC.timeStepsBetweenReads, 
                     para->getParH(level)->precursorBC.velocityX, para->getParH(level)->precursorBC.velocityY, para->getParH(level)->precursorBC.velocityZ,
                     level);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             para->getParD(level)->precursorBC.numberOfPrecursorNodes = para->getParH(level)->precursorBC.numberOfPrecursorNodes;
             para->getParD(level)->precursorBC.numberOfQuantities = para->getParH(level)->precursorBC.numberOfQuantities;
-            para->getParD(level)->precursorBC.nTRead = para->getParH(level)->precursorBC.nTRead;
+            para->getParD(level)->precursorBC.timeStepsBetweenReads = para->getParH(level)->precursorBC.timeStepsBetweenReads;
             para->getParD(level)->precursorBC.velocityX = para->getParH(level)->precursorBC.velocityX;
             para->getParD(level)->precursorBC.velocityY = para->getParH(level)->precursorBC.velocityY;
             para->getParD(level)->precursorBC.velocityZ = para->getParH(level)->precursorBC.velocityZ;
@@ -380,7 +380,7 @@ void GridGenerator::allocArrays_BoundaryValues()
             para->getParD(level)->precursorBC.next = tmp;
 
             //read second timestep of precursor into next and copy next to device
-            real nextTime = para->getParD(level)->precursorBC.nTRead*pow(2,-((real)level))*para->getTimeRatio();
+            real nextTime = para->getParD(level)->precursorBC.timeStepsBetweenReads*pow(2,-((real)level))*para->getTimeRatio();
             for(auto reader : para->getParH(level)->transientBCInputFileReader)
             {   
                 reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, nextTime);
diff --git a/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h b/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h
index f7cd7cc1a0dd1fc941547b5d102719a82eef9ca2..c6877cbfeffe5b32c0c2d336e46b02d68cd946a3 100644
--- a/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h
+++ b/src/gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h
@@ -48,7 +48,7 @@ class Parameter;
 
 using boundaryCondition = std::function<void(LBMSimulationParameter *, QforBoundaryConditions *)>;
 using boundaryConditionWithParameter = std::function<void(Parameter *, QforBoundaryConditions *, const int level)>;
-using precursorBoundaryConditionFunc = std::function<void(LBMSimulationParameter *, QforPrecursorBoundaryConditions *, real tRatio, real velocityRatio)>;
+using precursorBoundaryConditionFunc = std::function<void(LBMSimulationParameter *, QforPrecursorBoundaryConditions *, real timeRatio, real velocityRatio)>;
 
 class BoundaryConditionFactory
 {
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
index 253960f96d13cde56caffcf803b8c9650880a001..8807c22300af92b294976c3d59317f4118e0b72e 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
@@ -530,7 +530,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborXIndex(int lev, unsigned int proc
 void CudaMemoryManager::cudaCopyProcessNeighborXFsHD(int lev, unsigned int processNeighbor,
                                                      const unsigned int &memsizeFsRecv)
 {
-    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::Border))
+    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder))
         checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->recvProcessNeighborX[processNeighbor].f[0],
 						 parameter->getParH(lev)->recvProcessNeighborX[processNeighbor].f[0],
 						 parameter->getD3Qxx() * memsizeFsRecv,
@@ -540,12 +540,12 @@ void CudaMemoryManager::cudaCopyProcessNeighborXFsHD(int lev, unsigned int proce
                                          parameter->getParH(lev)->recvProcessNeighborX[processNeighbor].f[0],
                                          parameter->getD3Qxx() * memsizeFsRecv,
                                          cudaMemcpyHostToDevice,
-                                         parameter->getStreamManager()->getStream(CudaStreamIndex::Border)));
+                                         parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder)));
 }
 void CudaMemoryManager::cudaCopyProcessNeighborXFsDH(int lev, unsigned int processNeighbor,
                                                      const unsigned int &memsizeFsSend)
 {  
-    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::Border))
+    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder))
     	checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->sendProcessNeighborX[processNeighbor].f[0],
     								parameter->getParD(lev)->sendProcessNeighborX[processNeighbor].f[0],
     								parameter->getD3Qxx() * memsizeFsSend,
@@ -555,7 +555,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborXFsDH(int lev, unsigned int proce
     								     parameter->getParD(lev)->sendProcessNeighborX[processNeighbor].f[0],
     								     parameter->getD3Qxx() * memsizeFsSend,
     								     cudaMemcpyDeviceToHost,
-                                         parameter->getStreamManager()->getStream(CudaStreamIndex::Border)));
+                                         parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder)));
 }
 void CudaMemoryManager::cudaFreeProcessNeighborX(int lev, unsigned int processNeighbor)
 {
@@ -600,7 +600,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborYIndex(int lev, unsigned int proc
 }
 void CudaMemoryManager::cudaCopyProcessNeighborYFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv)
 {
-    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::Border))
+    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder))
 	    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->recvProcessNeighborY[processNeighbor].f[0],
 								    parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].f[0],
 								    parameter->getD3Qxx() * memsizeFsRecv,
@@ -610,11 +610,11 @@ void CudaMemoryManager::cudaCopyProcessNeighborYFsHD(int lev, unsigned int proce
                                          parameter->getParH(lev)->recvProcessNeighborY[processNeighbor].f[0],
                                          parameter->getD3Qxx() * memsizeFsRecv,
                                          cudaMemcpyHostToDevice,
-                                         parameter->getStreamManager()->getStream(CudaStreamIndex::Border)));
+                                         parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder)));
 }
 void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend)
 {
-    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::Border))
+    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder))
 	    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->sendProcessNeighborY[processNeighbor].f[0],
 	    							parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0],
 	    							parameter->getD3Qxx() * memsizeFsSend,
@@ -624,7 +624,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborYFsDH(int lev, unsigned int proce
                                          parameter->getParD(lev)->sendProcessNeighborY[processNeighbor].f[0],
                                          parameter->getD3Qxx() * memsizeFsSend,
                                          cudaMemcpyDeviceToHost, 
-                                         parameter->getStreamManager()->getStream(CudaStreamIndex::Border)));
+                                         parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder)));
 }
 void CudaMemoryManager::cudaFreeProcessNeighborY(int lev, unsigned int processNeighbor)
 {
@@ -670,7 +670,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborZIndex(int lev, unsigned int proc
 void CudaMemoryManager::cudaCopyProcessNeighborZFsHD(int lev, unsigned int processNeighbor,
                                                      const unsigned int &memsizeFsRecv)
 {
-    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::Border))
+    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder))
 	    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->recvProcessNeighborZ[processNeighbor].f[0],
 	    							parameter->getParH(lev)->recvProcessNeighborZ[processNeighbor].f[0],
 	    							parameter->getD3Qxx() * memsizeFsRecv,
@@ -680,12 +680,12 @@ void CudaMemoryManager::cudaCopyProcessNeighborZFsHD(int lev, unsigned int proce
 	    				                 parameter->getParH(lev)->recvProcessNeighborZ[processNeighbor].f[0],
 	    				                 parameter->getD3Qxx() * memsizeFsRecv,
 	    				                 cudaMemcpyHostToDevice,
-                                         parameter->getStreamManager()->getStream(CudaStreamIndex::Border)));
+                                         parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder)));
 }
 void CudaMemoryManager::cudaCopyProcessNeighborZFsDH(int lev, unsigned int processNeighbor,
                                                      const unsigned int &memsizeFsSend)
 {
-    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::Border))
+    if (!parameter->getStreamManager()->streamIsRegistered(CudaStreamIndex::SubDomainBorder))
         checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->sendProcessNeighborZ[processNeighbor].f[0],
 	        					    parameter->getParD(lev)->sendProcessNeighborZ[processNeighbor].f[0],
 	        					    parameter->getD3Qxx() * memsizeFsSend,
@@ -695,7 +695,7 @@ void CudaMemoryManager::cudaCopyProcessNeighborZFsDH(int lev, unsigned int proce
 	        						     parameter->getParD(lev)->sendProcessNeighborZ[processNeighbor].f[0],
 	        						     parameter->getD3Qxx() * memsizeFsSend,
 	        						     cudaMemcpyDeviceToHost,
-                                         parameter->getStreamManager()->getStream(CudaStreamIndex::Border)));
+                                         parameter->getStreamManager()->getStream(CudaStreamIndex::SubDomainBorder)));
 }
 void CudaMemoryManager::cudaFreeProcessNeighborZ(int lev, unsigned int processNeighbor)
 {
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
index b35e01eb997723eb12f5645857bc230536fe97fe..9ef5057bbf12af887d438e49b974402136fc60c1 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
@@ -1266,7 +1266,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
                                                 real velocityX,
                                                 real velocityY,
                                                 real velocityZ,
-                                                real tRatio,
+                                                real timeRatio,
                                                 real velocityRatio,
                                                 unsigned long long numberOfLBnodes,
                                                 bool isEvenTimestep);
@@ -1292,7 +1292,7 @@ __global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
                                         real velocityX,
                                         real velocityY,
                                         real velocityZ,
-                                        real tRatio,
+                                        real timeRatio,
                                         real velocityRatio,
                                         unsigned long long numberOfLBnodes,
                                         bool isEvenTimestep);
@@ -1314,7 +1314,7 @@ __global__ void PrecursorDeviceDistributions( 	int* subgridDistanceIndices,
 												real* weightsSB,
 												real* fsLast, 
 												real* fsNext,
-												real tRatio,
+												real timeRatio,
 												unsigned long long numberOfLBnodes,
 												bool isEvenTimestep);
 __global__ void QPrecursorDeviceDistributions( 	int* subgridDistanceIndices,
@@ -1336,7 +1336,7 @@ __global__ void QPrecursorDeviceDistributions( 	int* subgridDistanceIndices,
 												real* weightsSB,
 												real* fsLast, 
 												real* fsNext,
-												real tRatio,
+												real timeRatio,
 												unsigned long long numberOfLBnodes,
 												bool isEvenTimestep);
 												
diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
index ae51ce98e449bd3251f6b4df2a3d430815892a8a..06628edb5e3d47f5b0ed44ce61ec6da134c551da 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
@@ -3208,7 +3208,7 @@ void VelSchlaffer27(  unsigned int numberOfThreads,
       getLastCudaError("VelSchlaff27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void QPrecursorDevCompZeroPress(LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio)
+void QPrecursorDevCompZeroPress(LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real timeRatio, real velocityRatio)
 {
 
 	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
@@ -3220,12 +3220,12 @@ void QPrecursorDevCompZeroPress(LBMSimulationParameter* parameterDevice, QforPre
 		boundaryCondition->weightsNT, boundaryCondition->weightsNB, boundaryCondition->weightsST, boundaryCondition->weightsSB,
 		boundaryCondition->last, boundaryCondition->current,
 		boundaryCondition->velocityX, boundaryCondition->velocityY, boundaryCondition->velocityZ, 
-		tRatio, velocityRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep);
+		timeRatio, velocityRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep);
 	getLastCudaError("QPrecursorDeviceCompZeroPress execution failed"); 
 
 }
 //////////////////////////////////////////////////////////////////////////
-void PrecursorDevEQ27( LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio)
+void PrecursorDevEQ27( LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real timeRatio, real velocityRatio)
 {
 
 	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
@@ -3236,12 +3236,12 @@ void PrecursorDevEQ27( LBMSimulationParameter* parameterDevice, QforPrecursorBou
 		boundaryCondition->weightsNT, boundaryCondition->weightsNB, boundaryCondition->weightsST, boundaryCondition->weightsSB,
 		boundaryCondition->last, boundaryCondition->current,
 		boundaryCondition->velocityX, boundaryCondition->velocityY, boundaryCondition->velocityZ, 
-		tRatio, velocityRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep);
+		timeRatio, velocityRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep);
 	getLastCudaError("PrecursorDeviceEQ27 execution failed"); 
 
 }
 //////////////////////////////////////////////////////////////////////////
-void PrecursorDevDistributions( LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio)
+void PrecursorDevDistributions( LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real timeRatio, real velocityRatio)
 {
 
 	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
@@ -3251,13 +3251,13 @@ void PrecursorDevDistributions( LBMSimulationParameter* parameterDevice, QforPre
 		boundaryCondition->planeNeighborNT, boundaryCondition->planeNeighborNB, boundaryCondition->planeNeighborST, boundaryCondition->planeNeighborSB,
 		boundaryCondition->weightsNT, boundaryCondition->weightsNB, boundaryCondition->weightsST, boundaryCondition->weightsSB,
 		boundaryCondition->last, boundaryCondition->current,
-		tRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep);
+		timeRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep);
 	getLastCudaError("QPrecursorDeviceCompZeroPress execution failed"); 
 
 }
 
 //////////////////////////////////////////////////////////////////////////
-void QPrecursorDevDistributions( LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real tRatio, real velocityRatio)
+void QPrecursorDevDistributions( LBMSimulationParameter* parameterDevice, QforPrecursorBoundaryConditions* boundaryCondition, real timeRatio, real velocityRatio)
 {
 
 	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
@@ -3267,7 +3267,7 @@ void QPrecursorDevDistributions( LBMSimulationParameter* parameterDevice, QforPr
 		boundaryCondition->planeNeighborNT, boundaryCondition->planeNeighborNB, boundaryCondition->planeNeighborST, boundaryCondition->planeNeighborSB,
 		boundaryCondition->weightsNT, boundaryCondition->weightsNB, boundaryCondition->weightsST, boundaryCondition->weightsSB,
 		boundaryCondition->last, boundaryCondition->current,
-		tRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep);
+		timeRatio, parameterDevice->numberOfNodes, parameterDevice->isEvenTimestep);
 	getLastCudaError("QPrecursorDeviceCompZeroPress execution failed"); 
 
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu
index 9c7958cdc6bc783ba8aadcd0e19877f491fad085..f089a32955af615bfa5744857e1c7d76e2bb42d3 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu
@@ -32,7 +32,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
                                                 real velocityX,
                                                 real velocityY,
                                                 real velocityZ,
-                                                real tRatio,
+                                                real timeRatio,
                                                 real velocityRatio,
                                                 unsigned long long numberOfLBnodes,
                                                 bool isEvenTimestep)
@@ -89,9 +89,9 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
     }
 
     // if(k==16300)s printf("%f %f %f\n", vxLastInterpd, vyLastInterpd, vzLastInterpd);
-    real VeloX = (velocityX + (1.f-tRatio)*vxLastInterpd + tRatio*vxNextInterpd)/velocityRatio;
-    real VeloY = (velocityY + (1.f-tRatio)*vyLastInterpd + tRatio*vyNextInterpd)/velocityRatio; 
-    real VeloZ = (velocityZ + (1.f-tRatio)*vzLastInterpd + tRatio*vzNextInterpd)/velocityRatio;
+    real VeloX = (velocityX + (1.f-timeRatio)*vxLastInterpd + timeRatio*vxNextInterpd)/velocityRatio;
+    real VeloY = (velocityY + (1.f-timeRatio)*vyLastInterpd + timeRatio*vyNextInterpd)/velocityRatio; 
+    real VeloZ = (velocityZ + (1.f-timeRatio)*vzLastInterpd + timeRatio*vzNextInterpd)/velocityRatio;
     // From here on just a copy of QVelDeviceCompZeroPress
     ////////////////////////////////////////////////////////////////////////////////
 
@@ -445,7 +445,7 @@ __global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
                                         real velocityX,
                                         real velocityY,
                                         real velocityZ,
-                                        real tRatio,
+                                        real timeRatio,
                                         real velocityRatio,
                                         unsigned long long numberOfLBnodes,
                                         bool isEvenTimestep)
@@ -502,9 +502,9 @@ __global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
     }
 
     // if(k==16300) printf("%f %f %f\n", vxLastInterpd, vyLastInterpd, vzLastInterpd);
-    real VeloX = (velocityX + (1.f-tRatio)*vxLastInterpd + tRatio*vxNextInterpd)/velocityRatio;
-    real VeloY = (velocityY + (1.f-tRatio)*vyLastInterpd + tRatio*vyNextInterpd)/velocityRatio; 
-    real VeloZ = (velocityZ + (1.f-tRatio)*vzLastInterpd + tRatio*vzNextInterpd)/velocityRatio;
+    real VeloX = (velocityX + (1.f-timeRatio)*vxLastInterpd + timeRatio*vxNextInterpd)/velocityRatio;
+    real VeloY = (velocityY + (1.f-timeRatio)*vyLastInterpd + timeRatio*vyNextInterpd)/velocityRatio; 
+    real VeloZ = (velocityZ + (1.f-timeRatio)*vzLastInterpd + timeRatio*vzNextInterpd)/velocityRatio;
     // From here on just a copy of QVelDeviceCompZeroPress
     ////////////////////////////////////////////////////////////////////////////////
 
@@ -666,7 +666,7 @@ __global__ void PrecursorDeviceDistributions( 	int* subgridDistanceIndices,
 												real* weightsSB,
 												real* fsLast, 
 												real* fsNext,
-												real tRatio,
+												real timeRatio,
 												unsigned long long numberOfLBnodes,
 												bool isEvenTimestep)
 {
@@ -793,15 +793,15 @@ __global__ void PrecursorDeviceDistributions( 	int* subgridDistanceIndices,
     unsigned int ktne = KQK;
     // unsigned int kbsw = neighborZ[ksw];
 
-    dist.f[DIR_P00][ke]   = f0LastInterp*(1.f-tRatio) + f0NextInterp*tRatio;
-    dist.f[DIR_PP0][kne]  = f1LastInterp*(1.f-tRatio) + f1NextInterp*tRatio;
-    dist.f[DIR_PM0][kse]  = f2LastInterp*(1.f-tRatio) + f2NextInterp*tRatio;
-    dist.f[DIR_P0P][kte]  = f3LastInterp*(1.f-tRatio) + f3NextInterp*tRatio;
-    dist.f[DIR_P0M][kbe]  = f4LastInterp*(1.f-tRatio) + f4NextInterp*tRatio;
-    dist.f[DIR_PPP][ktne] = f5LastInterp*(1.f-tRatio) + f5NextInterp*tRatio;
-    dist.f[DIR_PMP][ktse] = f6LastInterp*(1.f-tRatio) + f6NextInterp*tRatio;
-    dist.f[DIR_PPM][kbne] = f7LastInterp*(1.f-tRatio) + f7NextInterp*tRatio;
-    dist.f[DIR_PMM][kbse] = f8LastInterp*(1.f-tRatio) + f8NextInterp*tRatio;
+    dist.f[DIR_P00][ke]   = f0LastInterp*(1.f-timeRatio) + f0NextInterp*timeRatio;
+    dist.f[DIR_PP0][kne]  = f1LastInterp*(1.f-timeRatio) + f1NextInterp*timeRatio;
+    dist.f[DIR_PM0][kse]  = f2LastInterp*(1.f-timeRatio) + f2NextInterp*timeRatio;
+    dist.f[DIR_P0P][kte]  = f3LastInterp*(1.f-timeRatio) + f3NextInterp*timeRatio;
+    dist.f[DIR_P0M][kbe]  = f4LastInterp*(1.f-timeRatio) + f4NextInterp*timeRatio;
+    dist.f[DIR_PPP][ktne] = f5LastInterp*(1.f-timeRatio) + f5NextInterp*timeRatio;
+    dist.f[DIR_PMP][ktse] = f6LastInterp*(1.f-timeRatio) + f6NextInterp*timeRatio;
+    dist.f[DIR_PPM][kbne] = f7LastInterp*(1.f-timeRatio) + f7NextInterp*timeRatio;
+    dist.f[DIR_PMM][kbse] = f8LastInterp*(1.f-timeRatio) + f8NextInterp*timeRatio;
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -825,7 +825,7 @@ __global__ void QPrecursorDeviceDistributions( 	int* subgridDistanceIndices,
 												real* weightsSB,
 												real* fsLast, 
 												real* fsNext,
-												real tRatio,
+												real timeRatio,
 												unsigned long long numberOfLBnodes,
 												bool isEvenTimestep)
 {
@@ -955,15 +955,15 @@ __global__ void QPrecursorDeviceDistributions( 	int* subgridDistanceIndices,
     getPointersToSubgridDistances(qs, subgridDistances, sizeQ);
 
     real q;
-    q = qs.q[DIR_P00][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P00][ke]   = f0LastInterp*(1.f-tRatio) + f0NextInterp*tRatio;
-    q = qs.q[DIR_PP0][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PP0][kne]  = f1LastInterp*(1.f-tRatio) + f1NextInterp*tRatio;
-    q = qs.q[DIR_PM0][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PM0][kse]  = f2LastInterp*(1.f-tRatio) + f2NextInterp*tRatio;
-    q = qs.q[DIR_P0P][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0P][kte]  = f3LastInterp*(1.f-tRatio) + f3NextInterp*tRatio;
-    q = qs.q[DIR_P0M][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0M][kbe]  = f4LastInterp*(1.f-tRatio) + f4NextInterp*tRatio;
-    q = qs.q[DIR_PPP][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPP][ktne] = f5LastInterp*(1.f-tRatio) + f5NextInterp*tRatio;
-    q = qs.q[DIR_PMP][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMP][ktse] = f6LastInterp*(1.f-tRatio) + f6NextInterp*tRatio;
-    q = qs.q[DIR_PPM][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPM][kbne] = f7LastInterp*(1.f-tRatio) + f7NextInterp*tRatio;
-    q = qs.q[DIR_PMM][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMM][kbse] = f8LastInterp*(1.f-tRatio) + f8NextInterp*tRatio;
+    q = qs.q[DIR_P00][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P00][ke]   = f0LastInterp*(1.f-timeRatio) + f0NextInterp*timeRatio;
+    q = qs.q[DIR_PP0][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PP0][kne]  = f1LastInterp*(1.f-timeRatio) + f1NextInterp*timeRatio;
+    q = qs.q[DIR_PM0][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PM0][kse]  = f2LastInterp*(1.f-timeRatio) + f2NextInterp*timeRatio;
+    q = qs.q[DIR_P0P][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0P][kte]  = f3LastInterp*(1.f-timeRatio) + f3NextInterp*timeRatio;
+    q = qs.q[DIR_P0M][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_P0M][kbe]  = f4LastInterp*(1.f-timeRatio) + f4NextInterp*timeRatio;
+    q = qs.q[DIR_PPP][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPP][ktne] = f5LastInterp*(1.f-timeRatio) + f5NextInterp*timeRatio;
+    q = qs.q[DIR_PMP][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMP][ktse] = f6LastInterp*(1.f-timeRatio) + f6NextInterp*timeRatio;
+    q = qs.q[DIR_PPM][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PPM][kbne] = f7LastInterp*(1.f-timeRatio) + f7NextInterp*timeRatio;
+    q = qs.q[DIR_PMM][k]; if(q>= c0o1 && q <= c1o1) dist.f[DIR_PMM][kbse] = f8LastInterp*(1.f-timeRatio) + f8NextInterp*timeRatio;
 
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h
index 7eaf0050b41a0aa67de0a4e592b8a123d9ef3e9f..50b4460d774010ea7d7b98cfa6fa505cdfeb88c2 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h
@@ -16,7 +16,7 @@ class Kernel
 public:
     virtual ~Kernel()  = default;
     virtual void run() = 0;
-    virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, CollisionTemplate collisionTemplate, CudaStreamIndex streamIdx=CudaStreamIndex::Legacy) = 0; //if stream == -1: run on default stream
+    virtual void runOnIndices(const unsigned int *indices, unsigned int size_indices, CollisionTemplate collisionTemplate, CudaStreamIndex streamIdx=CudaStreamIndex::Legacy) = 0;
 
     virtual bool checkParameter()                                = 0;
     virtual std::vector<PreProcessorType> getPreProcessorTypes() = 0;
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu
index 54dd11142d9c16063d58330cfe7351394bdfe51c..70b0c4352afee850a4e17243979268bd126b7b4a 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu
@@ -15,23 +15,21 @@ std::shared_ptr< CumulantK17<turbulenceModel> > CumulantK17<turbulenceModel>::ge
 template<TurbulenceModel turbulenceModel>
 void CumulantK17<turbulenceModel>::run()
 {
-	LB_Kernel_CumulantK17 < turbulenceModel, false, false  > <<< cudaGrid.grid, cudaGrid.threads >>>(   para->getParD(level)->omega, 	
-																												para->getParD(level)->typeOfGridNode, 										
-																												para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,	
-																												para->getParD(level)->distributions.f[0],	
-																												para->getParD(level)->rho,		
-																												para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,	
-																												para->getParD(level)->turbViscosity,
-																												para->getSGSConstant(),
-																												(unsigned long)para->getParD(level)->numberOfNodes,	
-																												level,				
-																												para->getIsBodyForce(),				
-																												para->getForcesDev(),				
-																												para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
-																												para->getQuadricLimitersDev(),			
-																												para->getParD(level)->isEvenTimestep,
-																												para->getParD(level)->taggedFluidNodeIndices[CollisionTemplate::Default],
-        																										para->getParD(level)->numberOfTaggedFluidNodes[CollisionTemplate::Default]);
+	LB_Kernel_CumulantK17 < turbulenceModel, false, false  > <<< cudaGrid.grid, cudaGrid.threads >>>(   para->getParD(level)->omega,										
+																										para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,	
+																										para->getParD(level)->distributions.f[0],	
+																										para->getParD(level)->rho,		
+																										para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,	
+																										para->getParD(level)->turbViscosity,
+																										para->getSGSConstant(),
+																										(unsigned long)para->getParD(level)->numberOfNodes,	
+																										level,			
+																										para->getForcesDev(),				
+																										para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
+																										para->getQuadricLimitersDev(),			
+																										para->getParD(level)->isEvenTimestep,
+																										para->getParD(level)->taggedFluidNodeIndices[CollisionTemplate::Default],
+																										para->getParD(level)->numberOfTaggedFluidNodes[CollisionTemplate::Default]);
 
 	getLastCudaError("LB_Kernel_CumulantK17 execution failed");
 }
@@ -44,89 +42,75 @@ void CumulantK17<turbulenceModel>::runOnIndices( const unsigned int *indices, un
 	switch (collisionTemplate)
 	{
 		case CollisionTemplate::Default:
-			LB_Kernel_CumulantK17 < turbulenceModel, false, false  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>(   
-																																	para->getParD(level)->omega, 	
-																																	para->getParD(level)->typeOfGridNode, 										
-																																	para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,	
-																																	para->getParD(level)->distributions.f[0],	
-																																	para->getParD(level)->rho,		
-																																	para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,	
-																																	para->getParD(level)->turbViscosity,
-																																	para->getSGSConstant(),
-																																	(unsigned long)para->getParD(level)->numberOfNodes,	
-																																	level,				
-																																	para->getIsBodyForce(),				
-																																	para->getForcesDev(),				
-																																	para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
-																																	para->getQuadricLimitersDev(),			
-																																	para->getParD(level)->isEvenTimestep,
-																																	indices,
-																																	size_indices);
+			LB_Kernel_CumulantK17 < turbulenceModel, false, false  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>(para->getParD(level)->omega,								
+																														para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,	
+																														para->getParD(level)->distributions.f[0],	
+																														para->getParD(level)->rho,		
+																														para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,	
+																														para->getParD(level)->turbViscosity,
+																														para->getSGSConstant(),
+																														(unsigned long)para->getParD(level)->numberOfNodes,	
+																														level,			
+																														para->getForcesDev(),				
+																														para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
+																														para->getQuadricLimitersDev(),			
+																														para->getParD(level)->isEvenTimestep,
+																														indices,
+																														size_indices);
 			break;
 		
 		case CollisionTemplate::WriteMacroVars:
-			LB_Kernel_CumulantK17 < turbulenceModel, true, false  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( 
-																																para->getParD(level)->omega, 	
-																																para->getParD(level)->typeOfGridNode, 										
-																																para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,	
-																																para->getParD(level)->distributions.f[0],	
-																																para->getParD(level)->rho,		
-																																para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,	
-																																para->getParD(level)->turbViscosity,
-																																para->getSGSConstant(),
-																																(unsigned long)para->getParD(level)->numberOfNodes,	
-																																level,				
-																																para->getIsBodyForce(),				
-																																para->getForcesDev(),				
-																																para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
-																																para->getQuadricLimitersDev(),			
-																																para->getParD(level)->isEvenTimestep,
-																																indices,
-																																size_indices);
+			LB_Kernel_CumulantK17 < turbulenceModel, true, false  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega,										
+																														para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,	
+																														para->getParD(level)->distributions.f[0],	
+																														para->getParD(level)->rho,		
+																														para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,	
+																														para->getParD(level)->turbViscosity,
+																														para->getSGSConstant(),
+																														(unsigned long)para->getParD(level)->numberOfNodes,	
+																														level,			
+																														para->getForcesDev(),				
+																														para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
+																														para->getQuadricLimitersDev(),			
+																														para->getParD(level)->isEvenTimestep,
+																														indices,
+																														size_indices);
 			break;
 		
-		case CollisionTemplate::Border:
+		case CollisionTemplate::SubDomainBorder:
 		case CollisionTemplate::AllFeatures:
-			LB_Kernel_CumulantK17 < turbulenceModel, true, true  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>(  
-																																para->getParD(level)->omega, 	
-																																para->getParD(level)->typeOfGridNode, 										
-																																para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,	
-																																para->getParD(level)->distributions.f[0],	
-																																para->getParD(level)->rho,		
-																																para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,	
-																																para->getParD(level)->turbViscosity,
-																																para->getSGSConstant(),
-																																(unsigned long)para->getParD(level)->numberOfNodes,	
-																																level,				
-																																para->getIsBodyForce(),				
-																																para->getForcesDev(),				
-																																para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
-																																para->getQuadricLimitersDev(),			
-																																para->getParD(level)->isEvenTimestep,
-																																indices,
-																																size_indices);
-			break;
-		case CollisionTemplate::ApplyBodyForce:
-			LB_Kernel_CumulantK17 < turbulenceModel, false, true  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( 
-																																para->getParD(level)->omega, 	
-																																para->getParD(level)->typeOfGridNode, 										
-																																para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,	
-																																para->getParD(level)->distributions.f[0],	
-																																para->getParD(level)->rho,		
-																																para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,	
-																																para->getParD(level)->turbViscosity,
-																																para->getSGSConstant(),
-																																(unsigned long)para->getParD(level)->numberOfNodes,	
-																																level,				
-																																para->getIsBodyForce(),				
-																																para->getForcesDev(),				
-																																para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
-																																para->getQuadricLimitersDev(),			
-																																para->getParD(level)->isEvenTimestep,
-																																indices,
-																																size_indices);
-			break;
-		default:
+			LB_Kernel_CumulantK17 < turbulenceModel, true, true  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>(  para->getParD(level)->omega,
+																														para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,	
+																														para->getParD(level)->distributions.f[0],	
+																														para->getParD(level)->rho,		
+																														para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,	
+																														para->getParD(level)->turbViscosity,
+																														para->getSGSConstant(),
+																														(unsigned long)para->getParD(level)->numberOfNodes,	
+																														level,			
+																														para->getForcesDev(),				
+																														para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
+																														para->getQuadricLimitersDev(),			
+																														para->getParD(level)->isEvenTimestep,
+																														indices,
+																														size_indices);
+			break;	case CollisionTemplate::ApplyBodyForce:
+			LB_Kernel_CumulantK17 < turbulenceModel, false, true  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega,									
+																														para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,	
+																														para->getParD(level)->distributions.f[0],	
+																														para->getParD(level)->rho,		
+																														para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,	
+																														para->getParD(level)->turbViscosity,
+																														para->getSGSConstant(),
+																														(unsigned long)para->getParD(level)->numberOfNodes,	
+																														level,			
+																														para->getForcesDev(),				
+																														para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
+																														para->getQuadricLimitersDev(),			
+																														para->getParD(level)->isEvenTimestep,
+																														indices,
+																														size_indices);
+			break;	default:
 			throw std::runtime_error("Invalid CollisionTemplate in CumulantK17::runOnIndices()");
 			break;
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu
index d2b679395a673b5b1d74e3f499a1d53b154d4b89..34a444230019a1c6cfacb18e12fd73607fb2e09a 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu
@@ -58,7 +58,6 @@ using namespace vf::lbm::dir;
 template<TurbulenceModel turbulenceModel, bool writeMacroscopicVariables, bool applyBodyForce>
 __global__ void LB_Kernel_CumulantK17(
 	real omega_in,
-	uint* typeOfGridNode,
 	uint* neighborX,
 	uint* neighborY,
 	uint* neighborZ,
@@ -71,7 +70,6 @@ __global__ void LB_Kernel_CumulantK17(
     real SGSconstant,
 	unsigned long numberOfLBnodes,
 	int level,
-    bool bodyForce,
 	real* forces,
     real* bodyForceX,
     real* bodyForceY,
@@ -695,34 +693,34 @@ __global__ void LB_Kernel_CumulantK17(
     (dist.f[DIR_MMM])[k_MMM]  = f_PPP;
 }
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, true, true > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, true, true > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, true, true > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, true, true > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, true, false > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, true, false > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, true, false > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, true, false > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, true, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, false, true > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, false, true > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, false, true > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, false, true > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, false, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, false, false > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, false, false > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, false, false > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, false, false > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
\ No newline at end of file
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh
index 9d56098f5053cc765b12eab0244a890d18209b1b..b8cc9543e9b531c5aa90cb2961416a6cbc52377d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh
@@ -6,7 +6,6 @@
 
 template< TurbulenceModel turbulenceModel, bool writeMacroscopicVariables, bool applyBodyForce > __global__ void LB_Kernel_CumulantK17(
 	real omega_in,
-	uint* typeOfGridNode,
 	uint* neighborX,
 	uint* neighborY,
 	uint* neighborZ,
@@ -19,7 +18,6 @@ template< TurbulenceModel turbulenceModel, bool writeMacroscopicVariables, bool
 	real SGSconstant,
 	unsigned long numberOfLBnodes,
 	int level,
-	bool bodyForce,
 	real* forces,
 	real* bodyForceX,
 	real* bodyForceY,
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
index 29341d2430ec6eff46065b10c1167f9e871edab8..1310cf09e5ce05270e88cd3a53e4910816bd7628 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
@@ -398,16 +398,16 @@ void BCKernelManager::runPrecursorBCKernelPost(int level, uint t, CudaMemoryMana
 
     uint t_level = para->getTimeStep(level, t, true);
 
-    uint lastTime =    (para->getParD(level)->precursorBC.nPrecursorReads-2)*para->getParD(level)->precursorBC.nTRead; // timestep currently loaded into last arrays
-    uint currentTime = (para->getParD(level)->precursorBC.nPrecursorReads-1)*para->getParD(level)->precursorBC.nTRead; // timestep currently loaded into current arrays
-    uint nextTime =     para->getParD(level)->precursorBC.nPrecursorReads   *para->getParD(level)->precursorBC.nTRead; // timestep currently loaded into next arrays
+    uint lastTime =    (para->getParD(level)->precursorBC.nPrecursorReads-2)*para->getParD(level)->precursorBC.timeStepsBetweenReads; // timestep currently loaded into last arrays
+    uint currentTime = (para->getParD(level)->precursorBC.nPrecursorReads-1)*para->getParD(level)->precursorBC.timeStepsBetweenReads; // timestep currently loaded into current arrays
+    uint nextTime =     para->getParD(level)->precursorBC.nPrecursorReads   *para->getParD(level)->precursorBC.timeStepsBetweenReads; // timestep currently loaded into next arrays
     
     if(t_level>=currentTime)
     {
         //cycle time
         lastTime = currentTime;
         currentTime = nextTime;
-        nextTime += para->getParD(level)->precursorBC.nTRead;
+        nextTime += para->getParD(level)->precursorBC.timeStepsBetweenReads;
 
         //cycle pointers
         real* tmp = para->getParD(level)->precursorBC.last;
@@ -426,6 +426,6 @@ void BCKernelManager::runPrecursorBCKernelPost(int level, uint t, CudaMemoryMana
         para->getParH(level)->precursorBC.nPrecursorReads++;  
     }
     
-    real tRatio = real(t_level-lastTime)/para->getParD(level)->precursorBC.nTRead;
+    real tRatio = real(t_level-lastTime)/para->getParD(level)->precursorBC.timeStepsBetweenReads;
     precursorBoundaryConditionPost(para->getParD(level).get(), &para->getParD(level)->precursorBC, tRatio, para->getVelocityRatio());
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h
index 904471123a895d65f33c8d91e6c5e5ed0296a9f6..c07769d5c06f4443335564511d449e82462f046f 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/LB.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h
@@ -75,9 +75,9 @@ enum class CollisionTemplate {
    //! - AllFeatures: collision \w write out macroscopic variables AND read and apply body force
    AllFeatures,
    //! - Border: collision on border nodes
-   Border
+   SubDomainBorder
 };
-constexpr std::initializer_list<CollisionTemplate> all_CollisionTemplate  = { CollisionTemplate::Default, CollisionTemplate::WriteMacroVars, CollisionTemplate::ApplyBodyForce, CollisionTemplate::AllFeatures, CollisionTemplate::Border};
+constexpr std::initializer_list<CollisionTemplate> all_CollisionTemplate  = { CollisionTemplate::Default, CollisionTemplate::WriteMacroVars, CollisionTemplate::ApplyBodyForce, CollisionTemplate::AllFeatures, CollisionTemplate::SubDomainBorder};
 constexpr std::initializer_list<CollisionTemplate> bulk_CollisionTemplate = { CollisionTemplate::Default, CollisionTemplate::WriteMacroVars, CollisionTemplate::ApplyBodyForce, CollisionTemplate::AllFeatures};
 
 struct InitCondition
@@ -237,7 +237,7 @@ typedef struct QforPrecursorBC{
    int sizeQ;
    int numberOfPrecursorNodes=0;
    uint nPrecursorReads=0;
-   uint nTRead;
+   uint timeStepsBetweenReads;
    size_t numberOfQuantities;
    real* q27[27];
    uint* planeNeighborNT, *planeNeighborNB, *planeNeighborST, *planeNeighborSB;
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index b120dd20531e2a4f672f16f59b4e2dfe6decaaf1..84ab84ff93fa7706bcc27d7e61a18f580f3c8dbe 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -139,7 +139,7 @@ void Simulation::init(GridProvider &gridProvider, BoundaryConditionFactory *bcFa
     //////////////////////////////////////////////////////////////////////////
     // CUDA streams
     if (para->getUseStreams()) {
-        para->getStreamManager()->registerStream(CudaStreamIndex::Border);
+        para->getStreamManager()->registerStream(CudaStreamIndex::SubDomainBorder);
         para->getStreamManager()->registerStream(CudaStreamIndex::Bulk);
         para->getStreamManager()->launchStreams();
         para->getStreamManager()->createCudaEvents();
diff --git a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h
index 73c6b9fa21999d577349401b31a6183e7b6d7f3b..53e030c0d16116a4edef0135f9ab435c853fd66b 100644
--- a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h
+++ b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h
@@ -1,5 +1,43 @@
-#ifndef FILE_WRITER_H
-#define FILE_WRITER_H
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __         
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
+//      \    \  |    |   ________________________________________________________________    
+//       \    \ |    |  |  ______________________________________________________________|   
+//        \    \|    |  |  |         __          __     __     __     ______      _______    
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file DistributionDebugInspector.h
+//! \author Henrik Asmuth
+//! \date 13/012/2022
+//! \brief Basic debugging class to print out f's in a certain area of the domain
+//!
+//! Basic debugging class. Needs to be directly added in UpdateGrid (could potentially also be added as a proper Probe in the future)
+//! How to use: Define a part of the domain via min/max x, y, and z. The DistributionDebugInspector will print out all f's in that area.
+//!
+//=======================================================================================
+
+#ifndef DISTRIBUTION_INSPECTOR_H
+#define DISTRIBUTION_INSPECTOR_H
 
 #include "Parameter/Parameter.h"
 
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
index abe780470f660410ca0e63dff2b3d0f300e7dbda..3cc771e413134e90b0d09d8eeb6dfee791f8a1e2 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.cpp
@@ -29,8 +29,6 @@
 //=======================================================================================
 #include "CudaStreamManager.h"
 #include <helper_cuda.h>
-#include <cuda_runtime.h>
-#include <cuda.h>
 #include <iostream>
 
 void CudaStreamManager::registerStream(CudaStreamIndex streamIndex)
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
index 06fc5624771e2b2b713ae84c24116ac4c622ec9f..5c59bcd3a5e6178d6e70a63f803caf8e29f32604 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/CudaStreamManager.h
@@ -39,7 +39,7 @@ enum class CudaStreamIndex
     {
         Legacy,
         Bulk,
-        Border,
+        SubDomainBorder,
         Precursor,
         ActuatorFarm
     };
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
index f83e50d52e609322780130c17ba9563062bf2834..86b7bc2a058d69ba878d4445953a4dc56e524027 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
@@ -372,12 +372,12 @@ struct LBMSimulationParameter {
 
     ///////////////////////////////////////////////////////
     std::map<CollisionTemplate, uint*>    taggedFluidNodeIndices = {{CollisionTemplate::Default,        nullptr},
-                                                                    {CollisionTemplate::Border,         nullptr},
+                                                                    {CollisionTemplate::SubDomainBorder,nullptr},
                                                                     {CollisionTemplate::WriteMacroVars, nullptr},
                                                                     {CollisionTemplate::ApplyBodyForce, nullptr},
                                                                     {CollisionTemplate::AllFeatures,    nullptr}};
     std::map<CollisionTemplate, uint >  numberOfTaggedFluidNodes = {{CollisionTemplate::Default,        0},
-                                                                    {CollisionTemplate::Border,         0},
+                                                                    {CollisionTemplate::SubDomainBorder,0},
                                                                     {CollisionTemplate::WriteMacroVars, 0},
                                                                     {CollisionTemplate::ApplyBodyForce, 0},
                                                                     {CollisionTemplate::AllFeatures,    0}};