diff --git a/src/basics/basics/writer/WbWriterVtkXmlImageBinary.cpp b/src/basics/basics/writer/WbWriterVtkXmlImageBinary.cpp
index 92434e18ba1a2d4b2bc2027c6697f2d1f8393cf5..798b55919df9e24dbc71ecfded5fb8a913cff8cf 100644
--- a/src/basics/basics/writer/WbWriterVtkXmlImageBinary.cpp
+++ b/src/basics/basics/writer/WbWriterVtkXmlImageBinary.cpp
@@ -40,7 +40,7 @@ using namespace std;
 const std::string WbWriterVtkXmlImageBinary::pvdEndTag = "   </Collection>\n</VTKFile>";
 /*===============================================================================*/
 string WbWriterVtkXmlImageBinary::writeCollection(const string &filename, const vector<string> &filenames,
-                                             const double &timeStep, const bool &sepGroups)
+                                                  const double &timeStep, const bool &sepGroups)
 {
     string vtkfilename = filename + ".pvd";
     ofstream out(vtkfilename.c_str());
@@ -79,7 +79,7 @@ string WbWriterVtkXmlImageBinary::writeCollection(const string &filename, const
 }
 /*===============================================================================*/
 string WbWriterVtkXmlImageBinary::addFilesToCollection(const string &filename, const vector<string> &filenames,
-                                                  const double &timeStep, const bool &sepGroups)
+                                                       const double &timeStep, const bool &sepGroups)
 {
     string vtkfilename = filename;
     fstream test(vtkfilename.c_str(), ios::in);
@@ -106,9 +106,10 @@ string WbWriterVtkXmlImageBinary::addFilesToCollection(const string &filename, c
     return vtkfilename;
 }
 /*===============================================================================*/
-string WbWriterVtkXmlImageBinary::writeParallelFile(const string &filename, const UbTupleInt6 &wholeExtent, const UbTupleFloat3 &origin, const UbTupleFloat3 &spacing, 
-                                                vector<string> &pieceSources, vector<UbTupleInt6> &pieceExtents,
-                                                vector<string> &pointDataNames, vector<string> &cellDataNames)
+string WbWriterVtkXmlImageBinary::writeParallelFile(const string &filename, const UbTupleInt6 &wholeExtent,
+                                                    const UbTupleFloat3 &origin, const UbTupleFloat3 &spacing,
+                                                    vector<string> &pieceSources, vector<UbTupleInt6> &pieceExtents,
+                                                    vector<string> &pointDataNames, vector<string> &cellDataNames)
 {
     string vtkfilename = filename + ".pvti";
     UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeParallelFile to " << vtkfilename << " - start");
@@ -170,8 +171,8 @@ string WbWriterVtkXmlImageBinary::writeParallelFile(const string &filename, cons
 }
 /*===============================================================================*/
 string WbWriterVtkXmlImageBinary::writeOctsWithCellData(const string &filename, vector<UbTupleFloat3> &nodes,
-                                                   vector<UbTupleInt8> &cells, vector<string> &datanames,
-                                                   vector<vector<double>> &celldata)
+                                                        vector<UbTupleInt8> & /*cells*/, vector<string> &datanames,
+                                                        vector<vector<double>> &celldata)
 {
     string vtkfilename = filename + getFileExtension();
     UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeOctsWithCellData to " << vtkfilename << " - start");
@@ -191,8 +192,8 @@ string WbWriterVtkXmlImageBinary::writeOctsWithCellData(const string &filename,
 }
 /*===============================================================================*/
 string WbWriterVtkXmlImageBinary::writeOctsWithNodeData(const string &filename, vector<UbTupleFloat3> &nodes,
-                                                   vector<UbTupleUInt8> &cells, vector<string> &datanames,
-                                                   vector<vector<double>> &nodedata)
+                                                        vector<UbTupleUInt8> & /*cells*/, vector<string> &datanames,
+                                                        vector<vector<double>> &nodedata)
 {
     string vtkfilename = filename + getFileExtension();
     UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeOctsWithNodeData to " << vtkfilename << " - start");
@@ -213,8 +214,7 @@ string WbWriterVtkXmlImageBinary::writeOctsWithNodeData(const string &filename,
 }
 /*===============================================================================*/
 string WbWriterVtkXmlImageBinary::writeNodesWithNodeData(const string &filename, vector<UbTupleFloat3> &nodes,
-                                                    vector<string> &datanames,
-                                                    vector<vector<double>> &nodedata)
+                                                         vector<string> &datanames, vector<vector<double>> &nodedata)
 {
     string vtkfilename = filename + getFileExtension();
     UBLOG(logDEBUG1, "WbWriterVtkXmlImageBinary::writeNodesWithNodeData to " << vtkfilename << " - start");
@@ -231,7 +231,8 @@ string WbWriterVtkXmlImageBinary::writeNodesWithNodeData(const string &filename,
     return vtkfilename;
 }
 
-void WbWriterVtkXmlImageBinary::getMetaDataOfImage(vector<UbTupleFloat3> &nodes, UbTupleFloat3& origin, UbTupleFloat3& spacing, UbTupleInt6& extent)
+void WbWriterVtkXmlImageBinary::getMetaDataOfImage(vector<UbTupleFloat3> &nodes, UbTupleFloat3 &origin,
+                                                   UbTupleFloat3 &spacing, UbTupleInt6 &extent)
 {
     int nofNodes = (int)nodes.size();
     val<1>(origin) = val<1>(nodes[0]);
@@ -247,17 +248,17 @@ void WbWriterVtkXmlImageBinary::getMetaDataOfImage(vector<UbTupleFloat3> &nodes,
     int ny = (l_y) / val<2>(spacing);
     val<3>(spacing) = val<3>(nodes[nx*ny])-val<3>(nodes[0]);
 
-    val<1>(extent) = val<1>(origin)/val<1>(spacing); val<2>(extent) = val<1>(nodes[nofNodes-1])/val<1>(spacing);    
-    val<3>(extent) = val<2>(origin)/val<2>(spacing); val<4>(extent) = val<2>(nodes[nofNodes-1])/val<2>(spacing);    
-    val<5>(extent) = val<3>(origin)/val<3>(spacing); val<6>(extent) = val<3>(nodes[nofNodes-1])/val<3>(spacing);    
+    val<1>(extent) = val<1>(origin) / val<1>(spacing); val<2>(extent) = val<1>(nodes[nofNodes - 1]) / val<1>(spacing);    
+    val<3>(extent) = val<2>(origin) / val<2>(spacing); val<4>(extent) = val<2>(nodes[nofNodes - 1]) / val<2>(spacing);    
+    val<5>(extent) = val<3>(origin) / val<3>(spacing); val<6>(extent) = val<3>(nodes[nofNodes - 1]) / val<3>(spacing);    
 
 }
 
-void WbWriterVtkXmlImageBinary::writeData(const string &vtkfilename,
-                                            vector<string> &pointDataNames, vector<string> &cellDataNames,
-                                            vector<vector<double>> &nodedata, vector<vector<double>> &celldata,
-                                            UbTupleInt6& wholeExtent,
-                                            UbTupleFloat3& origin, UbTupleFloat3& spacing, UbTupleInt6& extent, unsigned int precision)
+void WbWriterVtkXmlImageBinary::writeData(const string &vtkfilename, vector<string> &pointDataNames,
+                                          vector<string> &cellDataNames, vector<vector<double>> &nodedata,
+                                          vector<vector<double>> &celldata, UbTupleInt6 &wholeExtent,
+                                          UbTupleFloat3 &origin, UbTupleFloat3 &spacing, UbTupleInt6 &extent,
+                                          unsigned int precision)
 {
     ofstream out(vtkfilename.c_str(), ios::out | ios::binary);
     out.precision(precision);
@@ -273,14 +274,14 @@ void WbWriterVtkXmlImageBinary::writeData(const string &vtkfilename,
             throw UbException(UB_EXARGS, "couldn't open file " + vtkfilename);
     }
 
-    size_t nPoints = pointDataNames.size()>0 ? nodedata[0].size() : celldata[0].size();
+    size_t nPoints = pointDataNames.size() > 0 ? nodedata[0].size() : celldata[0].size();
 
-    int bytesPerByteVal      = 4; //==sizeof(int)
+    int bytesPerByteVal = 4; //==sizeof(int)
 
-    int bytesScalarData      = 1 /*scalar         */ * (int)nPoints * sizeof(double);
+    int bytesScalarData = 1 /*scalar         */ * (int)nPoints * sizeof(double);
 
     int offset = 0;
-    
+
     // VTK FILE
     out << "<?xml version=\"1.0\"?>\n";
     out << "<VTKFile type=\"ImageData\" version=\"0.1\" byte_order=\"LittleEndian\" >"
@@ -307,23 +308,21 @@ void WbWriterVtkXmlImageBinary::writeData(const string &vtkfilename,
                                     << val<6>(extent) << "\">\n";
 
     // DATA SECTION
-    if (pointDataNames.size()>0)
-    {
+    if (pointDataNames.size() > 0) {
         out << "         <PointData>\n";
         for (size_t s = 0; s < pointDataNames.size(); ++s) {
-            out << "            <DataArray type=\"Float64\" Name=\"" << pointDataNames[s] << "\" format=\"appended\" offset=\""
-                << offset << "\" /> \n";
+            out << "            <DataArray type=\"Float64\" Name=\"" << pointDataNames[s]
+                << "\" format=\"appended\" offset=\"" << offset << "\" /> \n";
             offset += (bytesPerByteVal + bytesScalarData);
         }
         out << "         </PointData>\n";
     }
 
-    if (cellDataNames.size()>0)
-    {
+    if (cellDataNames.size() > 0) {
         out << "         <CellData>\n";
         for (size_t s = 0; s < cellDataNames.size(); ++s) {
-            out << "            <DataArray type=\"Float64\" Name=\"" << cellDataNames[s] << "\" format=\"appended\" offset=\""
-                << offset << "\" /> \n";
+            out << "            <DataArray type=\"Float64\" Name=\"" << cellDataNames[s]
+                << "\" format=\"appended\" offset=\"" << offset << "\" /> \n";
             offset += (bytesPerByteVal + bytesScalarData);
         }
         out << "         </CellData>\n";
@@ -336,7 +335,6 @@ void WbWriterVtkXmlImageBinary::writeData(const string &vtkfilename,
     out << "   <AppendedData encoding=\"raw\">\n";
     out << "_";
 
-
     // DATA SECTION
     // pointData
     for (size_t s = 0; s < pointDataNames.size(); ++s) {
@@ -359,4 +357,4 @@ void WbWriterVtkXmlImageBinary::writeData(const string &vtkfilename,
     out << "</VTKFile>";
     out << endl;
     out.close();
-}
\ No newline at end of file
+}
diff --git a/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.h b/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.h
index fe4078af95904fa5e1580b54f3aa2edbb006bd3d..9c3bac9c3e2795fa99f339461c6a7f2d16448696 100644
--- a/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.h
+++ b/src/gpu/GksGpu/BoundaryConditions/BoundaryCondition.h
@@ -47,13 +47,13 @@ struct GKSGPU_EXPORT BoundaryCondition : virtual public BoundaryConditionStruct,
     virtual bool isWall() = 0;
 
     virtual bool isFluxBC();
-    
+
     virtual bool isInsulated();
 
     virtual bool secondCellsNeeded();
 
     virtual void runBoundaryConditionKernel( const SPtr<DataBase> dataBase,
-                                             const Parameters parameters, 
+                                             const Parameters parameters,
                                              const uint level ) = 0;
 
     BoundaryConditionStruct toStruct()
diff --git a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp
index 3026458c988c4e7624e15304f144d01f40fb0cdf..5f3c4ad492b16c09b26acd00a624a54ad65dffda 100644
--- a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp
+++ b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp
@@ -156,7 +156,8 @@ void VTKFile::unloadFile()
     this->loaded = false;
 }
 
-void VTKFile::getData(real* data, uint numberOfNodes, const std::vector<uint>& readIndices, const std::vector<uint>& writeIndices, uint offsetRead, uint offsetWrite)
+void VTKFile::getData(real *data, uint numberOfNodes, const std::vector<uint> &readIndices,
+                      const std::vector<uint> &writeIndices, uint offsetRead, uint offsetWrite)
 {
     if(!this->loaded) loadFile();
 
@@ -305,7 +306,10 @@ void VTKReader::fillArrays(std::vector<real>& coordsY, std::vector<real>& coords
                     this->planeNeighbor0PM.push_back(writeIdx);
                     this->planeNeighbor0MP.push_back(writeIdx);
                     this->planeNeighbor0MM.push_back(writeIdx);
-                    found0PP = true; found0PM = true; found0MM = true; found0MP = true;
+                    found0PP = true;
+                    found0PM = true;
+                    found0MM = true;
+                    found0MP = true;
                 } 
                 else
                 {
@@ -325,40 +329,40 @@ void VTKReader::fillArrays(std::vector<real>& coordsY, std::vector<real>& coords
             
             if(!found0PP) //NT in simulation is EN in precursor
             {
-                int idx = file.findNeighborPPM(posY, posZ, 0.f);
-                if(idx!=-1)
+                int index = file.findNeighborPPM(posY, posZ, 0.f);
+                if(index!=-1)
                 {
                     found0PP = true;
-                    real dy = file.getX(idx)-posY;
-                    real dz = file.getY(idx)-posZ;
+                    real dy = file.getX(index)-posY;
+                    real dz = file.getY(index)-posZ;
                     this->weights0PP.emplace_back(1.f/(dy*dy+dz*dz+eps));
-                    this->planeNeighbor0PP.emplace_back(getWriteIndex(level, fileId, idx));
+                    this->planeNeighbor0PP.emplace_back(getWriteIndex(level, fileId, index));
                 }
             }
 
             if(!found0PM) //NB in simulation is ES in precursor
             {
-                int idx = file.findNeighborPMM(posY, posZ, 0.f);
-                if(idx!=-1)
+                int index = file.findNeighborPMM(posY, posZ, 0.f);
+                if(index!=-1)
                 {
                     found0PM = true;
-                    real dy = file.getX(idx)-posY;
-                    real dz = file.getY(idx)-posZ;
+                    real dy = file.getX(index)-posY;
+                    real dz = file.getY(index)-posZ;
                     this->weights0PM.emplace_back(1.f/(dy*dy+dz*dz+eps));
-                    this->planeNeighbor0PP.emplace_back(getWriteIndex(level, fileId, idx));
+                    this->planeNeighbor0PM.emplace_back(getWriteIndex(level, fileId, index)); // keep the 0PM neighbor list parallel to weights0PM
                 }
             }
 
             if(!found0MP) //ST in simulation is WN in precursor
             {
-                int idx = file.findNeighborMPM(posY, posZ, 0.f);
-                if(idx!=-1)
+                int index = file.findNeighborMPM(posY, posZ, 0.f);
+                if(index!=-1)
                 {
                     found0MP = true;
-                    real dy = file.getX(idx)-posY;
-                    real dz = file.getY(idx)-posZ;
+                    real dy = file.getX(index)-posY;
+                    real dz = file.getY(index)-posZ;
                     this->weights0MP.emplace_back(1.f/(dy*dy+dz*dz+eps));
-                    this->planeNeighbor0MP.emplace_back(getWriteIndex(level, fileId, idx));
+                    this->planeNeighbor0MP.emplace_back(getWriteIndex(level, fileId, index));
                 }
             }
 
@@ -413,7 +417,7 @@ void VTKReader::getNextData(real* data, uint numberOfNodes, real time)
             {
                 numberOfFiles++;
 
-                printf("switching to precursor file no. %zd\n", numberOfFiles);
+                printf("switching to precursor file no. %zu\n", numberOfFiles);
                 if(numberOfFiles == this->fileCollection->files[level][id].size())
                     throw std::runtime_error("Not enough Precursor Files to read");
 
@@ -437,4 +441,4 @@ void VTKReader::getNextData(real* data, uint numberOfNodes, real time)
             this->nFile[level][id] = numberOfFiles;
         }
     // }
-}
\ No newline at end of file
+}
diff --git a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h
index 5bee61e194670c74ca8bd8da87f3881956fff466..1663a3ff37ba1bb062647847462d4e364baed93b 100644
--- a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h
+++ b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.h
@@ -32,7 +32,7 @@ struct Quantity
 class VTKFile
 {
 public: 
-    VTKFile(std::string _fileName): 
+    explicit VTKFile(std::string _fileName): 
     fileName(_fileName)
     {
         readHeader();
@@ -100,9 +100,9 @@ public:
 
     virtual ~FileCollection() = default;
 
-    virtual size_t getNumberOfQuantities()=0;
+    virtual size_t getNumberOfQuantities() = 0;
 
-    virtual FileType getFileType()=0;
+    virtual FileType getFileType() = 0;
 
 protected:
     std::string prefix;
@@ -118,8 +118,8 @@ public:
         findFiles();
     };
 
-    FileType getFileType(){ return FileType::VTK; };
-    size_t getNumberOfQuantities(){ return files[0][0][0].getNumberOfQuantities(); }
+    FileType getFileType() override{ return FileType::VTK; };
+    size_t getNumberOfQuantities() override{ return files[0][0][0].getNumberOfQuantities(); }
     
 
 private:
@@ -198,4 +198,4 @@ private:
 SPtr<FileCollection> createFileCollection(std::string prefix, FileType type);
 SPtr<TransientBCInputFileReader> createReaderForCollection(SPtr<FileCollection> fileCollection, uint readLevel);
 
-#endif //TRANSIENTBCSETTER_H_
\ No newline at end of file
+#endif //TRANSIENTBCSETTER_H_
diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
index 0900c2d587ba9811c480427b833e7e083216cf10..5b191ee4e3fcdc0ec71633111085f70c5dc43479 100644
--- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
+++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
@@ -1,28 +1,28 @@
 //=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
 //           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
 //  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
+//  License as published by the Free Software Foundation, either version 3 of
 //  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 //  for more details.
-//  
+//
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
@@ -65,14 +65,14 @@ void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition
                                             ||  grid->getFieldEntry(index) == vf::gpu::FLUID_CFC
                                             ||  grid->getFieldEntry(index) == vf::gpu::FLUID_CFF
                                             ||  grid->getFieldEntry(index) == vf::gpu::FLUID_FCC
-                                            ||  grid->getFieldEntry(index) == vf::gpu::FLUID_FCF 
                                             ||  grid->getFieldEntry(index) == vf::gpu::FLUID_FCF
-                                            
+                                            ||  grid->getFieldEntry(index) == vf::gpu::FLUID_FCF
+
                                             //! Enforce overlap of BCs on edge nodes
                                             ||  grid->getFieldEntry(index)  == vf::gpu::BC_PRESSURE
-                                            ||  grid->getFieldEntry(index)  == vf::gpu::BC_VELOCITY 
-                                            ||  grid->getFieldEntry(index)  == vf::gpu::BC_NOSLIP   
-                                            ||  grid->getFieldEntry(index)  == vf::gpu::BC_SLIP     
+                                            ||  grid->getFieldEntry(index)  == vf::gpu::BC_VELOCITY
+                                            ||  grid->getFieldEntry(index)  == vf::gpu::BC_NOSLIP
+                                            ||  grid->getFieldEntry(index)  == vf::gpu::BC_SLIP
                                             ||  grid->getFieldEntry(index)  == vf::gpu::BC_STRESS ))
             {
                 grid->setFieldEntry(index, boundaryCondition->getType());
@@ -175,7 +175,7 @@ void Side::setQs(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, uin
         bool alignedWithNormal = (this->getNormal()[0]*grid->getDirection()[dir * DIMENSION + 0]+
                                   this->getNormal()[1]*grid->getDirection()[dir * DIMENSION + 1]+
                                   this->getNormal()[2]*grid->getDirection()[dir * DIMENSION + 2] ) > 0;
-        
+
         uint neighborIndex = grid->transCoordToIndex( neighborX, neighborY, neighborZ );
         if((grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY ||
             grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID          ||
@@ -185,7 +185,7 @@ void Side::setQs(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, uin
         else
             qNode[dir] = -1.0;
     }
-    
+
     boundaryCondition->qs.push_back(qNode);
 }
 
@@ -214,7 +214,7 @@ void Geometry::addIndices(std::vector<SPtr<Grid> > grids, uint level, SPtr<Bound
 
         for (int dir = 0; dir <= grids[level]->getEndDirection(); dir++)
         {
-			const real q = grids[level]->getQValue(index, dir);
+            const real q = grids[level]->getQValue(index, dir);
 
             qNode[dir] = q;
 
@@ -284,7 +284,7 @@ void MY::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond
     real coordinateNormal = grid[level]->getStartY() + grid[level]->getDelta();
 
     if( coordinateNormal > grid[0]->getStartY() + grid[0]->getDelta() ) return;
-    
+
     Side::addIndices(grid[level], boundaryCondition, "y", coordinateNormal, startInner, endInner, startOuter, endOuter);
 }
 
@@ -331,6 +331,6 @@ void PZ::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond
     real coordinateNormal = grid[level]->getEndZ() - grid[level]->getDelta();
 
     if( coordinateNormal < grid[0]->getEndZ() - grid[0]->getDelta() ) return;
-    
+
     Side::addIndices(grid[level], boundaryCondition, "z", coordinateNormal, startInner, endInner, startOuter, endOuter);
 }
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
index 065665d216e3cf7904530b94c8bb6480bb565c8a..f3d850384816f6690e5ffc158bbdc5e1df0ab328 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
@@ -163,4 +163,4 @@ public:
 
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
index ff6f4913e4cd8a32c05272ef583f90a2cf226edc..003e6dcd223d2bf019c83f71349a9a7bec84efdc 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
+++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
@@ -1,28 +1,28 @@
 //=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
 //           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
 //  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
+//  License as published by the Free Software Foundation, either version 3 of
 //  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 //  for more details.
-//  
+//
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
@@ -105,17 +105,17 @@ void LevelGridBuilder::setSlipGeometryBoundaryCondition(real normalX, real norma
 
     for (uint level = 0; level < getNumberOfGridLevels(); level++)
     {
-		if (boundaryConditions[level]->geometryBoundaryCondition != nullptr)
-		{
-			boundaryConditions[level]->geometryBoundaryCondition->normalX = normalX;
-			boundaryConditions[level]->geometryBoundaryCondition->normalY = normalY;
-			boundaryConditions[level]->geometryBoundaryCondition->normalZ = normalZ;
-			boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition);
+        if (boundaryConditions[level]->geometryBoundaryCondition != nullptr)
+        {
+            boundaryConditions[level]->geometryBoundaryCondition->normalX = normalX;
+            boundaryConditions[level]->geometryBoundaryCondition->normalY = normalY;
+            boundaryConditions[level]->geometryBoundaryCondition->normalZ = normalZ;
+            boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition);
 
             boundaryConditions[level]->geometryBoundaryCondition->fillSlipNormalLists();
 
             *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Geometry Slip BC on level " << level << " with " << (int)boundaryConditions[level]->geometryBoundaryCondition->indices.size() <<"\n";
-		}
+        }
     }
 }
 
@@ -123,10 +123,10 @@ void LevelGridBuilder::setSlipGeometryBoundaryCondition(real normalX, real norma
 //! \brief Set stress boundary concdition using iMEM
 //! \param samplingOffset number of grid points above boundary where velocity for wall model is sampled
 //! \param z0 roughness length [m]
-//! \param dx dx of level 0 [m] 
+//! \param dx dx of level 0 [m]
 //!
-void LevelGridBuilder::setStressBoundaryCondition(  SideType sideType, 
-                                                    real nomalX, real normalY, real normalZ, 
+void LevelGridBuilder::setStressBoundaryCondition(  SideType sideType,
+                                                    real nomalX, real normalY, real normalZ,
                                                     uint samplingOffset, real z0, real dx)
 {
     for (uint level = 0; level < getNumberOfGridLevels(); level++)
@@ -178,17 +178,17 @@ void LevelGridBuilder::setVelocityGeometryBoundaryCondition(real vx, real vy, re
 
     for (uint level = 0; level < getNumberOfGridLevels(); level++)
     {
-		if (boundaryConditions[level]->geometryBoundaryCondition != nullptr)
-		{
-			boundaryConditions[level]->geometryBoundaryCondition->vx = vx;
-			boundaryConditions[level]->geometryBoundaryCondition->vy = vy;
-			boundaryConditions[level]->geometryBoundaryCondition->vz = vz;
-			boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition);
+        if (boundaryConditions[level]->geometryBoundaryCondition != nullptr)
+        {
+            boundaryConditions[level]->geometryBoundaryCondition->vx = vx;
+            boundaryConditions[level]->geometryBoundaryCondition->vy = vy;
+            boundaryConditions[level]->geometryBoundaryCondition->vz = vz;
+            boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition);
 
             boundaryConditions[level]->geometryBoundaryCondition->fillVelocityLists();
 
             *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Geometry Velocity BC on level " << level << " with " << (int)boundaryConditions[level]->geometryBoundaryCondition->indices.size() <<"\n";
-		}
+        }
     }
 }
 
@@ -230,7 +230,7 @@ void LevelGridBuilder::setNoSlipBoundaryCondition(SideType sideType)
             noSlipBoundaryCondition->fillVelocityLists();
 
             // now effectively just a wrapper for velocityBC with zero velocity. No distinction in Gridgenerator.
-            boundaryConditions[level]->velocityBoundaryConditions.push_back(noSlipBoundaryCondition); 
+            boundaryConditions[level]->velocityBoundaryConditions.push_back(noSlipBoundaryCondition);
         }
     }
 }
@@ -241,23 +241,23 @@ void LevelGridBuilder::setNoSlipGeometryBoundaryCondition()
 
     for (uint level = 0; level < getNumberOfGridLevels(); level++)
     {
-		if (boundaryConditions[level]->geometryBoundaryCondition != nullptr)
-		{
-			boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition);
+        if (boundaryConditions[level]->geometryBoundaryCondition != nullptr)
+        {
+            boundaryConditions[level]->geometryBoundaryCondition->side->addIndices(grids, level, boundaryConditions[level]->geometryBoundaryCondition);
 
             *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Geometry No-Slip BC on level " << level << " with " << (int)boundaryConditions[level]->geometryBoundaryCondition->indices.size() <<"\n";
-		}
+        }
     }
 }
 
-void LevelGridBuilder::setPrecursorBoundaryCondition(SideType sideType, SPtr<FileCollection> fileCollection, int timeStepsBetweenReads, 
+void LevelGridBuilder::setPrecursorBoundaryCondition(SideType sideType, SPtr<FileCollection> fileCollection, int timeStepsBetweenReads,
                                                         real velocityX, real velocityY, real velocityZ, std::vector<uint> fileLevelToGridLevelMap)
 {
-    if(fileLevelToGridLevelMap.empty())                         
+    if(fileLevelToGridLevelMap.empty())
     {
         *logging::out << logging::Logger::INFO_INTERMEDIATE << "Mapping precursor file levels to the corresponding grid levels" << "\n";
 
-        for (uint level = 0; level < getNumberOfGridLevels(); level++)  
+        for (uint level = 0; level < getNumberOfGridLevels(); level++)
             fileLevelToGridLevelMap.push_back(level);
     }
     else
@@ -413,9 +413,9 @@ std::shared_ptr<Grid> LevelGridBuilder::getGrid(int level, int box)
 void LevelGridBuilder::checkLevel(int level)
 {
     if (level >= (int)grids.size())
-    { 
+    {
         std::cout << "wrong level input... return to caller\n";
-        return; 
+        return;
     }
 }
 
@@ -426,16 +426,16 @@ void LevelGridBuilder::getDimensions(int &nx, int &ny, int &nz, const int level)
     nz = grids[level]->getNumberOfNodesZ();
 }
 
-void LevelGridBuilder::getNodeValues(real *xCoords, real *yCoords, real *zCoords, 
-                                     uint *neighborX, uint *neighborY, uint *neighborZ, uint *neighborNegative, 
+void LevelGridBuilder::getNodeValues(real *xCoords, real *yCoords, real *zCoords,
+                                     uint *neighborX, uint *neighborY, uint *neighborZ, uint *neighborNegative,
                                      uint *geo, const int level) const
 {
     grids[level]->getNodeValues(xCoords, yCoords, zCoords, neighborX, neighborY, neighborZ, neighborNegative, geo);
 }
 
 
-GRIDGENERATOR_EXPORT void LevelGridBuilder::getFluidNodeIndices(uint *fluidNodeIndices, const int level) const 
-{ 
+GRIDGENERATOR_EXPORT void LevelGridBuilder::getFluidNodeIndices(uint *fluidNodeIndices, const int level) const
+{
     grids[level]->getFluidNodeIndices(fluidNodeIndices);
 }
 
@@ -444,9 +444,9 @@ GRIDGENERATOR_EXPORT void LevelGridBuilder::getFluidNodeIndicesBorder(uint *flui
     grids[level]->getFluidNodeIndicesBorder(fluidNodeIndices);
 }
 
-uint LevelGridBuilder::getNumberOfFluidNodes(unsigned int level) const 
+uint LevelGridBuilder::getNumberOfFluidNodes(unsigned int level) const
 {
-    return grids[level]->getNumberOfFluidNodes(); 
+    return grids[level]->getNumberOfFluidNodes();
 }
 
 GRIDGENERATOR_EXPORT uint LevelGridBuilder::getNumberOfFluidNodesBorder(unsigned int level) const
@@ -472,7 +472,7 @@ void LevelGridBuilder::getSlipValues(real* normalX, real* normalY, real* normalZ
         for (uint index = 0; index < boundaryCondition->indices.size(); index++)
         {
             indices[allIndicesCounter] = grids[level]->getSparseIndex(boundaryCondition->indices[index]) + 1;
-            
+
             normalX[allIndicesCounter] = boundaryCondition->getNormalx(index);
             normalY[allIndicesCounter] = boundaryCondition->getNormaly(index);
             normalZ[allIndicesCounter] = boundaryCondition->getNormalz(index);
@@ -507,9 +507,9 @@ uint LevelGridBuilder::getStressSize(int level) const
     return size;
 }
 
-void LevelGridBuilder::getStressValues( real* normalX, real* normalY, real* normalZ, 
-                                        real* vx,      real* vy,      real* vz, 
-                                        real* vx1,     real* vy1,     real* vz1, 
+void LevelGridBuilder::getStressValues( real* normalX, real* normalY, real* normalZ,
+                                        real* vx,      real* vy,      real* vz,
+                                        real* vx1,     real* vy1,     real* vz1,
                                         int* indices, int* samplingIndices, int* samplingOffset, real* z0, int level) const
 {
 
@@ -565,7 +565,7 @@ void LevelGridBuilder::getVelocityValues(real* vx, real* vy, real* vz, int* indi
     {
         for (uint i = 0; i < (uint)boundaryCondition->indices.size(); i++)
         {
-            indices[allIndicesCounter] = grids[level]->getSparseIndex(boundaryCondition->indices[i]) +1;  
+            indices[allIndicesCounter] = grids[level]->getSparseIndex(boundaryCondition->indices[i]) +1;
 
             vx[allIndicesCounter] = boundaryCondition->getVx(i);
             vy[allIndicesCounter] = boundaryCondition->getVy(i);
@@ -644,17 +644,17 @@ uint LevelGridBuilder::getPrecursorSize(int level) const
     return size;
 }
 
-void LevelGridBuilder::getPrecursorValues(  uint* neighbor0PP, uint* neighbor0PM, uint* neighbor0MP, uint* neighbor0MM, 
+void LevelGridBuilder::getPrecursorValues(  uint* neighbor0PP, uint* neighbor0PM, uint* neighbor0MP, uint* neighbor0MM,
                                             real* weights0PP, real* weights0PM, real* weights0MP, real* weights0MM,
-                                            int* indices, std::vector<SPtr<TransientBCInputFileReader>>& reader, 
-                                            int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& timeStepsBetweenReads, 
+                                            int* indices, std::vector<SPtr<TransientBCInputFileReader>>& reader,
+                                            int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& timeStepsBetweenReads,
                                             real& velocityX, real& velocityY, real& velocityZ, int level) const
 {
     int allIndicesCounter = 0;
     int allNodesCounter = 0;
     uint tmpTimeStepsBetweenReads = 0;
     size_t tmpNumberOfQuantities = 0;
-    
+
     for (auto boundaryCondition : boundaryConditions[level]->precursorBoundaryConditions)
     {
         if( tmpTimeStepsBetweenReads == 0 )
@@ -680,7 +680,7 @@ void LevelGridBuilder::getPrecursorValues(  uint* neighbor0PP, uint* neighbor0PM
         BCreader->getWeights(weights0PP, weights0PM, weights0MP, weights0MM);
         if(tmpNumberOfQuantities == 0)
             tmpNumberOfQuantities = BCreader->getNumberOfQuantities();
-        if(tmpNumberOfQuantities != BCreader->getNumberOfQuantities()) 
+        if(tmpNumberOfQuantities != BCreader->getNumberOfQuantities())
             throw std::runtime_error("All precursor files must have the same quantities.");
         allNodesCounter += BCreader->getNPointsRead();
         velocityX = boundaryCondition->getVelocityX();
@@ -692,7 +692,7 @@ void LevelGridBuilder::getPrecursorValues(  uint* neighbor0PP, uint* neighbor0PM
     if (tmpTimeStepsBetweenReads == 0)
         throw std::runtime_error("timeStepsBetweenReads of precursor needs to be larger than 0.");
     timeStepsBetweenReads = tmpTimeStepsBetweenReads;
-    
+
     if (tmpNumberOfQuantities == 0)
         throw std::runtime_error("Number of quantities in precursor needs to be larger than 0.");
     numberOfQuantities = tmpNumberOfQuantities;
@@ -718,7 +718,7 @@ uint LevelGridBuilder::getGeometrySize(int level) const
 {
     if (boundaryConditions[level]->geometryBoundaryCondition)
         return  (uint)boundaryConditions[level]->geometryBoundaryCondition->indices.size();
-    
+
     return 0;
 }
 
@@ -739,9 +739,9 @@ void LevelGridBuilder::getGeometryValues(real* vx, real* vy, real* vz, int level
 {
     for (uint i = 0; i < boundaryConditions[level]->geometryBoundaryCondition->indices.size(); i++)
     {
-		vx[i] = boundaryConditions[level]->geometryBoundaryCondition->getVx(i);
-		vy[i] = boundaryConditions[level]->geometryBoundaryCondition->getVy(i);
-		vz[i] = boundaryConditions[level]->geometryBoundaryCondition->getVz(i);
+        vx[i] = boundaryConditions[level]->geometryBoundaryCondition->getVx(i);
+        vy[i] = boundaryConditions[level]->geometryBoundaryCondition->getVy(i);
+        vz[i] = boundaryConditions[level]->geometryBoundaryCondition->getVz(i);
     }
 }
 
@@ -756,7 +756,7 @@ void LevelGridBuilder::getGeometryQs(real* qs[27], int level) const
     }
 }
 
-void LevelGridBuilder::writeArrows(std::string fileName) const 
+void LevelGridBuilder::writeArrows(std::string fileName) const
 {
     QLineWriter::writeArrows(fileName, boundaryConditions[getNumberOfGridLevels() - 1]->geometryBoundaryCondition, grids[getNumberOfGridLevels() - 1]);
 }
@@ -797,7 +797,7 @@ void LevelGridBuilder::findFluidNodes(bool splitDomain)
 }
 
 
-void LevelGridBuilder::addFluidNodeIndicesMacroVars(const std::vector<uint>& fluidNodeIndicesMacroVars, uint level) 
+void LevelGridBuilder::addFluidNodeIndicesMacroVars(const std::vector<uint>& fluidNodeIndicesMacroVars, uint level)
 {
     grids[level]->addFluidNodeIndicesMacroVars(fluidNodeIndicesMacroVars);
 }
@@ -827,17 +827,17 @@ void LevelGridBuilder::sortFluidNodeIndicesAllFeatures(uint level)
     grids[level]->sortFluidNodeIndicesAllFeatures();
 }
 
-uint LevelGridBuilder::getNumberOfFluidNodesMacroVars(unsigned int level) const 
+uint LevelGridBuilder::getNumberOfFluidNodesMacroVars(unsigned int level) const
 {
     return grids[level]->getNumberOfFluidNodeIndicesMacroVars();
 }
 
-void LevelGridBuilder::getFluidNodeIndicesMacroVars(uint *fluidNodeIndicesMacroVars, const int level) const 
+void LevelGridBuilder::getFluidNodeIndicesMacroVars(uint *fluidNodeIndicesMacroVars, const int level) const
 {
     grids[level]->getFluidNodeIndicesMacroVars(fluidNodeIndicesMacroVars);
 }
 
-uint LevelGridBuilder::getNumberOfFluidNodesApplyBodyForce(unsigned int level) const 
+uint LevelGridBuilder::getNumberOfFluidNodesApplyBodyForce(unsigned int level) const
 {
     return grids[level]->getNumberOfFluidNodeIndicesApplyBodyForce();
 }
@@ -855,4 +855,4 @@ uint LevelGridBuilder::getNumberOfFluidNodesAllFeatures(unsigned int level) cons
 void LevelGridBuilder::getFluidNodeIndicesAllFeatures(uint *fluidNodeIndicesAllFeatures, const int level) const
 {
     grids[level]->getFluidNodeIndicesAllFeatures(fluidNodeIndicesAllFeatures);
-}
\ No newline at end of file
+}
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
index 56ae1e4fce6185591fba97f49ba504ced259aea5..2e0eaf13080c46260de2a0c845fbf784a2cc3e09 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
@@ -1,28 +1,28 @@
 //=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
 //           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
 //  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
+//  License as published by the Free Software Foundation, either version 3 of
 //  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 //  for more details.
-//  
+//
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
@@ -86,8 +86,8 @@ public:
     GRIDGENERATOR_EXPORT void setPressureBoundaryCondition(SideType sideType, real rho);
     GRIDGENERATOR_EXPORT void setPeriodicBoundaryCondition(bool periodic_X, bool periodic_Y, bool periodic_Z);
     GRIDGENERATOR_EXPORT void setNoSlipBoundaryCondition(SideType sideType);
-    GRIDGENERATOR_EXPORT void setPrecursorBoundaryCondition(SideType sideType, SPtr<FileCollection> fileCollection, int timeStepsBetweenReads, 
-                                                            real velocityX=c0o1, real velocityY=c0o1, real velocityZ=c0o1,     
+    GRIDGENERATOR_EXPORT void setPrecursorBoundaryCondition(SideType sideType, SPtr<FileCollection> fileCollection, int timeStepsBetweenReads,
+                                                            real velocityX=c0o1, real velocityY=c0o1, real velocityZ=c0o1,
                                                             std::vector<uint> fileLevelToGridLevelMap = {});
 
     GRIDGENERATOR_EXPORT void setEnableFixRefinementIntoTheWall(bool enableFixRefinementIntoTheWall);
@@ -106,7 +106,7 @@ public:
     GRIDGENERATOR_EXPORT virtual void getFluidNodeIndicesBorder(uint *fluidNodeIndices, const int level) const override;
 
     GRIDGENERATOR_EXPORT virtual void getNodeValues(real *xCoords, real *yCoords, real *zCoords,
-                                         uint *neighborX, uint *neighborY, uint *neighborZ, uint *neighborNegative, 
+                                         uint *neighborX, uint *neighborY, uint *neighborZ, uint *neighborNegative,
                                          uint *geo, const int level) const override;
     GRIDGENERATOR_EXPORT virtual void getDimensions(int &nx, int &ny, int &nz, const int level) const override;
 
@@ -116,12 +116,12 @@ public:
     GRIDGENERATOR_EXPORT virtual void getSlipQs(real* qs[27], int level) const override;
 
     GRIDGENERATOR_EXPORT uint getStressSize(int level) const override;
-    GRIDGENERATOR_EXPORT virtual void getStressValues(  real* normalX, real* normalY, real* normalZ, 
-                                                        real* vx,      real* vy,      real* vz, 
-                                                        real* vx1,     real* vy1,     real* vz1, 
+    GRIDGENERATOR_EXPORT virtual void getStressValues(  real* normalX, real* normalY, real* normalZ,
+                                                        real* vx,      real* vy,      real* vz,
+                                                        real* vx1,     real* vy1,     real* vz1,
                                                         int* indices, int* samplingIndices, int* samplingOffsets, real* z0, int level) const override;
     GRIDGENERATOR_EXPORT virtual void getStressQs(real* qs[27], int level) const override;
-        
+
     GRIDGENERATOR_EXPORT uint getVelocitySize(int level) const override;
     GRIDGENERATOR_EXPORT virtual void getVelocityValues(real* vx, real* vy, real* vz, int* indices, int level) const override;
     GRIDGENERATOR_EXPORT virtual void getVelocityQs(real* qs[27], int level) const override;
@@ -131,9 +131,9 @@ public:
     GRIDGENERATOR_EXPORT virtual void getPressureQs(real* qs[27], int level) const override;
 
     GRIDGENERATOR_EXPORT uint getPrecursorSize(int level) const override;
-    GRIDGENERATOR_EXPORT void getPrecursorValues(   uint* neighbor0PP, uint* neighbor0PM, uint* neighbor0MP, uint* neighbor0MM, 
-                                                    real* weights0PP, real* weights0PM, real* weights0MP, real* weights0MM, 
-                                                    int* indices, std::vector<SPtr<TransientBCInputFileReader>>& reader, 
+    GRIDGENERATOR_EXPORT void getPrecursorValues(   uint* neighbor0PP, uint* neighbor0PM, uint* neighbor0MP, uint* neighbor0MM,
+                                                    real* weights0PP, real* weights0PM, real* weights0MP, real* weights0MM,
+                                                    int* indices, std::vector<SPtr<TransientBCInputFileReader>>& reader,
                                                     int& numberOfPrecursorNodes, size_t& numberOfQuantities, uint& timeStepsBetweenReads,
                                                     real& velocityX, real& velocityY, real& velocityZ, int level) const override;
     GRIDGENERATOR_EXPORT virtual void getPrecursorQs(real* qs[27], int level) const override;
@@ -150,11 +150,11 @@ public:
     GRIDGENERATOR_EXPORT SPtr<GeometryBoundaryCondition> getGeometryBoundaryCondition(uint level) const override;
 
 protected:
-    
+
 
     struct BoundaryConditions
     {
-		BoundaryConditions() = default;
+        BoundaryConditions() = default;
 
         std::vector<SPtr<SlipBoundaryCondition>> slipBoundaryConditions;
 
@@ -174,7 +174,7 @@ protected:
 
     std::vector<std::shared_ptr<Grid> > grids;
     std::vector<SPtr<BoundaryConditions> > boundaryConditions;
-    
+
     std::array<uint, 6> communicationProcesses;
 
     void checkLevel(int level);
@@ -217,7 +217,7 @@ public:
     void addFluidNodeIndicesMacroVars(const std::vector<uint>& fluidNodeIndicesMacroVars, uint level) override;
     void addFluidNodeIndicesApplyBodyForce(const std::vector<uint>& fluidNodeIndicesApplyBodyForce, uint level) override;
     void addFluidNodeIndicesAllFeatures(const std::vector<uint>& fluidNodeIndicesAllFeatures, uint level) override;
-    
+
     void sortFluidNodeIndicesMacroVars(uint level) override;
     void sortFluidNodeIndicesApplyBodyForce(uint level) override;
     void sortFluidNodeIndicesAllFeatures(uint level) override;
@@ -231,4 +231,3 @@ public:
 };
 
 #endif
-
diff --git a/src/gpu/GridGenerator/grid/GridImp.cpp b/src/gpu/GridGenerator/grid/GridImp.cpp
index 24dd169e70288b9ae18a29c23dc0c74b14246e3f..05c684410166e329ba63bbe3bdbf0c09e3a881ab 100644
--- a/src/gpu/GridGenerator/grid/GridImp.cpp
+++ b/src/gpu/GridGenerator/grid/GridImp.cpp
@@ -1,28 +1,28 @@
 //=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
 //           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
 //  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
+//  License as published by the Free Software Foundation, either version 3 of
 //  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 //  for more details.
-//  
+//
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
@@ -33,7 +33,6 @@
 #include "GridImp.h"
 
 #include <iostream>
-#include <omp.h>
 #include <sstream>
 # include <algorithm>
 #include <cmath>
@@ -61,8 +60,8 @@ int DIRECTIONS[DIR_END_MAX][DIMENSION];
 
 using namespace vf::gpu;
 
-GridImp::GridImp(Object* object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, Distribution distribution, uint level) 
-            : object(object), 
+GridImp::GridImp(Object* object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, Distribution distribution, uint level)
+            : object(object),
     startX(startX),
     startY(startY),
     startZ(startZ),
@@ -135,7 +134,7 @@ void GridImp::inital(const SPtr<Grid> fineGrid, uint numberOfLayers)
 #pragma omp parallel for
     for (int index = 0; index < (int)this->size; index++)
         this->initalNodeToOutOfGrid(index);
-    
+
     if( this->innerRegionFromFinerGrid ){
         *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start setInnerBasedOnFinerGrid()\n";
         this->setInnerBasedOnFinerGrid(fineGrid);
@@ -147,12 +146,12 @@ void GridImp::inital(const SPtr<Grid> fineGrid, uint numberOfLayers)
 
     *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start addOverlap()\n";
     this->addOverlap();
-    
+
     *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start fixOddCells()\n";
 #pragma omp parallel for
     for (int index = 0; index < (int)this->size; index++)
         this->fixOddCell(index);
-    
+
     if( enableFixRefinementIntoTheWall )
     {
         *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start fixRefinementIntoWall()\n";
@@ -180,12 +179,12 @@ void GridImp::inital(const SPtr<Grid> fineGrid, uint numberOfLayers)
             }
         }
     }
-    
+
     *logging::out << logging::Logger::INFO_INTERMEDIATE << "Start findEndOfGridStopperNodes()\n";
 #pragma omp parallel for
     for (int index = 0; index < (int)this->size; index++)
         this->findEndOfGridStopperNode(index);
-    
+
     *logging::out << logging::Logger::INFO_INTERMEDIATE
         << "Grid created: " << "from (" << this->startX << ", " << this->startY << ", " << this->startZ << ") to (" << this->endX << ", " << this->endY << ", " << this->endZ << ")\n"
         << "nodes: " << this->nx << " x " << this->ny << " x " << this->nz << " = " << this->size << "\n";
@@ -209,9 +208,9 @@ void GridImp::freeMemory()
     if( this->neighborIndexZ        != nullptr ) { delete[] this->neighborIndexZ;        this->neighborIndexZ        = nullptr; }
     if( this->neighborIndexNegative != nullptr ) { delete[] this->neighborIndexNegative; this->neighborIndexNegative = nullptr; }
     if( this->sparseIndices         != nullptr ) { delete[] this->sparseIndices;         this->sparseIndices         = nullptr; }
-	if( this->qIndices              != nullptr ) { delete[] this->qIndices;              this->qIndices              = nullptr; }
-	if( this->qValues               != nullptr ) { delete[] this->qValues;               this->qValues               = nullptr; }
-	if( this->qPatches              != nullptr ) { delete[] this->qPatches;              this->qPatches              = nullptr; }
+    if( this->qIndices              != nullptr ) { delete[] this->qIndices;              this->qIndices              = nullptr; }
+    if( this->qValues               != nullptr ) { delete[] this->qValues;               this->qValues               = nullptr; }
+    if( this->qPatches              != nullptr ) { delete[] this->qPatches;              this->qPatches              = nullptr; }
 
     field.freeMemory();
 }
@@ -254,7 +253,7 @@ void GridImp::discretize(Object* solidObject, char innerType, char outerType)
         this->sparseIndices[index] = index;
 
         if( this->getFieldEntry(index) == innerType ) continue;
-        
+
         real x, y, z;
         this->transIndexToCoords(index, x, y, z);
 
@@ -279,7 +278,7 @@ bool GridImp::isInside(const Cell& cell) const
 //    |       +-----+-----+-----+           | +-----+-----+-----+
 //    +---------+                           +---------+
 //               0     1     2                   0     1     2
-//              even      even                        even     
+//              even      even                        even
 //                   odd                        odd         odd
 //
 Cell GridImp::getOddCellFromIndex(uint index) const
@@ -349,7 +348,7 @@ void GridImp::addOverlap()
 void GridImp::setOverlapTmp( uint index )
 {
     if( this->field.is( index, INVALID_OUT_OF_GRID ) ){
-        
+
         if( this->hasNeighborOfType(index, FLUID) ){
             this->field.setFieldEntry( index, OVERLAP_TMP );
         }
@@ -380,7 +379,7 @@ void GridImp::fixRefinementIntoWall(uint xIndex, uint yIndex, uint zIndex, int d
     if(  this->xOddStart && ( dir == 1 || dir == -1 ) && ( xIndex % 2 == 0 && xIndex != 0 ) ) return;
     if(  this->yOddStart && ( dir == 2 || dir == -2 ) && ( yIndex % 2 == 0 && yIndex != 0 ) ) return;
     if(  this->zOddStart && ( dir == 3 || dir == -3 ) && ( zIndex % 2 == 0 && zIndex != 0 ) ) return;
-    
+
     //////////////////////////////////////////////////////////////////////////
 
     real dx{ 0.0 }, dy{ 0.0 }, dz{ 0.0 };
@@ -433,31 +432,31 @@ void GridImp::findStopperNode(uint index) // deprecated
 
 void GridImp::findEndOfGridStopperNode(uint index)
 {
-	if (isValidEndOfGridStopper(index)){
+    if (isValidEndOfGridStopper(index)){
         if( this->level != 0 )
-		    this->field.setFieldEntryToStopperOutOfGrid(index);
+            this->field.setFieldEntryToStopperOutOfGrid(index);
         else
             this->field.setFieldEntryToStopperOutOfGridBoundary(index);
     }
-    
-	if (isValidEndOfGridBoundaryStopper(index))
-		this->field.setFieldEntryToStopperOutOfGridBoundary(index);
+
+    if (isValidEndOfGridBoundaryStopper(index))
+        this->field.setFieldEntryToStopperOutOfGridBoundary(index);
 }
 
 void GridImp::findSolidStopperNode(uint index)
 {
-	if (isValidSolidStopper(index))
-		this->field.setFieldEntry(index, STOPPER_SOLID);
+    if (isValidSolidStopper(index))
+        this->field.setFieldEntry(index, STOPPER_SOLID);
 }
 
 void GridImp::findBoundarySolidNode(uint index)
 {
-	if (shouldBeBoundarySolidNode(index)) 
-	{
-		this->field.setFieldEntry(index, BC_SOLID);
-		this->qIndices[index] = this->numberOfSolidBoundaryNodes++;
-		//grid->setNumberOfSolidBoundaryNodes(grid->getNumberOfSolidBoundaryNodes() + 1);
-	}
+    if (shouldBeBoundarySolidNode(index))
+    {
+        this->field.setFieldEntry(index, BC_SOLID);
+        this->qIndices[index] = this->numberOfSolidBoundaryNodes++;
+        //grid->setNumberOfSolidBoundaryNodes(grid->getNumberOfSolidBoundaryNodes() + 1);
+    }
 }
 
 void GridImp::fixOddCell(uint index)
@@ -483,9 +482,9 @@ bool GridImp::isOutSideOfGrid(Cell &cell) const
 bool GridImp::contains(Cell &cell, char type) const
 {
     for (const auto point : cell) {
-		uint index = transCoordToIndex(point.x, point.y, point.z);
-		if (index == INVALID_INDEX)
-			continue;
+        uint index = transCoordToIndex(point.x, point.y, point.z);
+        if (index == INVALID_INDEX)
+            continue;
         if (field.is(index, type))
             return true;
     }
@@ -495,8 +494,8 @@ bool GridImp::contains(Cell &cell, char type) const
 bool GridImp::cellContainsOnly(Cell &cell, char type) const
 {
     for (const auto point : cell) {
-		uint index = transCoordToIndex(point.x, point.y, point.z);
-		if (index == INVALID_INDEX)
+        uint index = transCoordToIndex(point.x, point.y, point.z);
+        if (index == INVALID_INDEX)
             return false;
         if (!field.is(index, type))
             return false;
@@ -507,8 +506,8 @@ bool GridImp::cellContainsOnly(Cell &cell, char type) const
 bool GridImp::cellContainsOnly(Cell &cell, char typeA, char typeB) const
 {
     for (const auto point : cell) {
-		uint index = transCoordToIndex(point.x, point.y, point.z);
-		if (index == INVALID_INDEX)
+        uint index = transCoordToIndex(point.x, point.y, point.z);
+        if (index == INVALID_INDEX)
             return false;
         if (!field.is(index, typeA) && !field.is(index, typeB))
             return false;
@@ -524,91 +523,91 @@ const Object * GridImp::getObject() const
 void GridImp::setNodeTo(Cell &cell, char type)
 {
     for (const auto point : cell) {
-		uint index = transCoordToIndex(point.x, point.y, point.z);
-		if (index == INVALID_INDEX)
-			continue;
-		field.setFieldEntry(index, type);
+        uint index = transCoordToIndex(point.x, point.y, point.z);
+        if (index == INVALID_INDEX)
+            continue;
+        field.setFieldEntry(index, type);
     }
 }
 
 void GridImp::setNodeTo(uint index, char type)
 {
-	if( index != INVALID_INDEX )
-		field.setFieldEntry(index, type);
+    if( index != INVALID_INDEX )
+        field.setFieldEntry(index, type);
 }
 
 bool GridImp::isNode(uint index, char type) const
 {
     if( index != INVALID_INDEX )
-		return field.is(index, type);
+        return field.is(index, type);
 
     throw std::runtime_error("GridImp::isNode() -> index == INVALID_INDEX not supported.");
 }
 
 bool GridImp::isValidEndOfGridStopper(uint index) const
 {
-	// Lenz: also includes corner stopper nodes
-	if (!this->field.is(index, INVALID_OUT_OF_GRID))
-		return false;
+    // Lenz: also includes corner stopper nodes
+    if (!this->field.is(index, INVALID_OUT_OF_GRID))
+        return false;
 
-	return hasNeighborOfType(index, FLUID);
+    return hasNeighborOfType(index, FLUID);
 }
 
 bool GridImp::isValidEndOfGridBoundaryStopper(uint index) const
 {
-	// Lenz: also includes corner stopper nodes
-	if (!this->field.is(index, FLUID))
-		return false;
+    // Lenz: also includes corner stopper nodes
+    if (!this->field.is(index, FLUID))
+        return false;
 
-	return ! hasAllNeighbors(index);
+    return ! hasAllNeighbors(index);
 }
 
 bool GridImp::isValidSolidStopper(uint index) const
 {
-	// Lenz: also includes corner stopper nodes
-	if (!this->field.is(index, INVALID_SOLID))
-		return false;
+    // Lenz: also includes corner stopper nodes
+    if (!this->field.is(index, INVALID_SOLID))
+        return false;
 
-	return hasNeighborOfType(index, FLUID);
+    return hasNeighborOfType(index, FLUID);
 }
 
 bool GridImp::shouldBeBoundarySolidNode(uint index) const
 {
-	if (!this->field.is(index, FLUID))
-		return false;
+    if (!this->field.is(index, FLUID))
+        return false;
 
-	return hasNeighborOfType(index, STOPPER_SOLID);
+    return hasNeighborOfType(index, STOPPER_SOLID);
 }
 
 bool GridImp::hasAllNeighbors(uint index) const
 {
-	// new version by Lenz, utilizes the range based for loop for all directions
-	real x, y, z;
-	this->transIndexToCoords(index, x, y, z);
-	for (const auto dir : this->distribution) {
-		const uint neighborIndex = this->transCoordToIndex(x + dir[0] * this->getDelta(), y + dir[1] * this->getDelta(), z + dir[2] * this->getDelta());
+    // new version by Lenz, utilizes the range based for loop for all directions
+    real x, y, z;
+    this->transIndexToCoords(index, x, y, z);
+    for (const auto dir : this->distribution) {
+        const uint neighborIndex = this->transCoordToIndex(x + dir[0] * this->getDelta(), y + dir[1] * this->getDelta(), z + dir[2] * this->getDelta());
 
-		if (neighborIndex == INVALID_INDEX) return false;
-	}
+        if (neighborIndex == INVALID_INDEX) return false;
+    }
 
-	return true;
+    return true;
 }
 
 bool GridImp::hasNeighborOfType(uint index, char type) const
 {
-	// new version by Lenz, utilizes the range based for loop for all directions
-	real x, y, z;
-	this->transIndexToCoords(index, x, y, z);
-	for (const auto dir : this->distribution) {
-		const uint neighborIndex = this->transCoordToIndex(x + dir[0] * this->getDelta(), y + dir[1] * this->getDelta(), z + dir[2] * this->getDelta());
+    // new version by Lenz, utilizes the range based for loop for all directions
+    real x, y, z;
+    this->transIndexToCoords(index, x, y, z);
+    for (const auto dir : this->distribution) {
+        const uint neighborIndex = this->transCoordToIndex(x + dir[0] * this->getDelta(), y + dir[1] * this->getDelta(), z + dir[2] * this->getDelta());
 
-		if (neighborIndex == INVALID_INDEX) continue;
+        if (neighborIndex == INVALID_INDEX) continue;
 
-		if (this->field.is(neighborIndex, type))
-			return true;
-	}
+        if (this->field.is(neighborIndex, type))
+            return true;
+    }
 
-	return false;
+    return false;
 }
 
 bool GridImp::nodeInNextCellIs(int index, char type) const
@@ -630,13 +629,13 @@ bool GridImp::nodeInNextCellIs(int index, char type) const
 
     const uint indexXYZ = transCoordToIndex(neighborX, neighborY, neighborZ);
 
-	const bool typeX   = indexX   == INVALID_INDEX ? false : this->field.is(indexX, type);
-	const bool typeY   = indexY   == INVALID_INDEX ? false : this->field.is(indexY, type);
-	const bool typeXY  = indexXY  == INVALID_INDEX ? false : this->field.is(indexXY, type);
-	const bool typeZ   = indexZ   == INVALID_INDEX ? false : this->field.is(indexZ, type);
-	const bool typeYZ  = indexYZ  == INVALID_INDEX ? false : this->field.is(indexYZ, type);
-	const bool typeXZ  = indexXZ  == INVALID_INDEX ? false : this->field.is(indexXZ, type);
-	const bool typeXYZ = indexXYZ == INVALID_INDEX ? false : this->field.is(indexXYZ, type);
+    const bool typeX   = indexX   == INVALID_INDEX ? false : this->field.is(indexX, type);
+    const bool typeY   = indexY   == INVALID_INDEX ? false : this->field.is(indexY, type);
+    const bool typeXY  = indexXY  == INVALID_INDEX ? false : this->field.is(indexXY, type);
+    const bool typeZ   = indexZ   == INVALID_INDEX ? false : this->field.is(indexZ, type);
+    const bool typeYZ  = indexYZ  == INVALID_INDEX ? false : this->field.is(indexYZ, type);
+    const bool typeXZ  = indexXZ  == INVALID_INDEX ? false : this->field.is(indexXZ, type);
+    const bool typeXYZ = indexXYZ == INVALID_INDEX ? false : this->field.is(indexXYZ, type);
 
     return typeX || typeY || typeXY || typeZ || typeYZ
         || typeXZ || typeXYZ;
@@ -661,13 +660,13 @@ bool GridImp::nodeInPreviousCellIs(int index, char type) const
 
     const uint indexXYZ = transCoordToIndex(neighborX, neighborY, neighborZ);
 
-	const bool typeX   = indexX   == INVALID_INDEX ? false : this->field.is(indexX  , type);
-	const bool typeY   = indexY   == INVALID_INDEX ? false : this->field.is(indexY  , type);
-	const bool typeXY  = indexXY  == INVALID_INDEX ? false : this->field.is(indexXY , type);
-	const bool typeZ   = indexZ   == INVALID_INDEX ? false : this->field.is(indexZ  , type);
-	const bool typeYZ  = indexYZ  == INVALID_INDEX ? false : this->field.is(indexYZ , type);
-	const bool typeXZ  = indexXZ  == INVALID_INDEX ? false : this->field.is(indexXZ , type);
-	const bool typeXYZ = indexXYZ == INVALID_INDEX ? false : this->field.is(indexXYZ, type);
+    const bool typeX   = indexX   == INVALID_INDEX ? false : this->field.is(indexX  , type);
+    const bool typeY   = indexY   == INVALID_INDEX ? false : this->field.is(indexY  , type);
+    const bool typeXY  = indexXY  == INVALID_INDEX ? false : this->field.is(indexXY , type);
+    const bool typeZ   = indexZ   == INVALID_INDEX ? false : this->field.is(indexZ  , type);
+    const bool typeYZ  = indexYZ  == INVALID_INDEX ? false : this->field.is(indexYZ , type);
+    const bool typeXZ  = indexXZ  == INVALID_INDEX ? false : this->field.is(indexXZ , type);
+    const bool typeXYZ = indexXYZ == INVALID_INDEX ? false : this->field.is(indexXYZ, type);
 
     return typeX || typeY || typeXY || typeZ || typeYZ
         || typeXZ || typeXYZ;
@@ -678,8 +677,8 @@ bool GridImp::nodeInCellIs(Cell& cell, char type) const
     for (const auto node : cell)
     {
         const uint index = transCoordToIndex(node.x, node.y, node.z);
-		if (index == INVALID_INDEX)
-			continue;
+        if (index == INVALID_INDEX)
+            continue;
         if (field.is(index, type))
             return true;
     }
@@ -696,9 +695,9 @@ void GridImp::setCellTo(uint index, char type)
     for (const auto node : cell)
     {
         const uint nodeIndex = transCoordToIndex(node.x, node.y, node.z);
-		if (nodeIndex == INVALID_INDEX)
-			continue;
-		this->field.setFieldEntry(nodeIndex, type);
+        if (nodeIndex == INVALID_INDEX)
+            continue;
+        this->field.setFieldEntry(nodeIndex, type);
     }
 }
 
@@ -712,10 +711,10 @@ void GridImp::setNonStopperOutOfGridCellTo(uint index, char type)
     for (const auto node : cell)
     {
         const uint nodeIndex = transCoordToIndex(node.x, node.y, node.z);
-		if (nodeIndex == INVALID_INDEX)
-			continue;
+        if (nodeIndex == INVALID_INDEX)
+            continue;
 
-        if( this->getFieldEntry( nodeIndex ) != STOPPER_OUT_OF_GRID && 
+        if( this->getFieldEntry( nodeIndex ) != STOPPER_OUT_OF_GRID &&
             this->getFieldEntry( nodeIndex ) != STOPPER_OUT_OF_GRID_BOUNDARY )
             this->field.setFieldEntry(nodeIndex, type);
     }
@@ -770,7 +769,7 @@ uint GridImp::transCoordToIndex(const real &x, const real &y, const real &z) con
     const uint yIndex = getYIndex(y);
     const uint zIndex = getZIndex(z);
 
-	if (xIndex >= nx || yIndex >= ny || zIndex >= nz)
+    if (xIndex >= nx || yIndex >= ny || zIndex >= nz)
         return INVALID_INDEX;
 
     return xIndex + nx * (yIndex + ny * zIndex);
@@ -819,20 +818,20 @@ TriangularMeshDiscretizationStrategy * GridImp::getTriangularMeshDiscretizationS
 
 uint GridImp::getNumberOfSolidBoundaryNodes() const
 {
-	return this->numberOfSolidBoundaryNodes;
+    return this->numberOfSolidBoundaryNodes;
 }
 
 void GridImp::setNumberOfSolidBoundaryNodes(uint numberOfSolidBoundaryNodes)
 {
-	if (numberOfSolidBoundaryNodes < INVALID_INDEX)
-		this->numberOfSolidBoundaryNodes = numberOfSolidBoundaryNodes;
+    if (numberOfSolidBoundaryNodes < INVALID_INDEX)
+        this->numberOfSolidBoundaryNodes = numberOfSolidBoundaryNodes;
 }
 
 real GridImp::getQValue(const uint index, const uint dir) const
 {
-	const int qIndex = dir * this->numberOfSolidBoundaryNodes + this->qIndices[index];
+    const int qIndex = dir * this->numberOfSolidBoundaryNodes + this->qIndices[index];
 
-	return this->qValues[qIndex];
+    return this->qValues[qIndex];
 }
 
 uint GridImp::getQPatch(const uint index) const
@@ -858,7 +857,7 @@ void GridImp::findSparseIndices(SPtr<Grid> finerGrid)
 {
     *logging::out << logging::Logger::INFO_INTERMEDIATE << "Find sparse indices...";
     auto fineGrid = std::static_pointer_cast<GridImp>(finerGrid);
-    
+
     this->updateSparseIndices();
 
 #pragma omp parallel for
@@ -906,7 +905,7 @@ void GridImp::updateSparseIndices()
     sparseSize = size - removedNodes;
 }
 
-void GridImp::findFluidNodeIndices(bool splitDomain) 
+void GridImp::findFluidNodeIndices(bool splitDomain)
 {
     // find sparse index of all fluid nodes
     this->fluidNodeIndices.clear();
@@ -935,7 +934,7 @@ void GridImp::findFluidNodeIndicesBorder() {
     // resize fluidNodeIndicesBorder (for better performance in copy operation)
     size_t newSize = 0;
     for (CommunicationIndices& ci : this->communicationIndices)
-        newSize += ci.sendIndices.size();    
+        newSize += ci.sendIndices.size();
     this->fluidNodeIndicesBorder.reserve(newSize);
 
     // copy all send indices to fluidNodeIndicesBorder
@@ -968,7 +967,7 @@ void GridImp::setNeighborIndices(uint index)
         this->setStopperNeighborCoords(index);
         return;
     }
-     
+
     if (this->sparseIndices[index] == -1)
         return;
 
@@ -1002,9 +1001,9 @@ void GridImp::setStopperNeighborCoords(uint index)
     if (vf::Math::lessEqual(z + delta, endZ + (0.5 * delta)) && !this->field.isInvalidOutOfGrid(this->transCoordToIndex(x, y, z + delta)))
         neighborIndexZ[index] = getSparseIndex(x, y, z + delta);
 
-    if (vf::Math::greaterEqual(x - delta, endX) && 
-        vf::Math::greaterEqual(y - delta, endY) && 
-        vf::Math::greaterEqual(z - delta, endZ) && 
+    if (vf::Math::greaterEqual(x - delta, endX) &&
+        vf::Math::greaterEqual(y - delta, endY) &&
+        vf::Math::greaterEqual(z - delta, endZ) &&
         !this->field.isInvalidOutOfGrid(this->transCoordToIndex(x - delta, y - delta, z - delta)))
     {
         neighborIndexNegative[index] = getSparseIndex(x - delta, y - delta, z - delta);
@@ -1035,7 +1034,7 @@ real GridImp::getNeighborCoord(bool periodicity, real startCoord, real coords[3]
             return coords[direction] + delta;
 
     }
-    
+
     return coords[direction] + delta;
 }
 
@@ -1061,7 +1060,7 @@ real GridImp::getNegativeNeighborCoord(bool periodicity, real startCoord, real c
 
         return getLastFluidNode(coords, direction, startCoord);
     }
-    
+
     return coords[direction] - delta;
 }
 
@@ -1154,15 +1153,15 @@ void GridImp::limitToSubDomain(SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks
             // one layer for receive nodes and one for stoppers
             if( lbmOrGks == LBM )
                 tmpSubDomainBox.extend(this->delta);
-            
-            if (!tmpSubDomainBox.isInside(x, y, z) 
+
+            if (!tmpSubDomainBox.isInside(x, y, z)
                 && ( this->getFieldEntry(index) == FLUID ||
                      this->getFieldEntry(index) == FLUID_CFC ||
                      this->getFieldEntry(index) == FLUID_CFF ||
                      this->getFieldEntry(index) == FLUID_FCC ||
                      this->getFieldEntry(index) == FLUID_FCF ||
                      this->getFieldEntry(index) == BC_SOLID ) )
-            {   
+            {
                 this->setFieldEntry(index, STOPPER_OUT_OF_GRID_BOUNDARY);
             }
         }
@@ -1184,13 +1183,13 @@ void GridImp::limitToSubDomain(SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks
 
 void GridImp::findGridInterfaceCF(uint index, GridImp& finerGrid, LbmOrGks lbmOrGks)
 {
-	if (lbmOrGks == LBM)
-	{
-		gridInterface->findInterfaceCF            (index, this, &finerGrid);
-		gridInterface->findBoundaryGridInterfaceCF(index, this, &finerGrid);
-	}
-	else if (lbmOrGks == GKS)
-		gridInterface->findInterfaceCF_GKS(index, this, &finerGrid);
+    if (lbmOrGks == LBM)
+    {
+        gridInterface->findInterfaceCF            (index, this, &finerGrid);
+        gridInterface->findBoundaryGridInterfaceCF(index, this, &finerGrid);
+    }
+    else if (lbmOrGks == GKS)
+        gridInterface->findInterfaceCF_GKS(index, this, &finerGrid);
 }
 
 void GridImp::findGridInterfaceFC(uint index, GridImp& finerGrid)
@@ -1217,16 +1216,16 @@ void GridImp::mesh(Object* object)
     if (triangularMesh)
         triangularMeshDiscretizationStrategy->discretize(triangularMesh, this, INVALID_SOLID, FLUID);
     else
-		//new method for geometric primitives (not cell based) to be implemented
+        //new method for geometric primitives (not cell based) to be implemented
         this->discretize(object, INVALID_SOLID, FLUID);
 
     this->closeNeedleCells();
 
-	#pragma omp parallel for
+    #pragma omp parallel for
     for (int index = 0; index < (int)this->size; index++)
         this->findSolidStopperNode(index);
 
-	//#pragma omp parallel for
+    //#pragma omp parallel for
     for (int index = 0; index < (int)this->size; index++) {
         this->findBoundarySolidNode(index);
     }
@@ -1359,7 +1358,7 @@ void GridImp::findQs(Object* object) //TODO: enable qs for primitive objects
         findQsPrimitive(object);
 }
 
-void GridImp::allocateQs() 
+void GridImp::allocateQs()
 {
     this->qPatches = new uint[this->getNumberOfSolidBoundaryNodes()];
 
@@ -1379,8 +1378,8 @@ void GridImp::findQs(TriangularMesh &triangularMesh)
 
     if( this->qComputationStage == qComputationStageType::ComputeQs )
         allocateQs();
-    
-    
+
+
 #pragma omp parallel for
     for (int i = 0; i < triangularMesh.size; i++)
         this->findQs(triangularMesh.triangles[i]);
@@ -1406,15 +1405,15 @@ void GridImp::findQs(Triangle &triangle)
                 //if (!field.isFluid(index))
                 //    continue;
 
-				if( index == INVALID_INDEX ) continue;
+                if( index == INVALID_INDEX ) continue;
 
                 const Vertex point(x, y, z);
 
                 if( this->qComputationStage == qComputationStageType::ComputeQs ){
                     if(this->field.is(index, BC_SOLID))
                     {
-					    calculateQs(index, point, triangle);
-				    }
+                        calculateQs(index, point, triangle);
+                    }
                 }
                 else if( this->qComputationStage == qComputationStageType::FindSolidBoundaryNodes )
                 {
@@ -1449,14 +1448,14 @@ void GridImp::findQsPrimitive(Object * object)
         real x,y,z;
 
         this->transIndexToCoords(index,x,y,z);
-        
+
         const Vertex point(x, y, z);
 
         if( this->qComputationStage == qComputationStageType::ComputeQs ){
             if(this->field.is(index, BC_SOLID))
             {
-				calculateQs(index, point, object);
-			}
+                calculateQs(index, point, object);
+            }
         }
         else if( this->qComputationStage == qComputationStageType::FindSolidBoundaryNodes )
         {
@@ -1477,66 +1476,66 @@ void GridImp::calculateQs(const uint index, const Vertex &point, Object* object)
 {
     Vertex pointOnTriangle, direction;
 
-	real subdistance;
-	int error;
-	for (int i = distribution.dir_start; i <= distribution.dir_end; i++)
-	{
-		direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]), 
+    real subdistance;
+    int error;
+    for (int i = distribution.dir_start; i <= distribution.dir_end; i++)
+    {
+        direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]),
                             real(distribution.dirs[i * DIMENSION + 1]),
-			                real(distribution.dirs[i * DIMENSION + 2]) );
+                            real(distribution.dirs[i * DIMENSION + 2]) );
 
-		uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta,
-													    point.y + direction.y * this->delta,
-													    point.z + direction.z * this->delta);
+        uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta,
+                                                        point.y + direction.y * this->delta,
+                                                        point.z + direction.z * this->delta);
 
-		if (neighborIndex == INVALID_INDEX) continue;
+        if (neighborIndex == INVALID_INDEX) continue;
 
-		error = object->getIntersection(point, direction, pointOnTriangle, subdistance);
+        error = object->getIntersection(point, direction, pointOnTriangle, subdistance);
 
-		subdistance /= this->delta;
+        subdistance /= this->delta;
 
-		if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0))
-		{
-			if ( -0.5        > this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] ||
+        if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0))
+        {
+            if ( -0.5        > this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] ||
                     subdistance < this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] )
-			{
+            {
+
+                this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] = subdistance;
 
-				this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] = subdistance;
-                    
                 this->qPatches[ this->qIndices[index] ] = 0;
 
-			}
-		}
-	}
+            }
+        }
+    }
 }
 
 bool GridImp::checkIfAtLeastOneValidQ(const uint index, const Vertex &point, Object* object) const
 {
     Vertex pointOnTriangle, direction;
 
-	real subdistance;
-	int error;
-	for (int i = distribution.dir_start; i <= distribution.dir_end; i++)
-	{
-		direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]), 
+    real subdistance;
+    int error;
+    for (int i = distribution.dir_start; i <= distribution.dir_end; i++)
+    {
+        direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]),
                             real(distribution.dirs[i * DIMENSION + 1]),
-			                real(distribution.dirs[i * DIMENSION + 2]) );
+                            real(distribution.dirs[i * DIMENSION + 2]) );
 
-		uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta,
-													 point.y + direction.y * this->delta,
-													 point.z + direction.z * this->delta);
+        uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta,
+                                                     point.y + direction.y * this->delta,
+                                                     point.z + direction.z * this->delta);
 
-		if (neighborIndex == INVALID_INDEX) continue;
+        if (neighborIndex == INVALID_INDEX) continue;
 
-		error = object->getIntersection(point, direction, pointOnTriangle, subdistance);
+        error = object->getIntersection(point, direction, pointOnTriangle, subdistance);
 
-		subdistance /= this->delta;
+        subdistance /= this->delta;
 
-		if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0))
-		{
-			return true;
-		}
-	}
+        if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0))
+        {
+            return true;
+        }
+    }
     return false;
 }
 
@@ -1565,7 +1564,7 @@ void GridImp::calculateQs(const Vertex &point, const Triangle &triangle) const
 
         error = triangle.getTriangleIntersection(point, direction, pointOnTriangle, subdistance);
 
-		subdistance /= this->delta;
+        subdistance /= this->delta;
 
         if (error == 0 && subdistance < 1.0 && subdistance > 0.0)
         {
@@ -1577,71 +1576,71 @@ void GridImp::calculateQs(const Vertex &point, const Triangle &triangle) const
 
 void GridImp::calculateQs(const uint index, const Vertex &point, const Triangle &triangle) const
 {
-	Vertex pointOnTriangle, direction;
-	real subdistance;
-	int error;
-	for (int i = distribution.dir_start; i <= distribution.dir_end; i++)
-	{
+    Vertex pointOnTriangle, direction;
+    real subdistance;
+    int error;
+    for (int i = distribution.dir_start; i <= distribution.dir_end; i++)
+    {
 #if defined(__CUDA_ARCH__)
-		direction = Vertex(DIRECTIONS[i][0], DIRECTIONS[i][1], DIRECTIONS[i][2]);
+        direction = Vertex(DIRECTIONS[i][0], DIRECTIONS[i][1], DIRECTIONS[i][2]);
 #else
-		direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]), 
+        direction = Vertex( real(distribution.dirs[i * DIMENSION + 0]),
                             real(distribution.dirs[i * DIMENSION + 1]),
-			                real(distribution.dirs[i * DIMENSION + 2]) );
+                            real(distribution.dirs[i * DIMENSION + 2]) );
 #endif
 
-		uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta,
-													 point.y + direction.y * this->delta,
-													 point.z + direction.z * this->delta);
+        uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta,
+                                                     point.y + direction.y * this->delta,
+                                                     point.z + direction.z * this->delta);
 
-		if (neighborIndex == INVALID_INDEX) continue;
+        if (neighborIndex == INVALID_INDEX) continue;
 
-		error = triangle.getTriangleIntersection(point, direction, pointOnTriangle, subdistance);
+        error = triangle.getTriangleIntersection(point, direction, pointOnTriangle, subdistance);
 
-		subdistance /= this->delta;
+        subdistance /= this->delta;
 
-		if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0))
-		{
-			if ( -0.5        > this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] ||
+        if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0))
+        {
+            if ( -0.5        > this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] ||
                  subdistance < this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] )
-			{
-				this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] = subdistance;
+            {
+                this->qValues[i*this->numberOfSolidBoundaryNodes + this->qIndices[index]] = subdistance;
 
                 this->qPatches[ this->qIndices[index] ] = triangle.patchIndex;
-			}
-		}
-	}
+            }
+        }
+    }
 }
 
 bool GridImp::checkIfAtLeastOneValidQ(const uint index, const Vertex & point, const Triangle & triangle) const
 {
-	Vertex pointOnTriangle, direction;
-	real subdistance;
-	int error;
-	for (int i = distribution.dir_start; i <= distribution.dir_end; i++)
-	{
+    Vertex pointOnTriangle, direction;
+    real subdistance;
+    int error;
+    for (int i = distribution.dir_start; i <= distribution.dir_end; i++)
+    {
 #if defined(__CUDA_ARCH__)
-		direction = Vertex(DIRECTIONS[i][0], DIRECTIONS[i][1], DIRECTIONS[i][2]);
+        direction = Vertex(DIRECTIONS[i][0], DIRECTIONS[i][1], DIRECTIONS[i][2]);
 #else
-		direction = Vertex(real(distribution.dirs[i * DIMENSION + 0]), 
+        direction = Vertex(real(distribution.dirs[i * DIMENSION + 0]),
                            real(distribution.dirs[i * DIMENSION + 1]),
-			               real(distribution.dirs[i * DIMENSION + 2]));
+                           real(distribution.dirs[i * DIMENSION + 2]));
 #endif
 
-		uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta,
-													 point.y + direction.y * this->delta,
-													 point.z + direction.z * this->delta);
-		if (neighborIndex == INVALID_INDEX) continue;
+        uint neighborIndex = this->transCoordToIndex(point.x + direction.x * this->delta,
+                                                     point.y + direction.y * this->delta,
+                                                     point.z + direction.z * this->delta);
+        if (neighborIndex == INVALID_INDEX) continue;
 
-		error = triangle.getTriangleIntersection(point, direction, pointOnTriangle, subdistance);
+        error = triangle.getTriangleIntersection(point, direction, pointOnTriangle, subdistance);
 
-		subdistance /= this->delta;
+        subdistance /= this->delta;
 
-		if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0))
-		{
-			return true;
-		}
-	}
+        if (error == 0 && vf::Math::lessEqual(subdistance, 1.0) && vf::Math::greaterEqual(subdistance, 0.0))
+        {
+            return true;
+        }
+    }
     return false;
 }
 
@@ -1670,13 +1669,13 @@ void GridImp::findCommunicationIndices(int direction, SPtr<BoundingBox> subDomai
 
 void GridImp::findCommunicationIndex( uint index, real coordinate, real limit, int direction ){
     // negative direction get a negative sign
-    real s = ( direction % 2 == 0 ) ? ( -1.0 ) : ( 1.0 );  
+    real s = ( direction % 2 == 0 ) ? ( -1.0 ) : ( 1.0 );
 
-	if (std::abs(coordinate - (limit + s * 0.5 * this->delta)) < 0.1 * this->delta)
-		this->communicationIndices[direction].receiveIndices.push_back(index);
+    if (std::abs(coordinate - (limit + s * 0.5 * this->delta)) < 0.1 * this->delta)
+        this->communicationIndices[direction].receiveIndices.push_back(index);
 
-	if (std::abs(coordinate - (limit - s * 0.5 * this->delta)) < 0.1 * this->delta) 
-		this->communicationIndices[direction].sendIndices.push_back(index);
+    if (std::abs(coordinate - (limit - s * 0.5 * this->delta)) < 0.1 * this->delta)
+        this->communicationIndices[direction].sendIndices.push_back(index);
 }
 
 bool GridImp::isSendNode(int index) const
@@ -1722,14 +1721,14 @@ uint GridImp::getReceiveIndex(int direction, uint index)
 
 void GridImp::repairCommunicationIndices(int direction)
 {
-    this->communicationIndices[direction].sendIndices.insert( this->communicationIndices[direction].sendIndices.end(), 
-                                                              this->communicationIndices[direction+1].sendIndices.begin(), 
+    this->communicationIndices[direction].sendIndices.insert( this->communicationIndices[direction].sendIndices.end(),
+                                                              this->communicationIndices[direction+1].sendIndices.begin(),
                                                               this->communicationIndices[direction+1].sendIndices.end() );
 
 
 
-    this->communicationIndices[direction+1].receiveIndices.insert( this->communicationIndices[direction+1].receiveIndices.end(), 
-                                                                 this->communicationIndices[direction].receiveIndices.begin(), 
+    this->communicationIndices[direction+1].receiveIndices.insert( this->communicationIndices[direction+1].receiveIndices.end(),
+                                                                 this->communicationIndices[direction].receiveIndices.begin(),
                                                                  this->communicationIndices[direction].receiveIndices.end() );
 
     this->communicationIndices[direction].receiveIndices = this->communicationIndices[direction+1].receiveIndices;
@@ -1834,19 +1833,19 @@ real GridImp::getMaximumOnNodes(const real &maxExact, const real &decimalStart,
     return maxNode;
 }
 
-uint GridImp::getXIndex(real x) const 
-{ 
-    return std::lround((x - startX) / delta); 
+uint GridImp::getXIndex(real x) const
+{
+    return std::lround((x - startX) / delta);
 }
 
 uint GridImp::getYIndex(real y) const
-{ 
-    return std::lround((y - startY) / delta); 
+{
+    return std::lround((y - startY) / delta);
 }
 
 uint GridImp::getZIndex(real z) const
-{ 
-    return std::lround((z - startZ) / delta); 
+{
+    return std::lround((z - startZ) / delta);
 }
 
 real GridImp::getDelta() const
@@ -1861,11 +1860,11 @@ uint GridImp::getSize() const
 
 uint GridImp::getSparseSize() const
 {
-    return this->sparseSize; 
+    return this->sparseSize;
 }
 
-uint GridImp::getNumberOfFluidNodes() const { 
-    return (uint)this->fluidNodeIndices.size(); 
+uint GridImp::getNumberOfFluidNodes() const {
+    return (uint)this->fluidNodeIndices.size();
 }
 
 Field GridImp::getField() const
@@ -2058,18 +2057,18 @@ void GridImp::getNodeValues(real *xCoords, real *yCoords, real *zCoords, uint *n
     }
 }
 
-void GridImp::getFluidNodeIndices(uint *fluidNodeIndices) const 
-{ 
+void GridImp::getFluidNodeIndices(uint *fluidNodeIndices) const
+{
     for (uint nodeNumber = 0; nodeNumber < (uint)this->fluidNodeIndices.size(); nodeNumber++)
         fluidNodeIndices[nodeNumber] = this->fluidNodeIndices[nodeNumber];
 }
 
-uint GridImp::getNumberOfFluidNodesBorder() const 
-{ 
-    return (uint)this->fluidNodeIndicesBorder.size(); 
+uint GridImp::getNumberOfFluidNodesBorder() const
+{
+    return (uint)this->fluidNodeIndicesBorder.size();
 }
 
-void GridImp::getFluidNodeIndicesBorder(uint *fluidNodeIndicesBorder) const 
+void GridImp::getFluidNodeIndicesBorder(uint *fluidNodeIndicesBorder) const
 {
     for (uint nodeNumber = 0; nodeNumber < (uint)this->fluidNodeIndicesBorder.size(); nodeNumber++)
         fluidNodeIndicesBorder[nodeNumber] = this->fluidNodeIndicesBorder[nodeNumber];
@@ -2083,14 +2082,14 @@ void GridImp::addFluidNodeIndicesMacroVars(std::vector<uint> _fluidNodeIndicesMa
 }
 
 void GridImp::addFluidNodeIndicesApplyBodyForce(std::vector<uint> _fluidNodeIndicesApplyBodyForce)
-{    
-    
+{
+
     size_t newSize = this->fluidNodeIndicesApplyBodyForce.size()+_fluidNodeIndicesApplyBodyForce.size();
     this->fluidNodeIndicesApplyBodyForce.reserve(newSize);
     std::copy(_fluidNodeIndicesApplyBodyForce.begin(), _fluidNodeIndicesApplyBodyForce.end(), std::back_inserter(this->fluidNodeIndicesApplyBodyForce));
 }
 
-void GridImp::addFluidNodeIndicesAllFeatures(std::vector<uint> _fluidNodeIndicesAllFeatures) 
+void GridImp::addFluidNodeIndicesAllFeatures(std::vector<uint> _fluidNodeIndicesAllFeatures)
 {
 
     size_t newSize = this->fluidNodeIndicesAllFeatures.size()+_fluidNodeIndicesAllFeatures.size();
@@ -2109,14 +2108,14 @@ void GridImp::sortFluidNodeIndicesMacroVars()
          // Remove indices of fluidNodeIndicesAllFeatures from fluidNodeIndicesMacroVars
         if(this->fluidNodeIndicesAllFeatures.size()>0)
         {
-            this->fluidNodeIndicesMacroVars.erase(   std::remove_if(   this->fluidNodeIndicesMacroVars.begin(), this->fluidNodeIndicesMacroVars.end(), 
+            this->fluidNodeIndicesMacroVars.erase(   std::remove_if(   this->fluidNodeIndicesMacroVars.begin(), this->fluidNodeIndicesMacroVars.end(),
                                                         [&](auto x){return binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ),
                                             this->fluidNodeIndicesMacroVars.end()
                                         );
         }
 
         // Remove indices of fluidNodeIndicesMacroVars from fluidNodeIndices
-        this->fluidNodeIndices.erase(   std::remove_if(   this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(), 
+        this->fluidNodeIndices.erase(   std::remove_if(   this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(),
                                                         [&](auto x){return binary_search(fluidNodeIndicesMacroVars.begin(),fluidNodeIndicesMacroVars.end(),x);} ),
                                         this->fluidNodeIndices.end()
                                     );
@@ -2134,14 +2133,14 @@ void GridImp::sortFluidNodeIndicesApplyBodyForce()
          // Remove indices of fluidNodeIndicesAllFeatures from fluidNodeIndicesMacroVars
         if(this->fluidNodeIndicesAllFeatures.size()>0)
         {
-            this->fluidNodeIndicesApplyBodyForce.erase(   std::remove_if(   this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end(), 
+            this->fluidNodeIndicesApplyBodyForce.erase(   std::remove_if(   this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end(),
                                                         [&](auto x){return binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ),
                                             this->fluidNodeIndicesApplyBodyForce.end()
                                         );
         }
 
         // Remove indices of fluidNodeIndicesMacroVars from fluidNodeIndices
-        this->fluidNodeIndices.erase(   std::remove_if(   this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(), 
+        this->fluidNodeIndices.erase(   std::remove_if(   this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(),
                                                         [&](auto x){return binary_search(fluidNodeIndicesApplyBodyForce.begin(),fluidNodeIndicesApplyBodyForce.end(),x);} ),
                                         this->fluidNodeIndices.end()
                                     );
@@ -2156,34 +2155,34 @@ void GridImp::sortFluidNodeIndicesAllFeatures()
         // Remove duplicates
         this->fluidNodeIndicesAllFeatures.erase( unique( this->fluidNodeIndicesAllFeatures.begin(), this->fluidNodeIndicesAllFeatures.end() ), this->fluidNodeIndicesAllFeatures.end() );
         // Remove indices of fluidNodeIndicesMacroVars from fluidNodeIndices
-        this->fluidNodeIndices.erase(   std::remove_if(   this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(), 
+        this->fluidNodeIndices.erase(   std::remove_if(   this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(),
                                                         [&](auto x){return binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ),
                                         this->fluidNodeIndices.end()
                                     );
     }
 }
 
-uint GridImp::getNumberOfFluidNodeIndicesMacroVars() const { 
-    return (uint)this->fluidNodeIndicesMacroVars.size(); 
+uint GridImp::getNumberOfFluidNodeIndicesMacroVars() const {
+    return (uint)this->fluidNodeIndicesMacroVars.size();
 }
 
-uint GridImp::getNumberOfFluidNodeIndicesApplyBodyForce() const { 
-    return (uint)this->fluidNodeIndicesApplyBodyForce.size(); 
+uint GridImp::getNumberOfFluidNodeIndicesApplyBodyForce() const {
+    return (uint)this->fluidNodeIndicesApplyBodyForce.size();
 }
 
-uint GridImp::getNumberOfFluidNodeIndicesAllFeatures() const { 
-    return (uint)this->fluidNodeIndicesAllFeatures.size(); 
+uint GridImp::getNumberOfFluidNodeIndicesAllFeatures() const {
+    return (uint)this->fluidNodeIndicesAllFeatures.size();
 }
 
-void GridImp::getFluidNodeIndicesMacroVars(uint *_fluidNodeIndicesMacroVars) const 
+void GridImp::getFluidNodeIndicesMacroVars(uint *_fluidNodeIndicesMacroVars) const
 {
-    std::copy(fluidNodeIndicesMacroVars.begin(), fluidNodeIndicesMacroVars.end(), _fluidNodeIndicesMacroVars);       
+    std::copy(fluidNodeIndicesMacroVars.begin(), fluidNodeIndicesMacroVars.end(), _fluidNodeIndicesMacroVars);
 }
-void GridImp::getFluidNodeIndicesApplyBodyForce(uint *_fluidNodeIndicesApplyBodyForce) const 
+void GridImp::getFluidNodeIndicesApplyBodyForce(uint *_fluidNodeIndicesApplyBodyForce) const
 {
     std::copy(fluidNodeIndicesApplyBodyForce.begin(), fluidNodeIndicesApplyBodyForce.end(), _fluidNodeIndicesApplyBodyForce);
 }
-void GridImp::getFluidNodeIndicesAllFeatures(uint *_fluidNodeIndicesAllFeatures) const 
+void GridImp::getFluidNodeIndicesAllFeatures(uint *_fluidNodeIndicesAllFeatures) const
 {
     std::copy(fluidNodeIndicesAllFeatures.begin(), fluidNodeIndicesAllFeatures.end(), _fluidNodeIndicesAllFeatures);
 }
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
index 42ba7a464a6e76fd747cc6d11a01e3957c865765..4c50d458c60f04db14c247e16ef3dc44833cb8f7 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
@@ -30,7 +30,7 @@ public:
     virtual void allocArrays_OffsetScale() = 0;
     virtual void allocArrays_taggedFluidNodes() = 0;
 
-    virtual void tagFluidNodeIndices(std::vector<uint> taggedFluidNodeIndices, CollisionTemplate tag, uint level) = 0;
+    virtual void tagFluidNodeIndices(const std::vector<uint>& taggedFluidNodeIndices, CollisionTemplate tag, uint level) = 0;
     virtual void sortFluidNodeTags() = 0;
 
 	virtual void setDimensions() = 0;
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
index 793400869dd29fe12be357ffcf87c0238ea70eb9..66af0d1c4603353148c6201de15d13d6243b0612 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
@@ -74,11 +74,11 @@ void GridReader::allocArrays_CoordNeighborGeo()
 	uint numberOfNodesGlobal = 0;
 	std::cout << "Number of Nodes: " << std::endl;
 
-	for (uint level = 0; level <= maxLevel; level++) 
-	{		
-		int numberOfNodesPerLevel = coordX.getSize(level) + 1;
-		numberOfNodesGlobal += numberOfNodesPerLevel;
-		std::cout << "Level " << level << " = " << numberOfNodesPerLevel << " Nodes" << std::endl;
+    for (uint level = 0; level <= maxLevel; level++)
+    {
+        int numberOfNodesPerLevel = coordX.getSize(level) + 1;
+        numberOfNodesGlobal += numberOfNodesPerLevel;
+        std::cout << "Level " << level << " = " << numberOfNodesPerLevel << " Nodes" << std::endl;
 
 		setNumberOfNodes(numberOfNodesPerLevel, level);
 
@@ -130,9 +130,9 @@ void GridReader::allocArrays_BoundaryValues()
 
     for (uint i = 0; i < channelBoundaryConditions.size(); i++)
     {
-        if (     this->channelBoundaryConditions[i] == "velocity") { fillVelocityVectors(i); } 
-		else if (this->channelBoundaryConditions[i] == "pressure") { setPressureValues(i); } 
-		else if (this->channelBoundaryConditions[i] == "outflow")  { setOutflowValues(i);  }
+        if (     this->channelBoundaryConditions[i] == "velocity") { fillVelocityVectors(i); }
+        else if (this->channelBoundaryConditions[i] == "pressure") { setPressureValues(i); }
+        else if (this->channelBoundaryConditions[i] == "outflow")  { setOutflowValues(i);  }
     }
 
 	setVelocityValues();
@@ -223,13 +223,13 @@ void GridReader::allocArrays_taggedFluidNodes() {
 	// TODO
 }
 
-void GridReader::tagFluidNodeIndices(std::vector<uint> taggedFluidNodeIndices, CollisionTemplate tag, uint level){
-	std::cout << "GridReader::tagFluidNodeIndices not implemented" << std::endl;
+void GridReader::tagFluidNodeIndices(const std::vector<uint>& taggedFluidNodeIndices, CollisionTemplate tag, uint level){
+    std::cout << "GridReader::tagFluidNodeIndices not implemented" << std::endl;
     // TODO
 }
 
 void GridReader::sortFluidNodeTags(){
-	std::cout << "GridReader::sortFluidNodeTags not implemented" << std::endl;
+    std::cout << "GridReader::sortFluidNodeTags not implemented" << std::endl;
     // TODO
 }
 
@@ -285,23 +285,23 @@ void GridReader::fillVelocityVectors(int channelSide)
 			delete[] veloX_ValuesPerSide;
             delete[] veloY_ValuesPerSide;
             delete[] veloZ_ValuesPerSide;
-        }        
-	}
+        }
+    }
 
 
 }
 
-void GridReader::setVelocityValues() { 
+void GridReader::setVelocityValues() {
     for (int level = 0; level < (int)(velocityX_BCvalues.size()); level++) {
-        
-		int sizePerLevel = (int) velocityX_BCvalues[level].size();
+
+        int sizePerLevel = (int) velocityX_BCvalues[level].size();
         std::cout << "complete size velocity level " << level << " : " << sizePerLevel << std::endl;
         setVelocitySizePerLevel(level, sizePerLevel);
-        
-		if (sizePerLevel > 1) {
+
+        if (sizePerLevel > 1) {
             cudaMemoryManager->cudaAllocVeloBC(level);
             setVelocity(level, sizePerLevel);
-			cudaMemoryManager->cudaCopyVeloBC(level);
+            cudaMemoryManager->cudaCopyVeloBC(level);
         }
     }
 }
@@ -672,8 +672,8 @@ void GridReader::modifyQElement(std::shared_ptr<BoundaryQs> boundaryQ, unsigned
 /*------------------------------------------------------------------------------------------------*/
 /*---------------------------------------private q methods----------------------------------------*/
 /*------------------------------------------------------------------------------------------------*/
-void GridReader::initalVectorForQStruct(std::vector<std::vector<std::vector<real>>> &Qs, std::vector<std::vector<int>> &index, 
-										std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const
+void GridReader::initalVectorForQStruct(std::vector<std::vector<std::vector<real>>> &Qs, std::vector<std::vector<int>> &index,
+                                        std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const
 {
     boundaryQ->setValuesInVector(Qs, level);
     boundaryQ->setIndexInVector(index, level);
@@ -689,7 +689,7 @@ void GridReader::copyVectorsToQStruct(std::vector<std::vector<real>> &Qs,
 
 	for (int direction = 0; direction < para->getD3Qxx(); direction++) {
         for (size_t indexQ = 0; indexQ < sizeOfValues; indexQ++) {
-            qTemp.q27[direction][indexQ] = Qs[direction][indexQ]; 
+            qTemp.q27[direction][indexQ] = Qs[direction][indexQ];
         }
     }
 
@@ -851,46 +851,46 @@ void GridReader::setBoundingBox()
 
 void GridReader::initPeriodicNeigh(std::vector<std::vector<std::vector<unsigned int> > > periodV, std::vector<std::vector<unsigned int> > periodIndex,  std::string boundaryCondition)
 {
-	std::vector<unsigned int>neighVec;
-	std::vector<unsigned int>indexVec;
-	
-	int counter = 0;
-
-	for(unsigned int i=0; i<neighX->getLevel();i++) {
-		if(boundaryCondition =="periodic_y"){
-			neighVec = neighY->getVec(i);
-		} 
-		else if(boundaryCondition =="periodic_x"){
-			neighVec = neighX->getVec(i);
-		}
-		else if(boundaryCondition =="periodic_z"){
-			neighVec = neighZ->getVec(i);
-		}
-		else {
-			std::cout << "wrong String in periodicValue" << std::endl;
-			exit(1);
-		}
+    std::vector<unsigned int>neighVec;
+    std::vector<unsigned int>indexVec;
 
-		for (std::vector<unsigned int>::iterator it = periodIndex[i].begin(); it != periodIndex[i].end(); it++) {
-			if(periodV[i][0][counter] != 0) {
-				neighVec[*it]=periodV[i][0][counter];
-			}
+    int counter = 0;
 
-			counter++;
-		}
+    for(unsigned int i=0; i<neighX->getLevel();i++) {
+        if(boundaryCondition =="periodic_y"){
+            neighVec = neighY->getVec(i);
+        }
+        else if(boundaryCondition =="periodic_x"){
+            neighVec = neighX->getVec(i);
+        }
+        else if(boundaryCondition =="periodic_z"){
+            neighVec = neighZ->getVec(i);
+        }
+        else {
+            std::cout << "wrong String in periodicValue" << std::endl;
+            exit(1);
+        }
 
+        for (std::vector<unsigned int>::iterator it = periodIndex[i].begin(); it != periodIndex[i].end(); it++) {
+            if(periodV[i][0][counter] != 0) {
+                neighVec[*it]=periodV[i][0][counter];
+            }
 
-		if(boundaryCondition =="periodic_y"){
-			neighY->setVec(i, neighVec);
-		} 
-		else if(boundaryCondition =="periodic_x"){
-			neighX->setVec(i, neighVec);
-		}
-		else if(boundaryCondition =="periodic_z"){
-			neighZ->setVec(i, neighVec);
-		}
+            counter++;
+        }
 
-	}
+
+        if(boundaryCondition =="periodic_y"){
+            neighY->setVec(i, neighVec);
+        }
+        else if(boundaryCondition =="periodic_x"){
+            neighX->setVec(i, neighVec);
+        }
+        else if(boundaryCondition =="periodic_z"){
+            neighZ->setVec(i, neighVec);
+        }
+
+    }
 }
 
 void GridReader::makeReader(std::shared_ptr<Parameter> para)
@@ -921,9 +921,9 @@ void GridReader::makeReader(std::vector<std::shared_ptr<BoundaryQs> > &BC_Qs, st
 
 void GridReader::setChannelBoundaryCondition()
 {
-	for (std::size_t i = 0; i < channelDirections.size(); i++)
-	{
-		this->channelBoundaryConditions[i] = BC_Values[i]->getBoundaryCondition();
-		std::cout << this->channelDirections[i] << " Boundary: " << channelBoundaryConditions[i] << std::endl;
-	}
-}
\ No newline at end of file
+    for (std::size_t i = 0; i < channelDirections.size(); i++)
+    {
+        this->channelBoundaryConditions[i] = BC_Values[i]->getBoundaryCondition();
+        std::cout << this->channelDirections[i] << " Boundary: " << channelBoundaryConditions[i] << std::endl;
+    }
+}
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h
index 2c17c28022d88a65e922e23d1c89f5166f5a1716..041d2c3ce94592f792c5a850eebd14c07f4db1b4 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h
@@ -3,9 +3,9 @@
 
 #include "../GridProvider.h"
 
-#include <vector>
-#include <string>
 #include <memory>
+#include <string>
+#include <vector>
 
 #include "LBM/LB.h"
 
@@ -16,15 +16,14 @@ class BoundaryValues;
 class BoundaryQs;
 class CoordNeighborGeoV;
 
-class VIRTUALFLUIDS_GPU_EXPORT GridReader
-	: public GridProvider
+class VIRTUALFLUIDS_GPU_EXPORT GridReader : public GridProvider
 {
 private:
-	bool binaer;
-	std::vector<std::string> channelDirections;
-	std::vector<std::string> channelBoundaryConditions;
-	std::shared_ptr<CoordNeighborGeoV> neighX, neighY, neighZ, neighWSB;
-	std::vector<std::shared_ptr<BoundaryValues> > BC_Values;
+    bool binaer;
+    std::vector<std::string> channelDirections;
+    std::vector<std::string> channelBoundaryConditions;
+    std::shared_ptr<CoordNeighborGeoV> neighX, neighY, neighZ, neighWSB;
+    std::vector<std::shared_ptr<BoundaryValues>> BC_Values;
 
     std::vector<std::vector<real>> velocityX_BCvalues, velocityY_BCvalues, velocityZ_BCvalues;
     std::vector<std::vector<std::vector<real>>> velocityQs;
@@ -34,59 +33,62 @@ private:
     std::vector<std::vector<real>> outflowBCvalues;
 
 public:
-	GridReader(FILEFORMAT format, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager);
-    ~GridReader();
-	void allocArrays_CoordNeighborGeo() override;
-	void allocArrays_BoundaryValues() override;
+    GridReader(FILEFORMAT format, std::shared_ptr<Parameter> para,
+               std::shared_ptr<CudaMemoryManager> cudaMemoryManager);
+    ~GridReader() override;
+    void allocArrays_CoordNeighborGeo() override;
+    void allocArrays_BoundaryValues() override;
     void allocArrays_OffsetScale() override;
     void allocArrays_taggedFluidNodes() override;
 
-	void tagFluidNodeIndices(std::vector<uint> taggedFluidNodeIndices, CollisionTemplate tag, uint level) override;
-	void sortFluidNodeTags() override;
+    void tagFluidNodeIndices(const std::vector<uint> &taggedFluidNodeIndices, CollisionTemplate tag, uint level) override;
 
-	void initalValuesDomainDecompostion(int level);
+    void sortFluidNodeTags() override;
 
-	void setChannelBoundaryCondition();
+    void initalValuesDomainDecompostion(int level);
 
-	void allocArrays_BoundaryQs() override;
-	bool getBinaer();
-	void setDimensions() override;
-	void setBoundingBox() override;
-	void initPeriodicNeigh(std::vector<std::vector<std::vector<unsigned int> > > periodV, std::vector<std::vector<unsigned int> > periodIndex, std::string way) override;
+    void setChannelBoundaryCondition();
+
+    void allocArrays_BoundaryQs() override;
+    bool getBinaer();
+    void setDimensions() override;
+    void setBoundingBox() override;
+    void initPeriodicNeigh(std::vector<std::vector<std::vector<unsigned int>>> periodV,
+                           std::vector<std::vector<unsigned int>> periodIndex, std::string way) override;
 
 private:
-	void makeReader(std::shared_ptr<Parameter> para);
-	void makeReader(std::vector<std::shared_ptr<BoundaryQs> > &BC_Qs, std::shared_ptr<Parameter> para);
+    void makeReader(std::shared_ptr<Parameter> para);
+    void makeReader(std::vector<std::shared_ptr<BoundaryQs>> &BC_Qs, std::shared_ptr<Parameter> para);
 
-	void setPressureValues(int channelSide) const;
-	void setPressRhoBC(int sizePerLevel, int level, int channelSide) const;
+    void setPressureValues(int channelSide) const;
+    void setPressRhoBC(int sizePerLevel, int level, int channelSide) const;
 
-	void fillVelocityVectors(int channelSide);
+    void fillVelocityVectors(int channelSide);
     void setVelocityValues();
-	void setVelocity(int level, int sizePerLevel) const;
-
-	void setOutflowValues(int channelSide) const;
-	void setOutflow(int level, int sizePerLevel, int channelSide) const;
+    void setVelocity(int level, int sizePerLevel) const;
 
+    void setOutflowValues(int channelSide) const;
+    void setOutflow(int level, int sizePerLevel, int channelSide) const;
 
-	//void fillVelocityQVectors(int channelSide);
+    // void fillVelocityQVectors(int channelSide);
     void setPressQs(std::shared_ptr<BoundaryQs> boundaryQ) const;
-	void setVelocityQs(std::shared_ptr<BoundaryQs> boundaryQ);
-	void setOutflowQs(std::shared_ptr<BoundaryQs> boundaryQ) const;
-	void setNoSlipQs(std::shared_ptr<BoundaryQs> boundaryQ) const;
-	void setGeoQs(std::shared_ptr<BoundaryQs> boundaryQ) const;
-	void modifyQElement(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
+    void setVelocityQs(std::shared_ptr<BoundaryQs> boundaryQ);
+    void setOutflowQs(std::shared_ptr<BoundaryQs> boundaryQ) const;
+    void setNoSlipQs(std::shared_ptr<BoundaryQs> boundaryQ) const;
+    void setGeoQs(std::shared_ptr<BoundaryQs> boundaryQ) const;
+    void modifyQElement(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
 
-	void initalVectorForQStruct(std::vector<std::vector<std::vector<real>>> &Qs, std::vector<std::vector<int>> &index,
+    void initalVectorForQStruct(std::vector<std::vector<std::vector<real>>> &Qs, std::vector<std::vector<int>> &index,
                                 std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
     void copyVectorsToQStruct(std::vector<std::vector<real>> &Qs, std::vector<int> &index,
                               QforBoundaryConditions &Q) const;
     void initalQStruct(QforBoundaryConditions &Q, std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
-	void printQSize(std::string bc, std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
-	void setSizeNoSlip(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
-	void setSizeGeoQs(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
-	void setQ27Size(QforBoundaryConditions &Q, real* QQ, unsigned int sizeQ) const;
-	bool hasQs(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
+    void printQSize(std::string bc, std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
+    void setSizeNoSlip(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
+    void setSizeGeoQs(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
+    void setQ27Size(QforBoundaryConditions &Q, real *QQ, unsigned int sizeQ) const;
+    bool hasQs(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const;
+
 public:
     void initalGridInformations() override;
 };
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
index d89f4cd85985694b4529d338ab5e4c5a86cd3a38..d14973be77ab6766dc04ec95213efe663db3b873 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
@@ -20,13 +20,15 @@
 
 using namespace vf::lbm::dir;
 
-GridGenerator::GridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::gpu::Communicator& communicator):
-    mpiProcessID(communicator.getPID()), builder(builder)
+GridGenerator::GridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para,
+                             std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::gpu::Communicator &communicator)
+    : mpiProcessID(communicator.getPID()), builder(builder)
 {
     this->para = para;
     this->cudaMemoryManager = cudaMemoryManager;
     this->indexRearrangement = std::make_unique<IndexRearrangementForStreams>(para, builder, communicator);
-    this->interpolationGrouper = std::make_unique<InterpolationCellGrouper>(para->getParHallLevels(), para->getParDallLevels(), builder);
+    this->interpolationGrouper =
+        std::make_unique<InterpolationCellGrouper>(para->getParHallLevels(), para->getParDallLevels(), builder);
 }
 
 GridGenerator::~GridGenerator() = default;
@@ -59,15 +61,15 @@ void GridGenerator::allocArrays_CoordNeighborGeo()
     std::cout << "Number of Level: " << numberOfLevels << std::endl;
     int numberOfNodesGlobal = 0;
     std::cout << "Number of Nodes: " << std::endl;
-    
-    for (uint level = 0; level < numberOfLevels; level++) 
+
+    for (uint level = 0; level < numberOfLevels; level++)
     {
         const int numberOfNodesPerLevel = builder->getNumberOfNodes(level) + 1;
         numberOfNodesGlobal += numberOfNodesPerLevel;
         std::cout << "Level " << level << " = " << numberOfNodesPerLevel << " Nodes" << std::endl;
-    
+
         setNumberOfNodes(numberOfNodesPerLevel, level);
-    
+
         cudaMemoryManager->cudaAllocCoord(level);
         cudaMemoryManager->cudaAllocSP(level);
         //cudaMemoryManager->cudaAllocF3SP(level);
@@ -75,7 +77,7 @@ void GridGenerator::allocArrays_CoordNeighborGeo()
 
         if(para->getUseTurbulentViscosity())
             cudaMemoryManager->cudaAllocTurbulentViscosity(level);
-        
+
         if(para->getIsBodyForce())
             cudaMemoryManager->cudaAllocBodyForce(level);
 
@@ -106,7 +108,7 @@ void GridGenerator::allocArrays_CoordNeighborGeo()
 
 void GridGenerator::allocArrays_taggedFluidNodes() {
 
-    for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) 
+    for (uint level = 0; level < builder->getNumberOfGridLevels(); level++)
     {
         for ( CollisionTemplate tag: all_CollisionTemplate )
         {   //TODO: Need to add CollisionTemplate to GridBuilder to allow as argument and get rid of indivual get funtions for fluid node indices... and clean up this mess
@@ -155,16 +157,16 @@ void GridGenerator::allocArrays_taggedFluidNodes() {
             }
         }
         VF_LOG_INFO("Number of tagged nodes on level {}:", level);
-        VF_LOG_INFO("Default: {}, Border: {}, WriteMacroVars: {}, ApplyBodyForce: {}, AllFeatures: {}", 
+        VF_LOG_INFO("Default: {}, Border: {}, WriteMacroVars: {}, ApplyBodyForce: {}, AllFeatures: {}",
                     para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::Default],
                     para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::SubDomainBorder],
                     para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::WriteMacroVars],
                     para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::ApplyBodyForce],
-                    para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::AllFeatures]    );        
+                    para->getParH(level)->numberOfTaggedFluidNodes[CollisionTemplate::AllFeatures]    );
     }
 }
 
-void GridGenerator::tagFluidNodeIndices(std::vector<uint> taggedFluidNodeIndices, CollisionTemplate tag, uint level) {
+void GridGenerator::tagFluidNodeIndices(const std::vector<uint>& taggedFluidNodeIndices, CollisionTemplate tag, uint level) {
     switch(tag)
     {
         case CollisionTemplate::WriteMacroVars:
@@ -184,7 +186,7 @@ void GridGenerator::tagFluidNodeIndices(std::vector<uint> taggedFluidNodeIndices
             break;
 
     }
-    
+
 }
 
 void GridGenerator::sortFluidNodeTags() {
@@ -201,7 +203,7 @@ void GridGenerator::sortFluidNodeTags() {
 void GridGenerator::allocArrays_BoundaryValues()
 {
     std::cout << "------read BoundaryValues------" << std::endl;
-    int blocks = 0;
+    int blocks;
 
     for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
         const auto numberOfPressureValues = int(builder->getPressureSize(level));
@@ -229,12 +231,12 @@ void GridGenerator::allocArrays_BoundaryValues()
 
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         para->getParH(level)->slipBC.numberOfBCnodes = 0;
-        if (numberOfSlipValues > 1)
-        {
+        if (numberOfSlipValues > 1) {
             blocks = (numberOfSlipValues / para->getParH(level)->numberofthreads) + 1;
             para->getParH(level)->slipBC.numberOfBCnodes = blocks * para->getParH(level)->numberofthreads;
             cudaMemoryManager->cudaAllocSlipBC(level);
-            builder->getSlipValues(para->getParH(level)->slipBC.normalX, para->getParH(level)->slipBC.normalY, para->getParH(level)->slipBC.normalZ, para->getParH(level)->slipBC.k, level);
+            builder->getSlipValues(para->getParH(level)->slipBC.normalX, para->getParH(level)->slipBC.normalY,
+                                   para->getParH(level)->slipBC.normalZ, para->getParH(level)->slipBC.k, level);
             cudaMemoryManager->cudaCopySlipBC(level);
         }
         para->getParD(level)->slipBC.numberOfBCnodes = para->getParH(level)->slipBC.numberOfBCnodes;
@@ -254,11 +256,11 @@ void GridGenerator::allocArrays_BoundaryValues()
             para->getParH(level)->stressBC.numberOfBCnodes = blocks * para->getParH(level)->numberofthreads;
             cudaMemoryManager->cudaAllocStressBC(level);
             cudaMemoryManager->cudaAllocWallModel(level, para->getHasWallModelMonitor());
-            builder->getStressValues(   para->getParH(level)->stressBC.normalX,  para->getParH(level)->stressBC.normalY,  para->getParH(level)->stressBC.normalZ, 
+            builder->getStressValues(   para->getParH(level)->stressBC.normalX,  para->getParH(level)->stressBC.normalY,  para->getParH(level)->stressBC.normalZ,
                                         para->getParH(level)->stressBC.Vx,       para->getParH(level)->stressBC.Vy,       para->getParH(level)->stressBC.Vz,
                                         para->getParH(level)->stressBC.Vx1,      para->getParH(level)->stressBC.Vy1,      para->getParH(level)->stressBC.Vz1,
-                                        para->getParH(level)->stressBC.k,        para->getParH(level)->stressBC.kN,       
-                                        para->getParH(level)->wallModel.samplingOffset, para->getParH(level)->wallModel.z0, 
+                                        para->getParH(level)->stressBC.k,        para->getParH(level)->stressBC.kN,
+                                        para->getParH(level)->wallModel.samplingOffset, para->getParH(level)->wallModel.z0,
                                         level);
 
             cudaMemoryManager->cudaCopyStressBC(level);
@@ -268,7 +270,7 @@ void GridGenerator::allocArrays_BoundaryValues()
         para->getParH(level)->numberOfStressBCnodesRead = para->getParH(level)->stressBC.numberOfBCnodes * para->getD3Qxx();
         para->getParD(level)->numberOfStressBCnodesRead = para->getParH(level)->stressBC.numberOfBCnodes * para->getD3Qxx();
     }
-    
+
 
     for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
         const auto numberOfVelocityValues = int(builder->getVelocitySize(level));
@@ -285,7 +287,8 @@ void GridGenerator::allocArrays_BoundaryValues()
             cudaMemoryManager->cudaAllocVeloBC(level);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-            builder->getVelocityValues(para->getParH(level)->velocityBC.Vx, para->getParH(level)->velocityBC.Vy, para->getParH(level)->velocityBC.Vz, para->getParH(level)->velocityBC.k, level);
+            builder->getVelocityValues(para->getParH(level)->velocityBC.Vx, para->getParH(level)->velocityBC.Vy,
+                                       para->getParH(level)->velocityBC.Vz, para->getParH(level)->velocityBC.k, level);
 
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -327,7 +330,7 @@ void GridGenerator::allocArrays_BoundaryValues()
         const auto numberOfPrecursorValues = int(builder->getPrecursorSize(level));
         *logging::out << logging::Logger::INFO_INTERMEDIATE << "size precursor level " << level << " : " << numberOfPrecursorValues << "\n";
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        int blocks = (numberOfPrecursorValues / para->getParH(level)->numberofthreads) + 1;
+        blocks = (numberOfPrecursorValues / para->getParH(level)->numberofthreads) + 1;
         para->getParH(level)->precursorBC.sizeQ = blocks * para->getParH(level)->numberofthreads;
         para->getParD(level)->precursorBC.sizeQ = para->getParH(level)->precursorBC.sizeQ;
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -335,19 +338,19 @@ void GridGenerator::allocArrays_BoundaryValues()
         para->getParD(level)->precursorBC.numberOfBCnodes = numberOfPrecursorValues;
         para->getParH(level)->numberOfPrecursorBCnodesRead = numberOfPrecursorValues * para->getD3Qxx();
         para->getParD(level)->numberOfPrecursorBCnodesRead = numberOfPrecursorValues * para->getD3Qxx();
-        
+
         if (numberOfPrecursorValues > 1)
         {
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             cudaMemoryManager->cudaAllocPrecursorBC(level);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             builder->getPrecursorValues(
-                    para->getParH(level)->precursorBC.planeNeighbor0PP, para->getParH(level)->precursorBC.planeNeighbor0PM, 
-                    para->getParH(level)->precursorBC.planeNeighbor0MP, para->getParH(level)->precursorBC.planeNeighbor0MM, 
-                    para->getParH(level)->precursorBC.weights0PP, para->getParH(level)->precursorBC.weights0PM, 
-                    para->getParH(level)->precursorBC.weights0MP, para->getParH(level)->precursorBC.weights0MM, 
-                    para->getParH(level)->precursorBC.k, para->getParH(level)->transientBCInputFileReader, para->getParH(level)->precursorBC.numberOfPrecursorNodes, 
-                    para->getParH(level)->precursorBC.numberOfQuantities, para->getParH(level)->precursorBC.timeStepsBetweenReads, 
+                    para->getParH(level)->precursorBC.planeNeighbor0PP, para->getParH(level)->precursorBC.planeNeighbor0PM,
+                    para->getParH(level)->precursorBC.planeNeighbor0MP, para->getParH(level)->precursorBC.planeNeighbor0MM,
+                    para->getParH(level)->precursorBC.weights0PP, para->getParH(level)->precursorBC.weights0PM,
+                    para->getParH(level)->precursorBC.weights0MP, para->getParH(level)->precursorBC.weights0MM,
+                    para->getParH(level)->precursorBC.k, para->getParH(level)->transientBCInputFileReader, para->getParH(level)->precursorBC.numberOfPrecursorNodes,
+                    para->getParH(level)->precursorBC.numberOfQuantities, para->getParH(level)->precursorBC.timeStepsBetweenReads,
                     para->getParH(level)->precursorBC.velocityX, para->getParH(level)->precursorBC.velocityY, para->getParH(level)->precursorBC.velocityZ,
                     level);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -360,7 +363,9 @@ void GridGenerator::allocArrays_BoundaryValues()
 
             for(auto reader : para->getParH(level)->transientBCInputFileReader)
             {
-                if(reader->getNumberOfQuantities() != para->getParD(level)->precursorBC.numberOfQuantities) throw std::runtime_error("Number of quantities in reader and number of quantities needed for precursor don't match!");
+                if(reader->getNumberOfQuantities() != para->getParD(level)->precursorBC.numberOfQuantities)
+                    throw std::runtime_error(
+                        "Number of quantities in reader and number of quantities needed for precursor don't match!");
             }
 
             cudaMemoryManager->cudaCopyPrecursorBC(level);
@@ -368,7 +373,7 @@ void GridGenerator::allocArrays_BoundaryValues()
 
             // read first timestep of precursor into next and copy to next on device
             for(auto reader : para->getParH(level)->transientBCInputFileReader)
-            {   
+            {
                 reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, 0);
             }
 
@@ -382,7 +387,7 @@ void GridGenerator::allocArrays_BoundaryValues()
             //read second timestep of precursor into next and copy next to device
             real nextTime = para->getParD(level)->precursorBC.timeStepsBetweenReads*pow(2,-((real)level))*para->getTimeRatio();
             for(auto reader : para->getParH(level)->transientBCInputFileReader)
-            {   
+            {
                 reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, nextTime);
             }
 
@@ -398,7 +403,7 @@ void GridGenerator::allocArrays_BoundaryValues()
 
             //start usual cycle of loading, i.e. read velocities of timestep after current and copy asynchronously to device
             for(auto reader : para->getParH(level)->transientBCInputFileReader)
-            {   
+            {
                 reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, 2*nextTime);
             }
 
@@ -476,7 +481,7 @@ void GridGenerator::initalValuesDomainDecompostion()
     if (para->getNumprocs() < 2)
         return;
     if ((para->getNumprocs() > 1) /*&& (procNeighborsSendX.size() == procNeighborsRecvX.size())*/) {
-        
+
         // direction has to be changed in case of periodic BCs and multiple sub domains
         std::vector<int> fillOrder = { 0, 1, 2, 3, 4, 5 };
 
@@ -556,7 +561,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                         builder->getReceiveIndices(para->getParH(level)->recvProcessNeighborX[indexProcessNeighbor].index, direction,
                                                    level);
                         if (level != builder->getNumberOfGridLevels() - 1 && para->useReducedCommunicationAfterFtoC)
-                            indexRearrangement->initCommunicationArraysForCommAfterFinetoCoarseX(level, indexProcessNeighbor, direction);             
+                            indexRearrangement->initCommunicationArraysForCommAfterFinetoCoarseX(level, indexProcessNeighbor, direction);
                         ////////////////////////////////////////////////////////////////////////////////////////
                         cudaMemoryManager->cudaCopyProcessNeighborXIndex(level, indexProcessNeighbor);
                         ////////////////////////////////////////////////////////////////////////////////////////
@@ -619,7 +624,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                         ////////////////////////////////////////////////////////////////////////////////////////
                         // malloc on host and device
                         cudaMemoryManager->cudaAllocProcessNeighborY(level, indexProcessNeighbor);
-                        ////////////////////////////////////////////////////////////////////////////////////////                        
+                        ////////////////////////////////////////////////////////////////////////////////////////
                         // init index arrays
                         builder->getSendIndices(para->getParH(level)->sendProcessNeighborY[indexProcessNeighbor].index, direction, level);
                         builder->getReceiveIndices(para->getParH(level)->recvProcessNeighborY[indexProcessNeighbor].index, direction,
@@ -638,7 +643,7 @@ void GridGenerator::initalValuesDomainDecompostion()
 
                     if (tempSend > 0) {
                         int indexProcessNeighbor = (int)para->getParH(level)->sendProcessNeighborZ.size();
-    
+
                         para->getParH(level)->sendProcessNeighborZ.emplace_back();
                         para->getParD(level)->sendProcessNeighborZ.emplace_back();
                         para->getParH(level)->recvProcessNeighborZ.emplace_back();
@@ -930,7 +935,7 @@ void GridGenerator::allocArrays_BoundaryQs()
             unsigned int sizeQ = para->getParH(i)->pressureBC.numberOfBCnodes;
             QforBoundaryConditions Q;
             getPointersToBoundaryConditions(Q, QQ, sizeQ);
-            
+
             builder->getPressureQs(Q.q27, i);
 
 
@@ -977,7 +982,7 @@ void GridGenerator::allocArrays_BoundaryQs()
             unsigned int sizeQ = para->getParH(i)->slipBC.numberOfBCnodes;
             QforBoundaryConditions Q;
             getPointersToBoundaryConditions(Q, QQ, sizeQ);
-            
+
             builder->getSlipQs(Q.q27, i);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             cudaMemoryManager->cudaCopySlipBC(i);
@@ -997,7 +1002,7 @@ void GridGenerator::allocArrays_BoundaryQs()
             unsigned int sizeQ = para->getParH(i)->stressBC.numberOfBCnodes;
             QforBoundaryConditions Q;
             getPointersToBoundaryConditions(Q, QQ, sizeQ);
-            
+
             builder->getStressQs(Q.q27, i);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             cudaMemoryManager->cudaCopyStressBC(i);
@@ -1165,7 +1170,7 @@ void GridGenerator::allocArrays_BoundaryQs()
 
 void GridGenerator::allocArrays_OffsetScale()
 {
-    for (uint level = 0; level < builder->getNumberOfGridLevels() - 1; level++) 
+    for (uint level = 0; level < builder->getNumberOfGridLevels() - 1; level++)
     {
         const uint numberOfNodesPerLevelCF = builder->getNumberOfNodesCF(level);
         const uint numberOfNodesPerLevelFC = builder->getNumberOfNodesFC(level);
@@ -1204,7 +1209,7 @@ void GridGenerator::allocArrays_OffsetScale()
         builder->getOffsetCF(para->getParH(level)->offCF.xOffCF, para->getParH(level)->offCF.yOffCF, para->getParH(level)->offCF.zOffCF, level);
         builder->getOffsetFC(para->getParH(level)->offFC.xOffFC, para->getParH(level)->offFC.yOffFC, para->getParH(level)->offFC.zOffFC, level);
         builder->getGridInterfaceIndices(para->getParH(level)->intCF.ICellCFC, para->getParH(level)->intCF.ICellCFF, para->getParH(level)->intFC.ICellFCC, para->getParH(level)->intFC.ICellFCF, level);
-        
+
         if (para->getUseStreams() || para->getNumprocs() > 1) {
             // split fine-to-coarse indices into border and bulk
             interpolationGrouper->splitFineToCoarseIntoBorderAndBulk(level);
@@ -1307,7 +1312,7 @@ std::string GridGenerator::verifyNeighborIndex(int level, int index , int &inval
 
     //std::cout << para->getParH(level)->coordinateX[1] << ", " << para->getParH(level)->coordinateY[1] << ", " << para->getParH(level)->coordinateZ[1] << std::endl;
     //std::cout << para->getParH(level)->coordinateX[para->getParH(level)->numberOfNodes - 1] << ", " << para->getParH(level)->coordinateY[para->getParH(level)->numberOfNodes - 1] << ", " << para->getParH(level)->coordinateZ[para->getParH(level)->numberOfNodes - 1] << std::endl;
-    
+
     real maxX = para->getParH(level)->coordinateX[para->getParH(level)->numberOfNodes - 1] - delta;
     real maxY = para->getParH(level)->coordinateY[para->getParH(level)->numberOfNodes - 1] - delta;
     real maxZ = para->getParH(level)->coordinateZ[para->getParH(level)->numberOfNodes - 1] - delta;
@@ -1348,8 +1353,8 @@ std::string GridGenerator::checkNeighbor(int level, real x, real y, real z, int
 
     if (!neighborValid) {
         oss << "NeighborX invalid from: (" << x << ", " << y << ", " << z << "), index: " << index << ", "
-            << direction << " neighborIndex: " << neighborIndex << 
-            ", actual neighborCoords : (" << neighborCoordX << ", " << neighborCoordY << ", " << neighborCoordZ << 
+            << direction << " neighborIndex: " << neighborIndex <<
+            ", actual neighborCoords : (" << neighborCoordX << ", " << neighborCoordY << ", " << neighborCoordZ <<
             "), expected neighborCoords : (" << neighborX << ", " << neighborY << ", " << neighborZ << ")\n";
         numberOfWrongNeihgbors++;
     }
@@ -1384,4 +1389,4 @@ void GridGenerator::getPointersToBoundaryConditions(QforBoundaryConditions& boun
     boundaryConditionStruct.q27[DIR_MMM] =  &subgridDistances[DIR_MMM * numberOfBCnodes];
     boundaryConditionStruct.q27[DIR_PMM] =  &subgridDistances[DIR_PMM * numberOfBCnodes];
     boundaryConditionStruct.q27[DIR_MPM] =  &subgridDistances[DIR_MPM * numberOfBCnodes];
-}
\ No newline at end of file
+}
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h
index cee661a0c1092e7a3d334018133fd2f963ada991..c97ed02a64da1d5fafa18150c75d149f96484d44 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h
@@ -78,7 +78,7 @@ public:
     void allocArrays_OffsetScale() override;
     void allocArrays_taggedFluidNodes() override;
 
-    void tagFluidNodeIndices(std::vector<uint> taggedFluidNodeIndices, CollisionTemplate tag, uint level) override;
+    void tagFluidNodeIndices(const std::vector<uint>& taggedFluidNodeIndices, CollisionTemplate tag, uint level) override;
     void sortFluidNodeTags() override;
 
     virtual void setDimensions() override;
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
index a7b22c244da368b9591c5f528bb7bd5eaea91e50..567a4112e935e0a154288ec7beb1fc82d01c9d7a 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
@@ -53,130 +53,130 @@ __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real s9,
                                                          bool EvenOrOdd);
 
 __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
-	real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* DDStart,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd);
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DDStart,
+    int size_Mat,
+    int level,
+    real* forces,
+    bool EvenOrOdd);
 
 __global__ void LB_Kernel_Cumulant_D3Q27All4(real omega,
-														unsigned int* bcMatD,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														real* DDStart,
-														int size_Mat,
-														int level,
-														real* forces,
-														bool EvenOrOdd);
+                                                        unsigned int* bcMatD,
+                                                        unsigned int* neighborX,
+                                                        unsigned int* neighborY,
+                                                        unsigned int* neighborZ,
+                                                        real* DDStart,
+                                                        int size_Mat,
+                                                        int level,
+                                                        real* forces,
+                                                        bool EvenOrOdd);
 
 
 __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
-																unsigned int* bcMatD,
-																unsigned int* neighborX,
-																unsigned int* neighborY,
-																unsigned int* neighborZ,
-																real* DDStart,
-																int size_Mat,
-																int level,
-																real* forces,
-																bool EvenOrOdd);
+                                                                unsigned int* bcMatD,
+                                                                unsigned int* neighborX,
+                                                                unsigned int* neighborY,
+                                                                unsigned int* neighborZ,
+                                                                real* DDStart,
+                                                                int size_Mat,
+                                                                int level,
+                                                                real* forces,
+                                                                bool EvenOrOdd);
 
 
 
 __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
-													real deltaPhi,
-													real angularVelocity,
-													unsigned int* bcMatD,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													real* coordX,
-													real* coordY,
-													real* coordZ,
-													real* DDStart,
-													int size_Mat,
-													bool EvenOrOdd);
+                                                    real deltaPhi,
+                                                    real angularVelocity,
+                                                    unsigned int* bcMatD,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    real* coordX,
+                                                    real* coordY,
+                                                    real* coordZ,
+                                                    real* DDStart,
+                                                    int size_Mat,
+                                                    bool EvenOrOdd);
 
 __global__ void LB_Kernel_Cascade_SP_27( real s9,
-													unsigned int* bcMatD,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													real* DDStart,
-													int size_Mat,
-													bool EvenOrOdd);
+                                                    unsigned int* bcMatD,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    real* DDStart,
+                                                    int size_Mat,
+                                                    bool EvenOrOdd);
 
 __global__ void LB_Kernel_Kum_New_SP_27( real s9,
-													unsigned int* bcMatD,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													real* DDStart,
-													int size_Mat,
-													bool EvenOrOdd);
+                                                    unsigned int* bcMatD,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    real* DDStart,
+                                                    int size_Mat,
+                                                    bool EvenOrOdd);
 
 __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
-														unsigned int* bcMatD,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														real* DDStart,
-														real* dxxUx,
-														real* dyyUy,
-														real* dzzUz,
-														int size_Mat,
-														bool EvenOrOdd);
+                                                        unsigned int* bcMatD,
+                                                        unsigned int* neighborX,
+                                                        unsigned int* neighborY,
+                                                        unsigned int* neighborZ,
+                                                        real* DDStart,
+                                                        real* dxxUx,
+                                                        real* dyyUy,
+                                                        real* dzzUz,
+                                                        int size_Mat,
+                                                        bool EvenOrOdd);
 
 __global__ void LB_Kernel_Kum_Comp_SP_27(real s9,
-													unsigned int* bcMatD,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													real* DDStart,
-													int size_Mat,
-													bool EvenOrOdd);
+                                                    unsigned int* bcMatD,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    real* DDStart,
+                                                    int size_Mat,
+                                                    bool EvenOrOdd);
 
 __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
-	real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* DDStart,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd);
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DDStart,
+    int size_Mat,
+    int level,
+    real* forces,
+    bool EvenOrOdd);
 
 __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
-	real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* DDStart,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd);
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DDStart,
+    int size_Mat,
+    int level,
+    real* forces,
+    bool EvenOrOdd);
 
 __global__ void Cumulant_One_chim_Comp_SP_27(
-	real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* DDStart,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd);
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DDStart,
+    int size_Mat,
+    int level,
+    real* forces,
+    bool EvenOrOdd);
 
 inline __device__ void forwardChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real K);
 
@@ -189,57 +189,57 @@ inline __device__ void backwardChimeraWithK(real &mfa, real &mfb, real &mfc, rea
 
 
 __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
-	real omega_in,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int* neighborWSB,
-	real* veloX,
-	real* veloY,
-	real* veloZ,
-	real* DDStart,
-	real* turbulentViscosity,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd);
+    real omega_in,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int* neighborWSB,
+    real* veloX,
+    real* veloY,
+    real* veloZ,
+    real* DDStart,
+    real* turbulentViscosity,
+    int size_Mat,
+    int level,
+    real* forces,
+    bool EvenOrOdd);
 
 
 __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27( real omega,
-															unsigned int* neighborX,
-															unsigned int* neighborY,
-															unsigned int* neighborZ,
-															real* DDStart,
-															int size_Mat,
-															int level,
-															real* forces,
-															real porosity,
-															real darcy,
-															real forchheimer,
-															unsigned int sizeOfPorousMedia,
-															unsigned int* nodeIdsPorousMedia,
-															bool EvenOrOdd);
+                                                            unsigned int* neighborX,
+                                                            unsigned int* neighborY,
+                                                            unsigned int* neighborZ,
+                                                            real* DDStart,
+                                                            int size_Mat,
+                                                            int level,
+                                                            real* forces,
+                                                            real porosity,
+                                                            real darcy,
+                                                            real forchheimer,
+                                                            unsigned int sizeOfPorousMedia,
+                                                            unsigned int* nodeIdsPorousMedia,
+                                                            bool EvenOrOdd);
 
 __global__ void LB_Kernel_AD_Incomp_7( real diffusivity,
-												  unsigned int* bcMatD,
-												  unsigned int* neighborX,
-												  unsigned int* neighborY,
-												  unsigned int* neighborZ,
-												  real* DDStart,
-												  real* DD7,
-												  int size_Mat,
-												  bool EvenOrOdd);
+                                                  unsigned int* bcMatD,
+                                                  unsigned int* neighborX,
+                                                  unsigned int* neighborY,
+                                                  unsigned int* neighborZ,
+                                                  real* DDStart,
+                                                  real* DD7,
+                                                  int size_Mat,
+                                                  bool EvenOrOdd);
 
 __global__ void LB_Kernel_AD_Incomp_27( real diffusivity,
-												   unsigned int* bcMatD,
-												   unsigned int* neighborX,
-												   unsigned int* neighborY,
-												   unsigned int* neighborZ,
-												   real* DDStart,
-												   real* DD27,
-												   int size_Mat,
-												   bool EvenOrOdd);
+                                                   unsigned int* bcMatD,
+                                                   unsigned int* neighborX,
+                                                   unsigned int* neighborY,
+                                                   unsigned int* neighborZ,
+                                                   real* DDStart,
+                                                   real* DD27,
+                                                   int size_Mat,
+                                                   bool EvenOrOdd);
 
 __global__ void LBInit27( int myid,
                                      int numprocs,
@@ -296,21 +296,21 @@ __global__ void InitAD27(unsigned int* neighborX,
                                        bool EvenOrOdd);
 
 __global__ void LB_PostProcessor_F3_2018_Fehlberg(
-	real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* rhoOut,
-	real* vxOut,
-	real* vyOut,
-	real* vzOut,
-	real* DDStart,
-	real* G6,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd);
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* rhoOut,
+    real* vxOut,
+    real* vyOut,
+    real* vzOut,
+    real* DDStart,
+    real* G6,
+    int size_Mat,
+    int level,
+    real* forces,
+    bool EvenOrOdd);
 
 __global__ void LBCalcMac27( real* vxD,
                                         real* vyD,
@@ -338,17 +338,17 @@ __global__ void LBCalcMacSP27( real* vxD,
                                           bool isEvenTimestep);
 
 __global__ void LBCalcMacCompSP27( real* vxD,
-											  real* vyD,
-											  real* vzD,
-											  real* rhoD,
-											  real* pressD,
-											  unsigned int* geoD,
-											  unsigned int* neighborX,
-											  unsigned int* neighborY,
-											  unsigned int* neighborZ,
-											  unsigned int size_Mat,
-											  real* DD,
-											  bool isEvenTimestep);
+                                              real* vyD,
+                                              real* vzD,
+                                              real* rhoD,
+                                              real* pressD,
+                                              unsigned int* geoD,
+                                              unsigned int* neighborX,
+                                              unsigned int* neighborY,
+                                              unsigned int* neighborZ,
+                                              unsigned int size_Mat,
+                                              real* DD,
+                                              bool isEvenTimestep);
 
 __global__ void CalcConc7( real* Conc,
                                           unsigned int* geoD,
@@ -360,26 +360,26 @@ __global__ void CalcConc7( real* Conc,
                                           bool isEvenTimestep);
 
 __global__ void GetPlaneConc7(real* Conc,
-								            int* kPC,
-								            unsigned int numberOfPointskPC,
-											unsigned int* geoD,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											real* DD7,
-											bool isEvenTimestep);
+                                            int* kPC,
+                                            unsigned int numberOfPointskPC,
+                                            unsigned int* geoD,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned int size_Mat,
+                                            real* DD7,
+                                            bool isEvenTimestep);
 
 __global__ void GetPlaneConc27(real* Conc,
-								             int* kPC,
-								             unsigned int numberOfPointskPC,
-											 unsigned int* geoD,
-											 unsigned int* neighborX,
-											 unsigned int* neighborY,
-											 unsigned int* neighborZ,
-											 unsigned int size_Mat,
-											 real* DD27,
-											 bool isEvenTimestep);
+                                             int* kPC,
+                                             unsigned int numberOfPointskPC,
+                                             unsigned int* geoD,
+                                             unsigned int* neighborX,
+                                             unsigned int* neighborY,
+                                             unsigned int* neighborZ,
+                                             unsigned int size_Mat,
+                                             real* DD27,
+                                             bool isEvenTimestep);
 
 __global__ void CalcConc27(real* Conc,
                                           unsigned int* geoD,
@@ -404,33 +404,33 @@ __global__ void LBCalcMedSP27( real* vxD,
                                           bool isEvenTimestep);
 
 __global__ void LBCalcMedCompSP27( real* vxD,
-											  real* vyD,
-											  real* vzD,
-											  real* rhoD,
-											  real* pressD,
-											  unsigned int* geoD,
-											  unsigned int* neighborX,
-											  unsigned int* neighborY,
-											  unsigned int* neighborZ,
-											  unsigned int size_Mat,
-											  real* DD,
-											  bool isEvenTimestep);
+                                              real* vyD,
+                                              real* vzD,
+                                              real* rhoD,
+                                              real* pressD,
+                                              unsigned int* geoD,
+                                              unsigned int* neighborX,
+                                              unsigned int* neighborY,
+                                              unsigned int* neighborZ,
+                                              unsigned int size_Mat,
+                                              real* DD,
+                                              bool isEvenTimestep);
 
 __global__ void LBCalcMedCompAD27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	real* concD,
-	unsigned int* geoD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int size_Mat,
-	real* DD,
-	real* DD_AD,
-	bool isEvenTimestep);
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    real* concD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int size_Mat,
+    real* DD,
+    real* DD_AD,
+    bool isEvenTimestep);
 
 __global__ void LBCalcMacMedSP27( real* vxD,
                                              real* vyD,
@@ -446,115 +446,115 @@ __global__ void LBCalcMacMedSP27( real* vxD,
                                              bool isEvenTimestep);
 
 __global__ void LBResetMedianValuesSP27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	unsigned int size_Mat,
-	bool isEvenTimestep);
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int size_Mat,
+    bool isEvenTimestep);
 
 __global__ void LBResetMedianValuesAD27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	real* concD,
-	unsigned int size_Mat,
-	bool isEvenTimestep);
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    real* concD,
+    unsigned int size_Mat,
+    bool isEvenTimestep);
 
 __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
-														real* kyzFromfcNEQ,
-														real* kxzFromfcNEQ,
-														real* kxxMyyFromfcNEQ,
-														real* kxxMzzFromfcNEQ,
-														unsigned int* geoD,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														unsigned int size_Mat,
-														real* DD,
-														bool isEvenTimestep);
+                                                        real* kyzFromfcNEQ,
+                                                        real* kxzFromfcNEQ,
+                                                        real* kxxMyyFromfcNEQ,
+                                                        real* kxxMzzFromfcNEQ,
+                                                        unsigned int* geoD,
+                                                        unsigned int* neighborX,
+                                                        unsigned int* neighborY,
+                                                        unsigned int* neighborZ,
+                                                        unsigned int size_Mat,
+                                                        real* DD,
+                                                        bool isEvenTimestep);
 
 __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
-													real* kyzFromfcNEQ,
-													real* kxzFromfcNEQ,
-													real* kxxMyyFromfcNEQ,
-													real* kxxMzzFromfcNEQ,
-													unsigned int* geoD,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat,
-													real* DD,
-													bool isEvenTimestep);
+                                                    real* kyzFromfcNEQ,
+                                                    real* kxzFromfcNEQ,
+                                                    real* kxxMyyFromfcNEQ,
+                                                    real* kxxMzzFromfcNEQ,
+                                                    unsigned int* geoD,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    unsigned int size_Mat,
+                                                    real* DD,
+                                                    bool isEvenTimestep);
 
 __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
-														real* CUMabc,
-														real* CUMbac,
-														real* CUMbca,
-														real* CUMcba,
-														real* CUMacb,
-														real* CUMcab,
-														unsigned int* bcMatD,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														real* DDStart,
-														int size_Mat,
-														bool EvenOrOdd);
+                                                        real* CUMabc,
+                                                        real* CUMbac,
+                                                        real* CUMbca,
+                                                        real* CUMcba,
+                                                        real* CUMacb,
+                                                        real* CUMcab,
+                                                        unsigned int* bcMatD,
+                                                        unsigned int* neighborX,
+                                                        unsigned int* neighborY,
+                                                        unsigned int* neighborZ,
+                                                        real* DDStart,
+                                                        int size_Mat,
+                                                        bool EvenOrOdd);
 
 __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
-													real* CUMabc,
-													real* CUMbac,
-													real* CUMbca,
-													real* CUMcba,
-													real* CUMacb,
-													real* CUMcab,
-													unsigned int* bcMatD,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													real* DDStart,
-													int size_Mat,
-													bool EvenOrOdd);
+                                                    real* CUMabc,
+                                                    real* CUMbac,
+                                                    real* CUMbca,
+                                                    real* CUMcba,
+                                                    real* CUMacb,
+                                                    real* CUMcab,
+                                                    unsigned int* bcMatD,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    real* DDStart,
+                                                    int size_Mat,
+                                                    bool EvenOrOdd);
 
 __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
-															real* CUMbcb,
-															real* CUMbbc,
-															real* CUMcca,
-															real* CUMcac,
-															real* CUMacc,
-															real* CUMbcc,
-															real* CUMcbc,
-															real* CUMccb,
-															real* CUMccc,
-															unsigned int* bcMatD,
-															unsigned int* neighborX,
-															unsigned int* neighborY,
-															unsigned int* neighborZ,
-															real* DDStart,
-															int size_Mat,
-															bool EvenOrOdd);
+                                                            real* CUMbcb,
+                                                            real* CUMbbc,
+                                                            real* CUMcca,
+                                                            real* CUMcac,
+                                                            real* CUMacc,
+                                                            real* CUMbcc,
+                                                            real* CUMcbc,
+                                                            real* CUMccb,
+                                                            real* CUMccc,
+                                                            unsigned int* bcMatD,
+                                                            unsigned int* neighborX,
+                                                            unsigned int* neighborY,
+                                                            unsigned int* neighborZ,
+                                                            real* DDStart,
+                                                            int size_Mat,
+                                                            bool EvenOrOdd);
 
 __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
-														real* CUMbcb,
-														real* CUMbbc,
-														real* CUMcca,
-														real* CUMcac,
-														real* CUMacc,
-														real* CUMbcc,
-														real* CUMcbc,
-														real* CUMccb,
-														real* CUMccc,
-														unsigned int* bcMatD,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														real* DDStart,
-														int size_Mat,
-														bool EvenOrOdd);
+                                                        real* CUMbcb,
+                                                        real* CUMbbc,
+                                                        real* CUMcca,
+                                                        real* CUMcac,
+                                                        real* CUMacc,
+                                                        real* CUMbcc,
+                                                        real* CUMcbc,
+                                                        real* CUMccb,
+                                                        real* CUMccc,
+                                                        unsigned int* bcMatD,
+                                                        unsigned int* neighborX,
+                                                        unsigned int* neighborY,
+                                                        unsigned int* neighborZ,
+                                                        real* DDStart,
+                                                        int size_Mat,
+                                                        bool EvenOrOdd);
 
 __global__ void LBCalcMeasurePoints(real* vxMP,
                                                real* vyMP,
@@ -612,60 +612,60 @@ __global__ void QDevice27(real* distributions,
                                      bool isEvenTimestep);
 
 __global__ void QDeviceComp27(
-										 real* distributions,
-										 int* subgridDistanceIndices,
-										 real* subgridDistances,
-										 unsigned int numberOfBCnodes,
-										 real omega,
-										 unsigned int* neighborX,
-										 unsigned int* neighborY,
-										 unsigned int* neighborZ,
-										 unsigned int numberOfLBnodes,
-										 bool isEvenTimestep);
+                                         real* distributions,
+                                         int* subgridDistanceIndices,
+                                         real* subgridDistances,
+                                         unsigned int numberOfBCnodes,
+                                         real omega,
+                                         unsigned int* neighborX,
+                                         unsigned int* neighborY,
+                                         unsigned int* neighborZ,
+                                         unsigned int numberOfLBnodes,
+                                         bool isEvenTimestep);
 
 __global__ void QDeviceCompThinWallsPartOne27(real* DD,
-														 int* k_Q,
-														 real* QQ,
-														 unsigned int numberOfBCnodes,
-														 real om1,
-														 unsigned int* neighborX,
-														 unsigned int* neighborY,
-														 unsigned int* neighborZ,
-														 unsigned int size_Mat,
-														 bool isEvenTimestep);
-
-__global__ void QDevice3rdMomentsComp27(	 real* distributions, 
-													 int* subgridDistanceIndices, 
-													 real* subgridDistances,
-													 unsigned int numberOfBCnodes, 
-													 real omega, 
-													 unsigned int* neighborX,
-													 unsigned int* neighborY,
-													 unsigned int* neighborZ,
-													 unsigned int numberOfLBnodes, 
-													 bool isEvenTimestep);
+                                                         int* k_Q,
+                                                         real* QQ,
+                                                         unsigned int numberOfBCnodes,
+                                                         real om1,
+                                                         unsigned int* neighborX,
+                                                         unsigned int* neighborY,
+                                                         unsigned int* neighborZ,
+                                                         unsigned int size_Mat,
+                                                         bool isEvenTimestep);
+
+__global__ void QDevice3rdMomentsComp27(	 real* distributions,
+                                                     int* subgridDistanceIndices,
+                                                     real* subgridDistances,
+                                                     unsigned int numberOfBCnodes,
+                                                     real omega,
+                                                     unsigned int* neighborX,
+                                                     unsigned int* neighborY,
+                                                     unsigned int* neighborZ,
+                                                     unsigned int numberOfLBnodes,
+                                                     bool isEvenTimestep);
 
 __global__ void QDeviceIncompHighNu27(real* DD,
-												 int* k_Q,
-												 real* QQ,
-												 unsigned int numberOfBCnodes,
-												 real om1,
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int numberOfLBnodes,
-												 bool isEvenTimestep);
+                                                 int* k_Q,
+                                                 real* QQ,
+                                                 unsigned int numberOfBCnodes,
+                                                 real om1,
+                                                 unsigned int* neighborX,
+                                                 unsigned int* neighborY,
+                                                 unsigned int* neighborZ,
+                                                 unsigned int numberOfLBnodes,
+                                                 bool isEvenTimestep);
 
 __global__ void QDeviceCompHighNu27(	 real* DD,
-												 int* k_Q,
-												 real* QQ,
-												 unsigned int numberOfBCnodes,
-												 real om1,
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat,
-												 bool isEvenTimestep);
+                                                 int* k_Q,
+                                                 real* QQ,
+                                                 unsigned int numberOfBCnodes,
+                                                 real om1,
+                                                 unsigned int* neighborX,
+                                                 unsigned int* neighborY,
+                                                 unsigned int* neighborZ,
+                                                 unsigned int size_Mat,
+                                                 bool isEvenTimestep);
 
 //Velocity BCs
 __global__ void QVelDevPlainBB27(
@@ -683,39 +683,39 @@ __global__ void QVelDevPlainBB27(
     bool isEvenTimestep);
 
 __global__ void QVelDevCouette27(real* vx,
-											real* vy,
-											real* vz,
-											real* DD,
-											int* k_Q,
-											real* QQ,
-											unsigned int numberOfBCnodes,
-											real om1,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											bool isEvenTimestep);
+                                            real* vy,
+                                            real* vz,
+                                            real* DD,
+                                            int* k_Q,
+                                            real* QQ,
+                                            unsigned int numberOfBCnodes,
+                                            real om1,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned int size_Mat,
+                                            bool isEvenTimestep);
 
 __global__ void QVelDev1h27( int inx,
-										int iny,
-										real* vx,
-										real* vy,
-										real* vz,
-										real* DD,
-										int* k_Q,
-										real* QQ,
-										unsigned int numberOfBCnodes,
-										real om1,
-										real Phi,
-										real angularVelocity,
-										unsigned int* neighborX,
-										unsigned int* neighborY,
-										unsigned int* neighborZ,
-										real* coordX,
-										real* coordY,
-										real* coordZ,
-										unsigned int size_Mat,
-										bool isEvenTimestep);
+                                        int iny,
+                                        real* vx,
+                                        real* vy,
+                                        real* vz,
+                                        real* DD,
+                                        int* k_Q,
+                                        real* QQ,
+                                        unsigned int numberOfBCnodes,
+                                        real om1,
+                                        real Phi,
+                                        real angularVelocity,
+                                        unsigned int* neighborX,
+                                        unsigned int* neighborY,
+                                        unsigned int* neighborZ,
+                                        real* coordX,
+                                        real* coordY,
+                                        real* coordZ,
+                                        unsigned int size_Mat,
+                                        bool isEvenTimestep);
 
 __global__ void QVelDevice27(int inx,
                                         int iny,
@@ -734,107 +734,107 @@ __global__ void QVelDevice27(int inx,
                                         bool isEvenTimestep);
 
 __global__ void QVelDeviceCompPlusSlip27(real* vx,
-													real* vy,
-													real* vz,
-													real* DD,
-													int* k_Q,
-													real* QQ,
-													unsigned int numberOfBCnodes,
-													real om1,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat,
-													bool isEvenTimestep);
+                                                    real* vy,
+                                                    real* vz,
+                                                    real* DD,
+                                                    int* k_Q,
+                                                    real* QQ,
+                                                    unsigned int numberOfBCnodes,
+                                                    real om1,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    unsigned int size_Mat,
+                                                    bool isEvenTimestep);
 
 __global__ void QVelDeviceComp27(real* velocityX,
-											real* velocityY,
-											real* velocityZ,
-											real* distribution,
-											int* subgridDistanceIndices,
-											real* subgridDistances,
-											unsigned int numberOfBCnodes,
-											real omega,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int numberOfLBnodes,
-											bool isEvenTimestep);
+                                            real* velocityY,
+                                            real* velocityZ,
+                                            real* distribution,
+                                            int* subgridDistanceIndices,
+                                            real* subgridDistances,
+                                            unsigned int numberOfBCnodes,
+                                            real omega,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned int numberOfLBnodes,
+                                            bool isEvenTimestep);
 
 __global__ void QVelDeviceCompThinWallsPartOne27(
-	real* vx,
-	real* vy,
-	real* vz,
-	real* DD,
-	int* k_Q,
-	real* QQ,
-	uint numberOfBCnodes,
-	real om1,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	uint size_Mat,
-	bool isEvenTimestep);
+    real* vx,
+    real* vy,
+    real* vz,
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    uint numberOfBCnodes,
+    real om1,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint size_Mat,
+    bool isEvenTimestep);
 
 __global__ void QThinWallsPartTwo27(
-	real* DD,
-	int* k_Q,
-	real* QQ,
-	uint numberOfBCnodes,
-	uint* geom,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	uint* neighborWSB,
-	uint size_Mat,
-	bool isEvenTimestep);
+    real* DD,
+    int* k_Q,
+    real* QQ,
+    uint numberOfBCnodes,
+    uint* geom,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint* neighborWSB,
+    uint size_Mat,
+    bool isEvenTimestep);
 
 __global__ void QVelDeviceCompZeroPress27(
-	real* velocityX,
-	real* velocityY,
-	real* velocityZ,
-	real* distribution,
-	int* subgridDistanceIndices,
-	real* subgridDistances,
-	unsigned int numberOfBCnodes,
-	real omega,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int numberOfLBnodes,
-	bool isEvenTimestep);
+    real* velocityX,
+    real* velocityY,
+    real* velocityZ,
+    real* distribution,
+    int* subgridDistanceIndices,
+    real* subgridDistances,
+    unsigned int numberOfBCnodes,
+    real omega,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int numberOfLBnodes,
+    bool isEvenTimestep);
 
 __global__ void QVelDeviceIncompHighNu27(real* vx,
-													real* vy,
-													real* vz,
-													real* DD,
-													int* k_Q,
-													real* QQ,
-													unsigned int numberOfBCnodes,
-													real om1,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat,
-													bool isEvenTimestep);
+                                                    real* vy,
+                                                    real* vz,
+                                                    real* DD,
+                                                    int* k_Q,
+                                                    real* QQ,
+                                                    unsigned int numberOfBCnodes,
+                                                    real om1,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    unsigned int size_Mat,
+                                                    bool isEvenTimestep);
 
 __global__ void QVelDeviceCompHighNu27(	real* vx,
-													real* vy,
-													real* vz,
-													real* DD,
-													int* k_Q,
-													real* QQ,
-													unsigned int numberOfBCnodes,
-													real om1,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat,
-													bool isEvenTimestep);
+                                                    real* vy,
+                                                    real* vz,
+                                                    real* DD,
+                                                    int* k_Q,
+                                                    real* QQ,
+                                                    unsigned int numberOfBCnodes,
+                                                    real om1,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    unsigned int size_Mat,
+                                                    bool isEvenTimestep);
 
 __global__ void QVeloDeviceEQ27(real* VeloX,
-										   real* VeloY,
-										   real* VeloZ,
+                                           real* VeloY,
+                                           real* VeloZ,
                                            real* DD,
                                            int* k_Q,
                                            int numberOfBCnodes,
@@ -846,18 +846,18 @@ __global__ void QVeloDeviceEQ27(real* VeloX,
                                            bool isEvenTimestep);
 
 __global__ void QVeloStreetDeviceEQ27(
-	real* veloXfraction,
-	real* veloYfraction,
-	int*  naschVelo,
-	real* DD,
-	int*  naschIndex,
-	int   numberOfStreetNodes,
-	real  velocityRatio,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	uint  size_Mat,
-	bool  isEvenTimestep);
+    real* veloXfraction,
+    real* veloYfraction,
+    int*  naschVelo,
+    real* DD,
+    int*  naschIndex,
+    int   numberOfStreetNodes,
+    real  velocityRatio,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint  size_Mat,
+    bool  isEvenTimestep);
 
 //Slip BCs
 __global__ void QSlipDevice27(real* DD,
@@ -872,135 +872,135 @@ __global__ void QSlipDevice27(real* DD,
                                          bool isEvenTimestep);
 
 __global__ void QSlipDeviceComp27(real* DD,
-											 int* k_Q,
-											 real* QQ,
-											 unsigned int numberOfBCnodes,
-											 real om1,
-											 unsigned int* neighborX,
-											 unsigned int* neighborY,
-											 unsigned int* neighborZ,
-											 unsigned int size_Mat,
-											 bool isEvenTimestep);
+                                             int* k_Q,
+                                             real* QQ,
+                                             unsigned int numberOfBCnodes,
+                                             real om1,
+                                             unsigned int* neighborX,
+                                             unsigned int* neighborY,
+                                             unsigned int* neighborZ,
+                                             unsigned int size_Mat,
+                                             bool isEvenTimestep);
 
 __global__ void QSlipDeviceComp27TurbViscosity(
-                                    real* distributions, 
-                                    int* subgridDistanceIndices, 
+                                    real* distributions,
+                                    int* subgridDistanceIndices,
                                     real* subgridDistances,
                                     unsigned int numberOfBCnodes,
-                                    real omega, 
+                                    real omega,
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
                                     unsigned int* neighborZ,
                                     real* turbViscosity,
-                                    unsigned int numberOfLBnodes, 
+                                    unsigned int numberOfLBnodes,
                                     bool isEvenTimestep);
 
 __global__ void QSlipPressureDeviceComp27TurbViscosity(
-                                    real* distributions, 
-                                    int* subgridDistanceIndices, 
+                                    real* distributions,
+                                    int* subgridDistanceIndices,
                                     real* subgridDistances,
                                     unsigned int numberOfBCnodes,
-                                    real omega, 
+                                    real omega,
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
                                     unsigned int* neighborZ,
                                     real* turbViscosity,
-                                    unsigned int numberOfLBnodes, 
+                                    unsigned int numberOfLBnodes,
                                     bool isEvenTimestep);
 
 __global__ void QSlipGeomDeviceComp27(real* DD,
-												 int* k_Q,
-												 real* QQ,
-												 unsigned int numberOfBCnodes,
-												 real om1,
-												 real* NormalX,
-												 real* NormalY,
-												 real* NormalZ,
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat,
-												 bool isEvenTimestep);
+                                                 int* k_Q,
+                                                 real* QQ,
+                                                 unsigned int numberOfBCnodes,
+                                                 real om1,
+                                                 real* NormalX,
+                                                 real* NormalY,
+                                                 real* NormalZ,
+                                                 unsigned int* neighborX,
+                                                 unsigned int* neighborY,
+                                                 unsigned int* neighborZ,
+                                                 unsigned int size_Mat,
+                                                 bool isEvenTimestep);
 
 __global__ void QSlipNormDeviceComp27(real* DD,
-												 int* k_Q,
-												 real* QQ,
-												 unsigned int numberOfBCnodes,
-												 real om1,
-												 real* NormalX,
-												 real* NormalY,
-												 real* NormalZ,
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat,
-												 bool isEvenTimestep);
+                                                 int* k_Q,
+                                                 real* QQ,
+                                                 unsigned int numberOfBCnodes,
+                                                 real om1,
+                                                 real* NormalX,
+                                                 real* NormalY,
+                                                 real* NormalZ,
+                                                 unsigned int* neighborX,
+                                                 unsigned int* neighborY,
+                                                 unsigned int* neighborZ,
+                                                 unsigned int size_Mat,
+                                                 bool isEvenTimestep);
 
 // Stress BCs (wall model)
 __global__ void QStressDeviceComp27(real* DD,
-											   int* k_Q,
-											 int* k_N,
-											 real* QQ,
-											 unsigned int numberOfBCnodes,
-											 real om1,
-											 real* turbViscosity,
-										     real* vx,
-											 real* vy,
-                                    	     real* vz,
-											 real* normalX,
-											 real* normalY,
-                                    	     real* normalZ,
-											 real* vx_bc,
-											 real* vy_bc,
-                                    	     real* vz_bc,
-											 real* vx1,
-                                    		 real* vy1,
-                                    		 real* vz1,
-											 int* samplingOffset,
-											 real* z0,
-											 bool  hasWallModelMonitor,
-											real* u_star_monitor,
-											real* Fx_monitor,
-											real* Fy_monitor,
-											real* Fz_monitor,
-											 unsigned int* neighborX,
-											 unsigned int* neighborY,
-											 unsigned int* neighborZ,
-											 unsigned int size_Mat,
-											 bool isEvenTimestep);
+                                               int* k_Q,
+                                             int* k_N,
+                                             real* QQ,
+                                             unsigned int numberOfBCnodes,
+                                             real om1,
+                                             real* turbViscosity,
+                                             real* vx,
+                                             real* vy,
+                                             real* vz,
+                                             real* normalX,
+                                             real* normalY,
+                                             real* normalZ,
+                                             real* vx_bc,
+                                             real* vy_bc,
+                                             real* vz_bc,
+                                             real* vx1,
+                                             real* vy1,
+                                             real* vz1,
+                                             int* samplingOffset,
+                                             real* z0,
+                                             bool  hasWallModelMonitor,
+                                            real* u_star_monitor,
+                                            real* Fx_monitor,
+                                            real* Fy_monitor,
+                                            real* Fz_monitor,
+                                             unsigned int* neighborX,
+                                             unsigned int* neighborY,
+                                             unsigned int* neighborZ,
+                                             unsigned int size_Mat,
+                                             bool isEvenTimestep);
 
 __global__ void BBStressDevice27( real* DD,
-												int* k_Q,
-												int* k_N,
-												real* QQ,
-												unsigned int numberOfBCnodes,
-												real* vx,
-												real* vy,
-												real* vz,
-												real* normalX,
-												real* normalY,
-												real* normalZ,
-												real* vx_bc,
-												real* vy_bc,
-												real* vz_bc,
-												real* vx1,
-												real* vy1,
-												real* vz1,
-												int* samplingOffset,
-												real* z0,
-												bool  hasWallModelMonitor,
-												real* u_star_monitor,
-												real* Fx_monitor,
-												real* Fy_monitor,
-												real* Fz_monitor,
-												unsigned int* neighborX,
-												unsigned int* neighborY,
-												unsigned int* neighborZ,
-												unsigned int size_Mat,
-												bool isEvenTimestep);
+                                                int* k_Q,
+                                                int* k_N,
+                                                real* QQ,
+                                                unsigned int numberOfBCnodes,
+                                                real* vx,
+                                                real* vy,
+                                                real* vz,
+                                                real* normalX,
+                                                real* normalY,
+                                                real* normalZ,
+                                                real* vx_bc,
+                                                real* vy_bc,
+                                                real* vz_bc,
+                                                real* vx1,
+                                                real* vy1,
+                                                real* vz1,
+                                                int* samplingOffset,
+                                                real* z0,
+                                                bool  hasWallModelMonitor,
+                                                real* u_star_monitor,
+                                                real* Fx_monitor,
+                                                real* Fy_monitor,
+                                                real* Fz_monitor,
+                                                unsigned int* neighborX,
+                                                unsigned int* neighborY,
+                                                unsigned int* neighborZ,
+                                                unsigned int size_Mat,
+                                                bool isEvenTimestep);
 
 __global__ void BBStressPressureDevice27( real* DD,
-											            int* k_Q,
+                                                        int* k_Q,
                                              int* k_N,
                                              real* QQ,
                                              unsigned int  numberOfBCnodes,
@@ -1043,19 +1043,19 @@ __global__ void QPressDevice27( real* rhoBC,
                                            bool isEvenTimestep);
 
 __global__ void QPressDeviceAntiBB27(   real* rhoBC,
-												   real* vx,
-												   real* vy,
-												   real* vz,
-												   real* DD,
-												   int* k_Q,
-												   real* QQ,
-												   int numberOfBCnodes,
-												   real om1,
-												   unsigned int* neighborX,
-												   unsigned int* neighborY,
-												   unsigned int* neighborZ,
-												   unsigned int size_Mat,
-												   bool isEvenTimestep);
+                                                   real* vx,
+                                                   real* vy,
+                                                   real* vz,
+                                                   real* DD,
+                                                   int* k_Q,
+                                                   real* QQ,
+                                                   int numberOfBCnodes,
+                                                   real om1,
+                                                   unsigned int* neighborX,
+                                                   unsigned int* neighborY,
+                                                   unsigned int* neighborZ,
+                                                   unsigned int size_Mat,
+                                                   bool isEvenTimestep);
 
 __global__ void QPressDeviceFixBackflow27( real* rhoBC,
                                                       real* DD,
@@ -1080,43 +1080,43 @@ __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
                                                      bool isEvenTimestep);
 
 __global__ void QPressNoRhoDevice27(  real* rhoBC,
-												 real* distributions,
-												 int* k_Q,
-												 int* k_N,
-												 int numberOfBCnodes,
-												 real om1,
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int numberOfLBnodes,
-												 bool isEvenTimestep,
-												 int direction);
+                                                 real* distributions,
+                                                 int* k_Q,
+                                                 int* k_N,
+                                                 int numberOfBCnodes,
+                                                 real om1,
+                                                 unsigned int* neighborX,
+                                                 unsigned int* neighborY,
+                                                 unsigned int* neighborZ,
+                                                 unsigned int numberOfLBnodes,
+                                                 bool isEvenTimestep,
+                                                 int direction);
 
 __global__ void QPressZeroRhoOutflowDevice27(  real* rhoBC,
-											real* distributions, 
-											int* k_Q, 
-											int* k_N, 
-											int numberOfBCnodes, 
-											real om1, 
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int numberOfLBnodes, 
-											bool isEvenTimestep,
-											int direction,
-											real densityCorrectionFactor);
+                                            real* distributions,
+                                            int* k_Q,
+                                            int* k_N,
+                                            int numberOfBCnodes,
+                                            real om1,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned int numberOfLBnodes,
+                                            bool isEvenTimestep,
+                                            int direction,
+                                            real densityCorrectionFactor);
 
 __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
-														 real* DD,
-														 int* k_Q,
-														 int* k_N,
-														 int numberOfBCnodes,
-														 real om1,
-														 unsigned int* neighborX,
-														 unsigned int* neighborY,
-														 unsigned int* neighborZ,
-														 unsigned int size_Mat,
-														 bool isEvenTimestep);
+                                                         real* DD,
+                                                         int* k_Q,
+                                                         int* k_N,
+                                                         int numberOfBCnodes,
+                                                         real om1,
+                                                         unsigned int* neighborX,
+                                                         unsigned int* neighborY,
+                                                         unsigned int* neighborZ,
+                                                         unsigned int size_Mat,
+                                                         bool isEvenTimestep);
 
 __global__ void QPressDeviceOld27(real* rhoBC,
                                              real* DD,
@@ -1131,16 +1131,16 @@ __global__ void QPressDeviceOld27(real* rhoBC,
                                              bool isEvenTimestep);
 
 __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
-													real* DD,
-													int* k_Q,
-													int* k_N,
-													int numberOfBCnodes,
-													real om1,
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat,
-													bool isEvenTimestep);
+                                                    real* DD,
+                                                    int* k_Q,
+                                                    int* k_N,
+                                                    int numberOfBCnodes,
+                                                    real om1,
+                                                    unsigned int* neighborX,
+                                                    unsigned int* neighborY,
+                                                    unsigned int* neighborZ,
+                                                    unsigned int size_Mat,
+                                                    bool isEvenTimestep);
 
 __global__ void QPressDeviceNEQ27(real* rhoBC,
                                              real* distribution,
@@ -1168,13 +1168,13 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
                                              bool isEvenTimestep);
 
 __global__ void QPressDeviceZero27(  real* DD,
-												int* k_Q,
-												unsigned int numberOfBCnodes,
-												unsigned int* neighborX,
-												unsigned int* neighborY,
-												unsigned int* neighborZ,
-												unsigned int size_Mat,
-												bool isEvenTimestep);
+                                                int* k_Q,
+                                                unsigned int numberOfBCnodes,
+                                                unsigned int* neighborX,
+                                                unsigned int* neighborY,
+                                                unsigned int* neighborZ,
+                                                unsigned int size_Mat,
+                                                bool isEvenTimestep);
 
 __global__ void QPressDeviceFake27(real* rhoBC,
                                              real* DD,
@@ -1199,16 +1199,16 @@ __global__ void BBDevice27(real* distributions,
                                      bool isEvenTimestep);
 
 __global__ void QPressDevice27_IntBB(real* rho,
-												real* DD,
-												int* k_Q,
-												real* QQ,
-												unsigned int numberOfBCnodes,
-												real om1,
-												unsigned int* neighborX,
-												unsigned int* neighborY,
-												unsigned int* neighborZ,
-												unsigned int size_Mat,
-												bool isEvenTimestep);
+                                                real* DD,
+                                                int* k_Q,
+                                                real* QQ,
+                                                unsigned int numberOfBCnodes,
+                                                real om1,
+                                                unsigned int* neighborX,
+                                                unsigned int* neighborY,
+                                                unsigned int* neighborZ,
+                                                unsigned int size_Mat,
+                                                bool isEvenTimestep);
 
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
 //Schlaffer BCs
@@ -1250,18 +1250,18 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
                                                 real omega,
                                                 real* distributions,
                                                 real* subgridDistances,
-                                                uint* neighborX, 
-                                                uint* neighborY, 
+                                                uint* neighborX,
+                                                uint* neighborY,
                                                 uint* neighborZ,
-                                                uint* neighborsNT, 
+                                                uint* neighborsNT,
                                                 uint* neighborsNB,
                                                 uint* neighborsST,
                                                 uint* neighborsSB,
-                                                real* weights0PP, 
+                                                real* weights0PP,
                                                 real* weights0PM,
                                                 real* weights0MP,
                                                 real* weights0MM,
-                                                real* vLast, 
+                                                real* vLast,
                                                 real* vCurrent,
                                                 real velocityX,
                                                 real velocityY,
@@ -1276,18 +1276,18 @@ __global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
                                         int numberOfPrecursorNodes,
                                         real omega,
                                         real* distributions,
-                                        uint* neighborX, 
-                                        uint* neighborY, 
+                                        uint* neighborX,
+                                        uint* neighborY,
                                         uint* neighborZ,
-                                        uint* neighborsNT, 
+                                        uint* neighborsNT,
                                         uint* neighborsNB,
                                         uint* neighborsST,
                                         uint* neighborsSB,
-                                        real* weights0PP, 
+                                        real* weights0PP,
                                         real* weights0PM,
                                         real* weights0MP,
                                         real* weights0MM,
-                                        real* vLast, 
+                                        real* vLast,
                                         real* vCurrent,
                                         real velocityX,
                                         real velocityY,
@@ -1298,48 +1298,48 @@ __global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
                                         bool isEvenTimestep);
 
 __global__ void PrecursorDeviceDistributions( 	int* subgridDistanceIndices,
-												int numberOfBCNodes,
-												int numberOfPrecursorNodes,
-												real* distributions,
-												uint* neighborX, 
-												uint* neighborY, 
-												uint* neighborZ,
-												uint* neighborsNT, 
-												uint* neighborsNB,
-												uint* neighborsST,
-												uint* neighborsSB,
-												real* weights0PP, 
-												real* weights0PM,
-												real* weights0MP,
-												real* weights0MM,
-												real* fsLast, 
-												real* fsNext,
-												real timeRatio,
-												unsigned long long numberOfLBnodes,
-												bool isEvenTimestep);
+                                                int numberOfBCNodes,
+                                                int numberOfPrecursorNodes,
+                                                real* distributions,
+                                                uint* neighborX,
+                                                uint* neighborY,
+                                                uint* neighborZ,
+                                                uint* neighborsNT,
+                                                uint* neighborsNB,
+                                                uint* neighborsST,
+                                                uint* neighborsSB,
+                                                real* weights0PP,
+                                                real* weights0PM,
+                                                real* weights0MP,
+                                                real* weights0MM,
+                                                real* fsLast,
+                                                real* fsNext,
+                                                real timeRatio,
+                                                unsigned long long numberOfLBnodes,
+                                                bool isEvenTimestep);
 __global__ void QPrecursorDeviceDistributions( 	int* subgridDistanceIndices,
-												real* subgridDistances,
-												int sizeQ,
-												int numberOfBCNodes,
-												int numberOfPrecursorNodes,
-												real* distributions,
-												uint* neighborX, 
-												uint* neighborY, 
-												uint* neighborZ,
-												uint* neighborsNT, 
-												uint* neighborsNB,
-												uint* neighborsST,
-												uint* neighborsSB,
-												real* weights0PP, 
-												real* weights0PM,
-												real* weights0MP,
-												real* weights0MM,
-												real* fsLast, 
-												real* fsNext,
-												real timeRatio,
-												unsigned long long numberOfLBnodes,
-												bool isEvenTimestep);
-												
+                                                real* subgridDistances,
+                                                int sizeQ,
+                                                int numberOfBCNodes,
+                                                int numberOfPrecursorNodes,
+                                                real* distributions,
+                                                uint* neighborX,
+                                                uint* neighborY,
+                                                uint* neighborZ,
+                                                uint* neighborsNT,
+                                                uint* neighborsNB,
+                                                uint* neighborsST,
+                                                uint* neighborsSB,
+                                                real* weights0PP,
+                                                real* weights0PM,
+                                                real* weights0MP,
+                                                real* weights0MM,
+                                                real* fsLast,
+                                                real* fsNext,
+                                                real timeRatio,
+                                                unsigned long long numberOfLBnodes,
+                                                bool isEvenTimestep);
+
 //Advection / Diffusion BCs
 __global__ void QAD7( real* DD,
                                  real* DD7,
@@ -1358,62 +1358,62 @@ __global__ void QAD7( real* DD,
 //////////////////////////////////////////////////////////////////////////
 //! \brief \ref Advection_Diffusion_Device_Kernel : Factorized central moments for Advection Diffusion Equation
 __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
-	real omegaDiffusivity,
-	uint* typeOfGridNode,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	real* distributions,
-	real* distributionsAD,
-	int size_Mat,
-	real* forces,
-	bool isEvenTimestep);
+    real omegaDiffusivity,
+    uint* typeOfGridNode,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    real* distributions,
+    real* distributionsAD,
+    int size_Mat,
+    real* forces,
+    bool isEvenTimestep);
 
 //////////////////////////////////////////////////////////////////////////
 //! \brief \ref AD_SlipVelDeviceComp : device function for the slip-AD boundary condition
 __global__ void AD_SlipVelDeviceComp(
-	real * normalX,
-	real * normalY,
-	real * normalZ,
-	real * distributions,
-	real * distributionsAD,
-	int* QindexArray,
-	real * Qarrays,
-	uint numberOfBCnodes,
-	real omegaDiffusivity,
-	uint * neighborX,
-	uint * neighborY,
-	uint * neighborZ,
-	uint size_Mat,
-	bool isEvenTimestep);
+    real * normalX,
+    real * normalY,
+    real * normalZ,
+    real * distributions,
+    real * distributionsAD,
+    int* QindexArray,
+    real * Qarrays,
+    uint numberOfBCnodes,
+    real omegaDiffusivity,
+    uint * neighborX,
+    uint * neighborY,
+    uint * neighborZ,
+    uint size_Mat,
+    bool isEvenTimestep);
 
 __global__ void QADDirichlet27(   real* DD,
-											 real* DD27,
-											 real* temp,
-											 real diffusivity,
-											 int* k_Q,
-											 real* QQ,
-											 unsigned int numberOfBCnodes,
-											 real om1,
-											 unsigned int* neighborX,
-											 unsigned int* neighborY,
-											 unsigned int* neighborZ,
-											 unsigned int size_Mat,
-											 bool isEvenTimestep);
+                                             real* DD27,
+                                             real* temp,
+                                             real diffusivity,
+                                             int* k_Q,
+                                             real* QQ,
+                                             unsigned int numberOfBCnodes,
+                                             real om1,
+                                             unsigned int* neighborX,
+                                             unsigned int* neighborY,
+                                             unsigned int* neighborZ,
+                                             unsigned int size_Mat,
+                                             bool isEvenTimestep);
 
 __global__ void QADBB27(  real* DD,
-									 real* DD27,
-									 real* temp,
-									 real diffusivity,
-									 int* k_Q,
-									 real* QQ,
-									 unsigned int numberOfBCnodes,
-									 real om1,
-									 unsigned int* neighborX,
-									 unsigned int* neighborY,
-									 unsigned int* neighborZ,
-									 unsigned int size_Mat,
-									 bool isEvenTimestep);
+                                     real* DD27,
+                                     real* temp,
+                                     real diffusivity,
+                                     int* k_Q,
+                                     real* QQ,
+                                     unsigned int numberOfBCnodes,
+                                     real om1,
+                                     unsigned int* neighborX,
+                                     unsigned int* neighborY,
+                                     unsigned int* neighborZ,
+                                     unsigned int size_Mat,
+                                     bool isEvenTimestep);
 
 __global__ void QADVel7( real* DD,
                                     real* DD7,
@@ -1476,105 +1476,105 @@ __global__ void QADPress27( real* DD,
                                        bool isEvenTimestep);
 
 __global__ void QADPressNEQNeighbor27(
-												 real* DD,
-												 real* DD27,
-												 int* k_Q,
-												 int* k_N,
-												 int numberOfBCnodes,
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat,
-												 bool isEvenTimestep
-												);
+                                                 real* DD,
+                                                 real* DD27,
+                                                 int* k_Q,
+                                                 int* k_N,
+                                                 int numberOfBCnodes,
+                                                 unsigned int* neighborX,
+                                                 unsigned int* neighborY,
+                                                 unsigned int* neighborZ,
+                                                 unsigned int size_Mat,
+                                                 bool isEvenTimestep
+                                                );
 
 __global__ void QNoSlipADincomp7( real* DD,
-											 real* DD7,
-											 real* temp,
-											 real diffusivity,
-											 int* k_Q,
-											 real* QQ,
-											 unsigned int numberOfBCnodes,
-											 real om1,
-											 unsigned int* neighborX,
-											 unsigned int* neighborY,
-											 unsigned int* neighborZ,
-											 unsigned int size_Mat,
-											 bool isEvenTimestep);
+                                             real* DD7,
+                                             real* temp,
+                                             real diffusivity,
+                                             int* k_Q,
+                                             real* QQ,
+                                             unsigned int numberOfBCnodes,
+                                             real om1,
+                                             unsigned int* neighborX,
+                                             unsigned int* neighborY,
+                                             unsigned int* neighborZ,
+                                             unsigned int size_Mat,
+                                             bool isEvenTimestep);
 
 __global__ void QNoSlipADincomp27( real* DD,
-											 real* DD27,
-											 real* temp,
-											 real diffusivity,
-											 int* k_Q,
-											 real* QQ,
-											 unsigned int numberOfBCnodes,
-											 real om1,
-											 unsigned int* neighborX,
-											 unsigned int* neighborY,
-											 unsigned int* neighborZ,
-											 unsigned int size_Mat,
-											 bool isEvenTimestep);
+                                             real* DD27,
+                                             real* temp,
+                                             real diffusivity,
+                                             int* k_Q,
+                                             real* QQ,
+                                             unsigned int numberOfBCnodes,
+                                             real om1,
+                                             unsigned int* neighborX,
+                                             unsigned int* neighborY,
+                                             unsigned int* neighborZ,
+                                             unsigned int size_Mat,
+                                             bool isEvenTimestep);
 
 __global__ void QADVeloIncomp7(  real* DD,
-											real* DD7,
-											real* temp,
-											real* velo,
-											real diffusivity,
-											int* k_Q,
-											real* QQ,
-											unsigned int numberOfBCnodes,
-											real om1,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											bool isEvenTimestep);
+                                            real* DD7,
+                                            real* temp,
+                                            real* velo,
+                                            real diffusivity,
+                                            int* k_Q,
+                                            real* QQ,
+                                            unsigned int numberOfBCnodes,
+                                            real om1,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned int size_Mat,
+                                            bool isEvenTimestep);
 
 __global__ void QADVeloIncomp27( real* DD,
-											real* DD27,
-											real* temp,
-											real* velo,
-											real diffusivity,
-											int* k_Q,
-											real* QQ,
-											unsigned int numberOfBCnodes,
-											real om1,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											bool isEvenTimestep);
+                                            real* DD27,
+                                            real* temp,
+                                            real* velo,
+                                            real diffusivity,
+                                            int* k_Q,
+                                            real* QQ,
+                                            unsigned int numberOfBCnodes,
+                                            real om1,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned int size_Mat,
+                                            bool isEvenTimestep);
 
 __global__ void QADPressIncomp7(real* DD,
-										   real* DD7,
-										   real* temp,
-										   real* velo,
-										   real diffusivity,
-										   int* k_Q,
-										   real* QQ,
-										   unsigned int numberOfBCnodes,
-										   real om1,
-										   unsigned int* neighborX,
-										   unsigned int* neighborY,
-										   unsigned int* neighborZ,
-										   unsigned int size_Mat,
-										   bool isEvenTimestep);
+                                           real* DD7,
+                                           real* temp,
+                                           real* velo,
+                                           real diffusivity,
+                                           int* k_Q,
+                                           real* QQ,
+                                           unsigned int numberOfBCnodes,
+                                           real om1,
+                                           unsigned int* neighborX,
+                                           unsigned int* neighborY,
+                                           unsigned int* neighborZ,
+                                           unsigned int size_Mat,
+                                           bool isEvenTimestep);
 
 __global__ void QADPressIncomp27(   real* DD,
-											   real* DD27,
-											   real* temp,
-											   real* velo,
-											   real diffusivity,
-											   int* k_Q,
-											   real* QQ,
-											   unsigned int numberOfBCnodes,
-											   real om1,
-											   unsigned int* neighborX,
-											   unsigned int* neighborY,
-											   unsigned int* neighborZ,
-											   unsigned int size_Mat,
-											   bool isEvenTimestep);
+                                               real* DD27,
+                                               real* temp,
+                                               real* velo,
+                                               real diffusivity,
+                                               int* k_Q,
+                                               real* QQ,
+                                               unsigned int numberOfBCnodes,
+                                               real om1,
+                                               unsigned int* neighborX,
+                                               unsigned int* neighborY,
+                                               unsigned int* neighborZ,
+                                               unsigned int size_Mat,
+                                               bool isEvenTimestep);
 
 //Propeller BC
 __global__ void PropellerBC(unsigned int* neighborX,
@@ -1585,7 +1585,7 @@ __global__ void PropellerBC(unsigned int* neighborX,
                                        real* uy,
                                        real* uz,
                                        int* k_Q,
-									   unsigned int size_Prop,
+                                       unsigned int size_Prop,
                                        unsigned int size_Mat,
                                        unsigned int* bcMatD,
                                        real* DD,
@@ -1602,19 +1602,19 @@ __global__ void scaleCF27(real* DC,
                                     unsigned int* neighborFX,
                                     unsigned int* neighborFY,
                                     unsigned int* neighborFZ,
-										       unsigned int size_MatC,
-										       unsigned int size_MatF,
-										       bool isEvenTimestep,
+                                               unsigned int size_MatC,
+                                               unsigned int size_MatF,
+                                               bool isEvenTimestep,
                                      unsigned int* posCSWB,
                                      unsigned int* posFSWB,
                                      unsigned int kCF,
-										       real omCoarse,
-										       real omFine,
-										       real nu,
-										       unsigned int nxC,
-										       unsigned int nyC,
-										       unsigned int nxF,
-										       unsigned int nyF);
+                                               real omCoarse,
+                                               real omFine,
+                                               real nu,
+                                               unsigned int nxC,
+                                               unsigned int nyC,
+                                               unsigned int nxF,
+                                               unsigned int nyF);
 
 __global__ void scaleCFEff27(real* DC,
                                         real* DF,
@@ -1624,18 +1624,18 @@ __global__ void scaleCFEff27(real* DC,
                                         unsigned int* neighborFX,
                                         unsigned int* neighborFY,
                                         unsigned int* neighborFZ,
-									             unsigned int size_MatC,
-									             unsigned int size_MatF,
-									             bool isEvenTimestep,
+                                                 unsigned int size_MatC,
+                                                 unsigned int size_MatF,
+                                                 bool isEvenTimestep,
                                         unsigned int* posCSWB,
                                         unsigned int* posFSWB,
                                         unsigned int kCF,
-									             real omCoarse,
-									             real omFine,
-									             real nu,
-									             unsigned int nxC,
-									             unsigned int nyC,
-									             unsigned int nxF,
+                                                 real omCoarse,
+                                                 real omFine,
+                                                 real nu,
+                                                 unsigned int nxC,
+                                                 unsigned int nyC,
+                                                 unsigned int nxF,
                                         unsigned int nyF,
                                         OffCF offCF);
 
@@ -1709,233 +1709,233 @@ __global__ void scaleCF_Fix_27(real* DC,
                                           OffCF offCF);
 
 __global__ void scaleCF_Fix_comp_27(   real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posCSWB,
-												  unsigned int* posFSWB,
-												  unsigned int kCF,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  OffCF offCF);
+                                                  real* DF,
+                                                  unsigned int* neighborCX,
+                                                  unsigned int* neighborCY,
+                                                  unsigned int* neighborCZ,
+                                                  unsigned int* neighborFX,
+                                                  unsigned int* neighborFY,
+                                                  unsigned int* neighborFZ,
+                                                  unsigned int size_MatC,
+                                                  unsigned int size_MatF,
+                                                  bool isEvenTimestep,
+                                                  unsigned int* posCSWB,
+                                                  unsigned int* posFSWB,
+                                                  unsigned int kCF,
+                                                  real omCoarse,
+                                                  real omFine,
+                                                  real nu,
+                                                  unsigned int nxC,
+                                                  unsigned int nyC,
+                                                  unsigned int nxF,
+                                                  unsigned int nyF,
+                                                  OffCF offCF);
 
 __global__ void scaleCF_0817_comp_27(  real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posCSWB,
-												  unsigned int* posFSWB,
-												  unsigned int kCF,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  OffCF offCF);
+                                                  real* DF,
+                                                  unsigned int* neighborCX,
+                                                  unsigned int* neighborCY,
+                                                  unsigned int* neighborCZ,
+                                                  unsigned int* neighborFX,
+                                                  unsigned int* neighborFY,
+                                                  unsigned int* neighborFZ,
+                                                  unsigned int size_MatC,
+                                                  unsigned int size_MatF,
+                                                  bool isEvenTimestep,
+                                                  unsigned int* posCSWB,
+                                                  unsigned int* posFSWB,
+                                                  unsigned int kCF,
+                                                  real omCoarse,
+                                                  real omFine,
+                                                  real nu,
+                                                  unsigned int nxC,
+                                                  unsigned int nyC,
+                                                  unsigned int nxF,
+                                                  unsigned int nyF,
+                                                  OffCF offCF);
 
 __global__ void scaleCF_comp_D3Q27F3_2018( real* DC,
-													  real* DF,
-													  real* G6,
-													  unsigned int* neighborCX,
-													  unsigned int* neighborCY,
-													  unsigned int* neighborCZ,
-													  unsigned int* neighborFX,
-													  unsigned int* neighborFY,
-													  unsigned int* neighborFZ,
-													  unsigned int size_MatC,
-													  unsigned int size_MatF,
-													  bool isEvenTimestep,
-													  unsigned int* posCSWB,
-													  unsigned int* posFSWB,
-													  unsigned int kCF,
-													  real omCoarse,
-													  real omFine,
-													  real nu,
-													  unsigned int nxC,
-													  unsigned int nyC,
-													  unsigned int nxF,
-													  unsigned int nyF,
-													  OffCF offCF);
+                                                      real* DF,
+                                                      real* G6,
+                                                      unsigned int* neighborCX,
+                                                      unsigned int* neighborCY,
+                                                      unsigned int* neighborCZ,
+                                                      unsigned int* neighborFX,
+                                                      unsigned int* neighborFY,
+                                                      unsigned int* neighborFZ,
+                                                      unsigned int size_MatC,
+                                                      unsigned int size_MatF,
+                                                      bool isEvenTimestep,
+                                                      unsigned int* posCSWB,
+                                                      unsigned int* posFSWB,
+                                                      unsigned int kCF,
+                                                      real omCoarse,
+                                                      real omFine,
+                                                      real nu,
+                                                      unsigned int nxC,
+                                                      unsigned int nyC,
+                                                      unsigned int nxF,
+                                                      unsigned int nyF,
+                                                      OffCF offCF);
 
 __global__ void scaleCF_comp_D3Q27F3( real* DC,
-												 real* DF,
-												 real* G6,
-												 unsigned int* neighborCX,
-												 unsigned int* neighborCY,
-												 unsigned int* neighborCZ,
-												 unsigned int* neighborFX,
-												 unsigned int* neighborFY,
-												 unsigned int* neighborFZ,
-												 unsigned int size_MatC,
-												 unsigned int size_MatF,
-												 bool isEvenTimestep,
-												 unsigned int* posCSWB,
-												 unsigned int* posFSWB,
-												 unsigned int kCF,
-												 real omCoarse,
-												 real omFine,
-												 real nu,
-												 unsigned int nxC,
-												 unsigned int nyC,
-												 unsigned int nxF,
-												 unsigned int nyF,
-												 OffCF offCF);
+                                                 real* DF,
+                                                 real* G6,
+                                                 unsigned int* neighborCX,
+                                                 unsigned int* neighborCY,
+                                                 unsigned int* neighborCZ,
+                                                 unsigned int* neighborFX,
+                                                 unsigned int* neighborFY,
+                                                 unsigned int* neighborFZ,
+                                                 unsigned int size_MatC,
+                                                 unsigned int size_MatF,
+                                                 bool isEvenTimestep,
+                                                 unsigned int* posCSWB,
+                                                 unsigned int* posFSWB,
+                                                 unsigned int kCF,
+                                                 real omCoarse,
+                                                 real omFine,
+                                                 real nu,
+                                                 unsigned int nxC,
+                                                 unsigned int nyC,
+                                                 unsigned int nxF,
+                                                 unsigned int nyF,
+                                                 OffCF offCF);
 
 
 __global__ void scaleCF_staggered_time_comp_27(real* DC,
-														  real* DF,
-														  unsigned int* neighborCX,
-														  unsigned int* neighborCY,
-														  unsigned int* neighborCZ,
-														  unsigned int* neighborFX,
-														  unsigned int* neighborFY,
-														  unsigned int* neighborFZ,
-														  unsigned int size_MatC,
-														  unsigned int size_MatF,
-														  bool isEvenTimestep,
-														  unsigned int* posCSWB,
-														  unsigned int* posFSWB,
-														  unsigned int kCF,
-														  real omCoarse,
-														  real omFine,
-														  real nu,
-														  unsigned int nxC,
-														  unsigned int nyC,
-														  unsigned int nxF,
-														  unsigned int nyF,
-														  OffCF offCF);
+                                                          real* DF,
+                                                          unsigned int* neighborCX,
+                                                          unsigned int* neighborCY,
+                                                          unsigned int* neighborCZ,
+                                                          unsigned int* neighborFX,
+                                                          unsigned int* neighborFY,
+                                                          unsigned int* neighborFZ,
+                                                          unsigned int size_MatC,
+                                                          unsigned int size_MatF,
+                                                          bool isEvenTimestep,
+                                                          unsigned int* posCSWB,
+                                                          unsigned int* posFSWB,
+                                                          unsigned int kCF,
+                                                          real omCoarse,
+                                                          real omFine,
+                                                          real nu,
+                                                          unsigned int nxC,
+                                                          unsigned int nyC,
+                                                          unsigned int nxF,
+                                                          unsigned int nyF,
+                                                          OffCF offCF);
 
 __global__ void scaleCF_RhoSq_comp_27( real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posCSWB,
-												  unsigned int* posFSWB,
-												  unsigned int kCF,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  OffCF offCF);
+                                                  real* DF,
+                                                  unsigned int* neighborCX,
+                                                  unsigned int* neighborCY,
+                                                  unsigned int* neighborCZ,
+                                                  unsigned int* neighborFX,
+                                                  unsigned int* neighborFY,
+                                                  unsigned int* neighborFZ,
+                                                  unsigned int size_MatC,
+                                                  unsigned int size_MatF,
+                                                  bool isEvenTimestep,
+                                                  unsigned int* posCSWB,
+                                                  unsigned int* posFSWB,
+                                                  unsigned int kCF,
+                                                  real omCoarse,
+                                                  real omFine,
+                                                  real nu,
+                                                  unsigned int nxC,
+                                                  unsigned int nyC,
+                                                  unsigned int nxF,
+                                                  unsigned int nyF,
+                                                  OffCF offCF);
 
 __global__ void scaleCF_compressible(
-    real* distributionsCoarse, 
-    real* distributionsFine, 
+    real* distributionsCoarse,
+    real* distributionsFine,
     unsigned int* neighborXcoarse,
     unsigned int* neighborYcoarse,
     unsigned int* neighborZcoarse,
     unsigned int* neighborXfine,
     unsigned int* neighborYfine,
     unsigned int* neighborZfine,
-    unsigned int numberOfLBnodesCoarse, 
-    unsigned int numberOfLBnodesFine, 
+    unsigned int numberOfLBnodesCoarse,
+    unsigned int numberOfLBnodesFine,
     bool isEvenTimestep,
-    unsigned int* indicesCoarseMMM, 
-    unsigned int* indicesFineMMM, 
-    unsigned int numberOfInterfaceNodes, 
-    real omegaCoarse, 
-    real omegaFine, 
+    unsigned int* indicesCoarseMMM,
+    unsigned int* indicesFineMMM,
+    unsigned int numberOfInterfaceNodes,
+    real omegaCoarse,
+    real omegaFine,
     OffCF offsetCF);
 
 __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
-														real* DF,
-														unsigned int* neighborCX,
-														unsigned int* neighborCY,
-														unsigned int* neighborCZ,
-														unsigned int* neighborFX,
-														unsigned int* neighborFY,
-														unsigned int* neighborFZ,
-														unsigned int size_MatC,
-														unsigned int size_MatF,
-														bool isEvenTimestep,
-														unsigned int* posCSWB,
-														unsigned int* posFSWB,
-														unsigned int kCF,
-														real omCoarse,
-														real omFine,
-														real nu,
-														unsigned int nxC,
-														unsigned int nyC,
-														unsigned int nxF,
-														unsigned int nyF,
-														OffCF offCF);
+                                                        real* DF,
+                                                        unsigned int* neighborCX,
+                                                        unsigned int* neighborCY,
+                                                        unsigned int* neighborCZ,
+                                                        unsigned int* neighborFX,
+                                                        unsigned int* neighborFY,
+                                                        unsigned int* neighborFZ,
+                                                        unsigned int size_MatC,
+                                                        unsigned int size_MatF,
+                                                        bool isEvenTimestep,
+                                                        unsigned int* posCSWB,
+                                                        unsigned int* posFSWB,
+                                                        unsigned int kCF,
+                                                        real omCoarse,
+                                                        real omFine,
+                                                        real nu,
+                                                        unsigned int nxC,
+                                                        unsigned int nyC,
+                                                        unsigned int nxF,
+                                                        unsigned int nyF,
+                                                        OffCF offCF);
 
 __global__ void scaleCF_AA2016_comp_27(real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posCSWB,
-												  unsigned int* posFSWB,
-												  unsigned int kCF,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  OffCF offCF);
+                                                  real* DF,
+                                                  unsigned int* neighborCX,
+                                                  unsigned int* neighborCY,
+                                                  unsigned int* neighborCZ,
+                                                  unsigned int* neighborFX,
+                                                  unsigned int* neighborFY,
+                                                  unsigned int* neighborFZ,
+                                                  unsigned int size_MatC,
+                                                  unsigned int size_MatF,
+                                                  bool isEvenTimestep,
+                                                  unsigned int* posCSWB,
+                                                  unsigned int* posFSWB,
+                                                  unsigned int kCF,
+                                                  real omCoarse,
+                                                  real omFine,
+                                                  real nu,
+                                                  unsigned int nxC,
+                                                  unsigned int nyC,
+                                                  unsigned int nxF,
+                                                  unsigned int nyF,
+                                                  OffCF offCF);
 
 __global__ void scaleCF_NSPress_27(real* DC,
-											  real* DF,
-											  unsigned int* neighborCX,
-											  unsigned int* neighborCY,
-											  unsigned int* neighborCZ,
-											  unsigned int* neighborFX,
-											  unsigned int* neighborFY,
-											  unsigned int* neighborFZ,
-											  unsigned int size_MatC,
-											  unsigned int size_MatF,
-											  bool isEvenTimestep,
-											  unsigned int* posCSWB,
-											  unsigned int* posFSWB,
-											  unsigned int kCF,
-											  real omCoarse,
-											  real omFine,
-											  real nu,
-											  unsigned int nxC,
-											  unsigned int nyC,
-											  unsigned int nxF,
-											  unsigned int nyF,
-											  OffCF offCF);
+                                              real* DF,
+                                              unsigned int* neighborCX,
+                                              unsigned int* neighborCY,
+                                              unsigned int* neighborCZ,
+                                              unsigned int* neighborFX,
+                                              unsigned int* neighborFY,
+                                              unsigned int* neighborFZ,
+                                              unsigned int size_MatC,
+                                              unsigned int size_MatF,
+                                              bool isEvenTimestep,
+                                              unsigned int* posCSWB,
+                                              unsigned int* posFSWB,
+                                              unsigned int kCF,
+                                              real omCoarse,
+                                              real omFine,
+                                              real nu,
+                                              unsigned int nxC,
+                                              unsigned int nyC,
+                                              unsigned int nxF,
+                                              unsigned int nyF,
+                                              OffCF offCF);
 
 __global__ void scaleCFThSMG7( real* DC,
                                           real* DF,
@@ -1994,7 +1994,7 @@ __global__ void scaleCFThS27(real* DC,
                                         unsigned int kCF,
                                         real nu,
                                         real diffusivity_fine,
-										OffCF offCF);
+                                        OffCF offCF);
 
 //fine to coarse
 __global__ void scaleFC27(real* DC,
@@ -2005,18 +2005,18 @@ __global__ void scaleFC27(real* DC,
                                     unsigned int* neighborFX,
                                     unsigned int* neighborFY,
                                     unsigned int* neighborFZ,
-										       unsigned int size_MatC,
-										       unsigned int size_MatF,
-										       bool isEvenTimestep,
+                                               unsigned int size_MatC,
+                                               unsigned int size_MatF,
+                                               bool isEvenTimestep,
                                      unsigned int* posC,
                                      unsigned int* posFSWB,
                                      unsigned int kFC,
-										       real omCoarse,
-										       real omFine,
-										       real nu,
-										       unsigned int nxC,
-										       unsigned int nyC,
-										       unsigned int nxF,
+                                               real omCoarse,
+                                               real omFine,
+                                               real nu,
+                                               unsigned int nxC,
+                                               unsigned int nyC,
+                                               unsigned int nxF,
                                      unsigned int nyF);
 
 __global__ void scaleFCEff27(real* DC,
@@ -2112,145 +2112,145 @@ __global__ void scaleFC_Fix_27( real* DC,
                                           OffFC offFC);
 
 __global__ void scaleFC_Fix_comp_27(   real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posC,
-												  unsigned int* posFSWB,
-												  unsigned int kFC,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  OffFC offFC);
+                                                  real* DF,
+                                                  unsigned int* neighborCX,
+                                                  unsigned int* neighborCY,
+                                                  unsigned int* neighborCZ,
+                                                  unsigned int* neighborFX,
+                                                  unsigned int* neighborFY,
+                                                  unsigned int* neighborFZ,
+                                                  unsigned int size_MatC,
+                                                  unsigned int size_MatF,
+                                                  bool isEvenTimestep,
+                                                  unsigned int* posC,
+                                                  unsigned int* posFSWB,
+                                                  unsigned int kFC,
+                                                  real omCoarse,
+                                                  real omFine,
+                                                  real nu,
+                                                  unsigned int nxC,
+                                                  unsigned int nyC,
+                                                  unsigned int nxF,
+                                                  unsigned int nyF,
+                                                  OffFC offFC);
 
 __global__ void scaleFC_0817_comp_27(  real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posC,
-												  unsigned int* posFSWB,
-												  unsigned int kFC,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  OffFC offFC);
+                                                  real* DF,
+                                                  unsigned int* neighborCX,
+                                                  unsigned int* neighborCY,
+                                                  unsigned int* neighborCZ,
+                                                  unsigned int* neighborFX,
+                                                  unsigned int* neighborFY,
+                                                  unsigned int* neighborFZ,
+                                                  unsigned int size_MatC,
+                                                  unsigned int size_MatF,
+                                                  bool isEvenTimestep,
+                                                  unsigned int* posC,
+                                                  unsigned int* posFSWB,
+                                                  unsigned int kFC,
+                                                  real omCoarse,
+                                                  real omFine,
+                                                  real nu,
+                                                  unsigned int nxC,
+                                                  unsigned int nyC,
+                                                  unsigned int nxF,
+                                                  unsigned int nyF,
+                                                  OffFC offFC);
 
 __global__ void scaleFC_comp_D3Q27F3_2018( real* DC,
-													  real* DF,
-													  real* G6,
-													  unsigned int* neighborCX,
-													  unsigned int* neighborCY,
-													  unsigned int* neighborCZ,
-													  unsigned int* neighborFX,
-													  unsigned int* neighborFY,
-													  unsigned int* neighborFZ,
-													  unsigned int size_MatC,
-													  unsigned int size_MatF,
-													  bool isEvenTimestep,
-													  unsigned int* posC,
-													  unsigned int* posFSWB,
-													  unsigned int kFC,
-													  real omCoarse,
-													  real omFine,
-													  real nu,
-													  unsigned int nxC,
-													  unsigned int nyC,
-													  unsigned int nxF,
-													  unsigned int nyF,
-													  OffFC offFC);
+                                                      real* DF,
+                                                      real* G6,
+                                                      unsigned int* neighborCX,
+                                                      unsigned int* neighborCY,
+                                                      unsigned int* neighborCZ,
+                                                      unsigned int* neighborFX,
+                                                      unsigned int* neighborFY,
+                                                      unsigned int* neighborFZ,
+                                                      unsigned int size_MatC,
+                                                      unsigned int size_MatF,
+                                                      bool isEvenTimestep,
+                                                      unsigned int* posC,
+                                                      unsigned int* posFSWB,
+                                                      unsigned int kFC,
+                                                      real omCoarse,
+                                                      real omFine,
+                                                      real nu,
+                                                      unsigned int nxC,
+                                                      unsigned int nyC,
+                                                      unsigned int nxF,
+                                                      unsigned int nyF,
+                                                      OffFC offFC);
 
 __global__ void scaleFC_comp_D3Q27F3( real* DC,
-												 real* DF,
-												 real* G6,
-												 unsigned int* neighborCX,
-												 unsigned int* neighborCY,
-												 unsigned int* neighborCZ,
-												 unsigned int* neighborFX,
-												 unsigned int* neighborFY,
-												 unsigned int* neighborFZ,
-												 unsigned int size_MatC,
-												 unsigned int size_MatF,
-												 bool isEvenTimestep,
-												 unsigned int* posC,
-												 unsigned int* posFSWB,
-												 unsigned int kFC,
-												 real omCoarse,
-												 real omFine,
-												 real nu,
-												 unsigned int nxC,
-												 unsigned int nyC,
-												 unsigned int nxF,
-												 unsigned int nyF,
-												 OffFC offFC);
+                                                 real* DF,
+                                                 real* G6,
+                                                 unsigned int* neighborCX,
+                                                 unsigned int* neighborCY,
+                                                 unsigned int* neighborCZ,
+                                                 unsigned int* neighborFX,
+                                                 unsigned int* neighborFY,
+                                                 unsigned int* neighborFZ,
+                                                 unsigned int size_MatC,
+                                                 unsigned int size_MatF,
+                                                 bool isEvenTimestep,
+                                                 unsigned int* posC,
+                                                 unsigned int* posFSWB,
+                                                 unsigned int kFC,
+                                                 real omCoarse,
+                                                 real omFine,
+                                                 real nu,
+                                                 unsigned int nxC,
+                                                 unsigned int nyC,
+                                                 unsigned int nxF,
+                                                 unsigned int nyF,
+                                                 OffFC offFC);
 
 
 __global__ void scaleFC_staggered_time_comp_27(real* DC,
-														  real* DF,
-														  unsigned int* neighborCX,
-														  unsigned int* neighborCY,
-														  unsigned int* neighborCZ,
-														  unsigned int* neighborFX,
-														  unsigned int* neighborFY,
-														  unsigned int* neighborFZ,
-														  unsigned int size_MatC,
-														  unsigned int size_MatF,
-														  bool isEvenTimestep,
-														  unsigned int* posC,
-														  unsigned int* posFSWB,
-														  unsigned int kFC,
-														  real omCoarse,
-														  real omFine,
-														  real nu,
-														  unsigned int nxC,
-														  unsigned int nyC,
-														  unsigned int nxF,
-														  unsigned int nyF,
-														  OffFC offFC);
+                                                          real* DF,
+                                                          unsigned int* neighborCX,
+                                                          unsigned int* neighborCY,
+                                                          unsigned int* neighborCZ,
+                                                          unsigned int* neighborFX,
+                                                          unsigned int* neighborFY,
+                                                          unsigned int* neighborFZ,
+                                                          unsigned int size_MatC,
+                                                          unsigned int size_MatF,
+                                                          bool isEvenTimestep,
+                                                          unsigned int* posC,
+                                                          unsigned int* posFSWB,
+                                                          unsigned int kFC,
+                                                          real omCoarse,
+                                                          real omFine,
+                                                          real nu,
+                                                          unsigned int nxC,
+                                                          unsigned int nyC,
+                                                          unsigned int nxF,
+                                                          unsigned int nyF,
+                                                          OffFC offFC);
 
 __global__ void scaleFC_RhoSq_comp_27( real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posC,
-												  unsigned int* posFSWB,
-												  unsigned int kFC,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  OffFC offFC);
+                                                  real* DF,
+                                                  unsigned int* neighborCX,
+                                                  unsigned int* neighborCY,
+                                                  unsigned int* neighborCZ,
+                                                  unsigned int* neighborFX,
+                                                  unsigned int* neighborFY,
+                                                  unsigned int* neighborFZ,
+                                                  unsigned int size_MatC,
+                                                  unsigned int size_MatF,
+                                                  bool isEvenTimestep,
+                                                  unsigned int* posC,
+                                                  unsigned int* posFSWB,
+                                                  unsigned int kFC,
+                                                  real omCoarse,
+                                                  real omFine,
+                                                  real nu,
+                                                  unsigned int nxC,
+                                                  unsigned int nyC,
+                                                  unsigned int nxF,
+                                                  unsigned int nyF,
+                                                  OffFC offFC);
 
 __global__ void scaleFC_compressible(
     real *distributionsCoarse,
@@ -2272,73 +2272,73 @@ __global__ void scaleFC_compressible(
     OffFC offsetFC);
 
 __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
-														real* DF,
-														unsigned int* neighborCX,
-														unsigned int* neighborCY,
-														unsigned int* neighborCZ,
-														unsigned int* neighborFX,
-														unsigned int* neighborFY,
-														unsigned int* neighborFZ,
-														unsigned int size_MatC,
-														unsigned int size_MatF,
-														bool isEvenTimestep,
-														unsigned int* posC,
-														unsigned int* posFSWB,
-														unsigned int kFC,
-														real omCoarse,
-														real omFine,
-														real nu,
-														unsigned int nxC,
-														unsigned int nyC,
-														unsigned int nxF,
-														unsigned int nyF,
-														OffFC offFC);
+                                                        real* DF,
+                                                        unsigned int* neighborCX,
+                                                        unsigned int* neighborCY,
+                                                        unsigned int* neighborCZ,
+                                                        unsigned int* neighborFX,
+                                                        unsigned int* neighborFY,
+                                                        unsigned int* neighborFZ,
+                                                        unsigned int size_MatC,
+                                                        unsigned int size_MatF,
+                                                        bool isEvenTimestep,
+                                                        unsigned int* posC,
+                                                        unsigned int* posFSWB,
+                                                        unsigned int kFC,
+                                                        real omCoarse,
+                                                        real omFine,
+                                                        real nu,
+                                                        unsigned int nxC,
+                                                        unsigned int nyC,
+                                                        unsigned int nxF,
+                                                        unsigned int nyF,
+                                                        OffFC offFC);
 
 __global__ void scaleFC_AA2016_comp_27(real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posC,
-												  unsigned int* posFSWB,
-												  unsigned int kFC,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  OffFC offFC);
+                                                  real* DF,
+                                                  unsigned int* neighborCX,
+                                                  unsigned int* neighborCY,
+                                                  unsigned int* neighborCZ,
+                                                  unsigned int* neighborFX,
+                                                  unsigned int* neighborFY,
+                                                  unsigned int* neighborFZ,
+                                                  unsigned int size_MatC,
+                                                  unsigned int size_MatF,
+                                                  bool isEvenTimestep,
+                                                  unsigned int* posC,
+                                                  unsigned int* posFSWB,
+                                                  unsigned int kFC,
+                                                  real omCoarse,
+                                                  real omFine,
+                                                  real nu,
+                                                  unsigned int nxC,
+                                                  unsigned int nyC,
+                                                  unsigned int nxF,
+                                                  unsigned int nyF,
+                                                  OffFC offFC);
 
 __global__ void scaleFC_NSPress_27(real* DC,
-											  real* DF,
-											  unsigned int* neighborCX,
-											  unsigned int* neighborCY,
-											  unsigned int* neighborCZ,
-											  unsigned int* neighborFX,
-											  unsigned int* neighborFY,
-											  unsigned int* neighborFZ,
-											  unsigned int size_MatC,
-											  unsigned int size_MatF,
-											  bool isEvenTimestep,
-											  unsigned int* posC,
-											  unsigned int* posFSWB,
-											  unsigned int kFC,
-											  real omCoarse,
-											  real omFine,
-											  real nu,
-											  unsigned int nxC,
-											  unsigned int nyC,
-											  unsigned int nxF,
-											  unsigned int nyF,
-											  OffFC offFC);
+                                              real* DF,
+                                              unsigned int* neighborCX,
+                                              unsigned int* neighborCY,
+                                              unsigned int* neighborCZ,
+                                              unsigned int* neighborFX,
+                                              unsigned int* neighborFY,
+                                              unsigned int* neighborFZ,
+                                              unsigned int size_MatC,
+                                              unsigned int size_MatF,
+                                              bool isEvenTimestep,
+                                              unsigned int* posC,
+                                              unsigned int* posFSWB,
+                                              unsigned int kFC,
+                                              real omCoarse,
+                                              real omFine,
+                                              real nu,
+                                              unsigned int nxC,
+                                              unsigned int nyC,
+                                              unsigned int nxF,
+                                              unsigned int nyF,
+                                              OffFC offFC);
 
 __global__ void scaleFCThSMG7( real* DC,
                                           real* DF,
@@ -2397,47 +2397,47 @@ __global__ void scaleFCThS27(  real* DC,
                                           unsigned int kFC,
                                           real nu,
                                           real diffusivity_coarse,
-										  OffFC offFC);
+                                          OffFC offFC);
 
 __global__ void DragLiftPost27(  real* DD,
-											int* k_Q,
-											real* QQ,
-											int numberOfBCnodes,
-											double *DragX,
-											double *DragY,
-											double *DragZ,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											bool isEvenTimestep);
+                                            int* k_Q,
+                                            real* QQ,
+                                            int numberOfBCnodes,
+                                            double *DragX,
+                                            double *DragY,
+                                            double *DragZ,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned int size_Mat,
+                                            bool isEvenTimestep);
 
 __global__ void DragLiftPre27(   real* DD,
-											int* k_Q,
-											real* QQ,
-											int numberOfBCnodes,
-											double *DragX,
-											double *DragY,
-											double *DragZ,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											bool isEvenTimestep);
+                                            int* k_Q,
+                                            real* QQ,
+                                            int numberOfBCnodes,
+                                            double *DragX,
+                                            double *DragY,
+                                            double *DragZ,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned int size_Mat,
+                                            bool isEvenTimestep);
 
 __global__ void CalcCP27(real* DD,
-									int* cpIndex,
-									int nonCp,
-									double *cpPress,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int size_Mat,
-									bool isEvenTimestep);
+                                    int* cpIndex,
+                                    int nonCp,
+                                    double *cpPress,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    unsigned int size_Mat,
+                                    bool isEvenTimestep);
 
 __global__ void getSendFsPre27(real* DD,
-										  real* bufferFs,
-										  int* sendIndex,
+                                          real* bufferFs,
+                                          int* sendIndex,
                                           int buffmax,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
@@ -2446,8 +2446,8 @@ __global__ void getSendFsPre27(real* DD,
                                           bool isEvenTimestep);
 
 __global__ void getSendFsPost27(real* DD,
-										   real* bufferFs,
-										   int* sendIndex,
+                                           real* bufferFs,
+                                           int* sendIndex,
                                            int buffmax,
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
@@ -2456,8 +2456,8 @@ __global__ void getSendFsPost27(real* DD,
                                            bool isEvenTimestep);
 
 __global__ void setRecvFsPre27(real* DD,
-										  real* bufferFs,
-										  int* recvIndex,
+                                          real* bufferFs,
+                                          int* recvIndex,
                                           int buffmax,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
@@ -2466,8 +2466,8 @@ __global__ void setRecvFsPre27(real* DD,
                                           bool isEvenTimestep);
 
 __global__ void setRecvFsPost27(real* DD,
-										   real* bufferFs,
-										   int* recvIndex,
+                                           real* bufferFs,
+                                           int* recvIndex,
                                            int buffmax,
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
@@ -2476,155 +2476,155 @@ __global__ void setRecvFsPost27(real* DD,
                                            bool isEvenTimestep);
 
 __global__ void getSendGsF3(
-	real* G6,
-	real* bufferGs,
-	int* sendIndex,
-	int buffmax,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int size_Mat,
-	bool isEvenTimestep);
+    real* G6,
+    real* bufferGs,
+    int* sendIndex,
+    int buffmax,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int size_Mat,
+    bool isEvenTimestep);
 
 __global__ void setRecvGsF3(
-	real* G6,
-	real* bufferGs,
-	int* recvIndex,
-	int buffmax,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int size_Mat,
-	bool isEvenTimestep);
+    real* G6,
+    real* bufferGs,
+    int* recvIndex,
+    int buffmax,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int size_Mat,
+    bool isEvenTimestep);
 
 __global__ void WallFunction27( 	real* vx,
-											real* vy,
-											real* vz,
-											real* DD,
-											int* k_Q,
-											real* QQ,
-											unsigned int numberOfBCnodes,
-											real om1,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											bool isEvenTimestep);
+                                            real* vy,
+                                            real* vz,
+                                            real* DD,
+                                            int* k_Q,
+                                            real* QQ,
+                                            unsigned int numberOfBCnodes,
+                                            real om1,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned int size_Mat,
+                                            bool isEvenTimestep);
 
 __global__ void LBSetOutputWallVelocitySP27( real* vxD,
-														real* vyD,
-														real* vzD,
-														real* vxWall,
-														real* vyWall,
-														real* vzWall,
-														int numberOfWallNodes,
-														int* kWallNodes,
-														real* rhoD,
-														real* pressD,
-														unsigned int* geoD,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														unsigned int size_Mat,
-														real* DD,
-														bool isEvenTimestep);
+                                                        real* vyD,
+                                                        real* vzD,
+                                                        real* vxWall,
+                                                        real* vyWall,
+                                                        real* vzWall,
+                                                        int numberOfWallNodes,
+                                                        int* kWallNodes,
+                                                        real* rhoD,
+                                                        real* pressD,
+                                                        unsigned int* geoD,
+                                                        unsigned int* neighborX,
+                                                        unsigned int* neighborY,
+                                                        unsigned int* neighborZ,
+                                                        unsigned int size_Mat,
+                                                        real* DD,
+                                                        bool isEvenTimestep);
 
 __global__ void GetVeloforForcing27( real* DD,
-												int* bcIndex,
-												int nonAtBC,
-												real* Vx,
-												real* Vy,
-												real* Vz,
-												unsigned int* neighborX,
-												unsigned int* neighborY,
-												unsigned int* neighborZ,
-												unsigned int size_Mat,
-												bool isEvenTimestep);
+                                                int* bcIndex,
+                                                int nonAtBC,
+                                                real* Vx,
+                                                real* Vy,
+                                                real* Vz,
+                                                unsigned int* neighborX,
+                                                unsigned int* neighborY,
+                                                unsigned int* neighborZ,
+                                                unsigned int size_Mat,
+                                                bool isEvenTimestep);
 
 __global__ void InitParticles( real* coordX,
-										  real* coordY,
-										  real* coordZ,
-										  real* coordParticleXlocal,
-										  real* coordParticleYlocal,
-										  real* coordParticleZlocal,
-										  real* coordParticleXglobal,
-										  real* coordParticleYglobal,
-										  real* coordParticleZglobal,
-										  real* veloParticleX,
-										  real* veloParticleY,
-										  real* veloParticleZ,
-										  real* randArray,
-										  unsigned int* particleID,
-										  unsigned int* cellBaseID,
-										  unsigned int* bcMatD,
-										  unsigned int* neighborX,
-										  unsigned int* neighborY,
-										  unsigned int* neighborZ,
-										  unsigned int* neighborWSB,
-										  int level,
-									      unsigned int numberOfParticles,
-										  unsigned int size_Mat);
+                                          real* coordY,
+                                          real* coordZ,
+                                          real* coordParticleXlocal,
+                                          real* coordParticleYlocal,
+                                          real* coordParticleZlocal,
+                                          real* coordParticleXglobal,
+                                          real* coordParticleYglobal,
+                                          real* coordParticleZglobal,
+                                          real* veloParticleX,
+                                          real* veloParticleY,
+                                          real* veloParticleZ,
+                                          real* randArray,
+                                          unsigned int* particleID,
+                                          unsigned int* cellBaseID,
+                                          unsigned int* bcMatD,
+                                          unsigned int* neighborX,
+                                          unsigned int* neighborY,
+                                          unsigned int* neighborZ,
+                                          unsigned int* neighborWSB,
+                                          int level,
+                                          unsigned int numberOfParticles,
+                                          unsigned int size_Mat);
 
 __global__ void MoveParticles( real* coordX,
-										  real* coordY,
-										  real* coordZ,
-										  real* coordParticleXlocal,
-										  real* coordParticleYlocal,
-										  real* coordParticleZlocal,
-										  real* coordParticleXglobal,
-										  real* coordParticleYglobal,
-										  real* coordParticleZglobal,
-										  real* veloParticleX,
-										  real* veloParticleY,
-										  real* veloParticleZ,
-										  real* DD,
-										  real  omega,
-										  unsigned int* particleID,
-										  unsigned int* cellBaseID,
-										  unsigned int* bcMatD,
-										  unsigned int* neighborX,
-										  unsigned int* neighborY,
-										  unsigned int* neighborZ,
-										  unsigned int* neighborWSB,
-										  int level,
-										  unsigned int timestep,
-										  unsigned int numberOfTimesteps,
-									      unsigned int numberOfParticles,
-										  unsigned int size_Mat,
-										  bool isEvenTimestep);
+                                          real* coordY,
+                                          real* coordZ,
+                                          real* coordParticleXlocal,
+                                          real* coordParticleYlocal,
+                                          real* coordParticleZlocal,
+                                          real* coordParticleXglobal,
+                                          real* coordParticleYglobal,
+                                          real* coordParticleZglobal,
+                                          real* veloParticleX,
+                                          real* veloParticleY,
+                                          real* veloParticleZ,
+                                          real* DD,
+                                          real  omega,
+                                          unsigned int* particleID,
+                                          unsigned int* cellBaseID,
+                                          unsigned int* bcMatD,
+                                          unsigned int* neighborX,
+                                          unsigned int* neighborY,
+                                          unsigned int* neighborZ,
+                                          unsigned int* neighborWSB,
+                                          int level,
+                                          unsigned int timestep,
+                                          unsigned int numberOfTimesteps,
+                                          unsigned int numberOfParticles,
+                                          unsigned int size_Mat,
+                                          bool isEvenTimestep);
 
 __global__ void MoveParticlesWithoutBCs(   real* coordX,
-													  real* coordY,
-													  real* coordZ,
-													  real* coordParticleXlocal,
-													  real* coordParticleYlocal,
-													  real* coordParticleZlocal,
-													  real* coordParticleXglobal,
-													  real* coordParticleYglobal,
-													  real* coordParticleZglobal,
-													  real* veloParticleX,
-													  real* veloParticleY,
-													  real* veloParticleZ,
-													  real* DD,
-													  real  omega,
-													  unsigned int* particleID,
-													  unsigned int* cellBaseID,
-													  unsigned int* bcMatD,
-													  unsigned int* neighborX,
-													  unsigned int* neighborY,
-													  unsigned int* neighborZ,
-													  unsigned int* neighborWSB,
-													  int level,
-													  unsigned int timestep,
-													  unsigned int numberOfTimesteps,
-													  unsigned int numberOfParticles,
-													  unsigned int size_Mat,
-													  bool isEvenTimestep);
+                                                      real* coordY,
+                                                      real* coordZ,
+                                                      real* coordParticleXlocal,
+                                                      real* coordParticleYlocal,
+                                                      real* coordParticleZlocal,
+                                                      real* coordParticleXglobal,
+                                                      real* coordParticleYglobal,
+                                                      real* coordParticleZglobal,
+                                                      real* veloParticleX,
+                                                      real* veloParticleY,
+                                                      real* veloParticleZ,
+                                                      real* DD,
+                                                      real  omega,
+                                                      unsigned int* particleID,
+                                                      unsigned int* cellBaseID,
+                                                      unsigned int* bcMatD,
+                                                      unsigned int* neighborX,
+                                                      unsigned int* neighborY,
+                                                      unsigned int* neighborZ,
+                                                      unsigned int* neighborWSB,
+                                                      int level,
+                                                      unsigned int timestep,
+                                                      unsigned int numberOfTimesteps,
+                                                      unsigned int numberOfParticles,
+                                                      unsigned int size_Mat,
+                                                      bool isEvenTimestep);
 
 __global__ void initRandom(curandState* state);
 
 __global__ void generateRandomValues(curandState* state,
-												real* randArray);
+                                                real* randArray);
 
 __global__ void CalcTurbulenceIntensity(
    real* vxx,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
index 363fb76626b256205346a290a42a1da40e1c7445..ebab2b3e8a38bc221017ffb604d6f1c142afe1d9 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
@@ -114,429 +114,429 @@ void KernelCasSPMSOHM27( unsigned int numberOfThreads,
 }
 //////////////////////////////////////////////////////////////////////////
 void KernelKumCompSRTSP27(
-	unsigned int numberOfThreads,
-	real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* DDStart,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd)
+    unsigned int numberOfThreads,
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DDStart,
+    int size_Mat,
+    int level,
+    real* forces,
+    bool EvenOrOdd)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
    LB_Kernel_Kum_New_Comp_SRT_SP_27 <<< grid.grid, grid.threads >>>(
-	   omega,
-	   bcMatD,
-	   neighborX,
-	   neighborY,
-	   neighborZ,
-	   DDStart,
-	   size_Mat,
-	   level,
-	   forces,
-	   EvenOrOdd);
+       omega,
+       bcMatD,
+       neighborX,
+       neighborY,
+       neighborZ,
+       DDStart,
+       size_Mat,
+       level,
+       forces,
+       EvenOrOdd);
       getLastCudaError("LB_Kernel_Kum_New_Comp_SRT_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void KernelKum1hSP27(    unsigned int numberOfThreads,
-									real omega,
-									real deltaPhi,
-									real angularVelocity,
-									unsigned int* bcMatD,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									real* coordX,
-									real* coordY,
-									real* coordZ,
-									real* DDStart,
-									int size_Mat,
-									bool EvenOrOdd)
+                                    real omega,
+                                    real deltaPhi,
+                                    real angularVelocity,
+                                    unsigned int* bcMatD,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    real* coordX,
+                                    real* coordY,
+                                    real* coordZ,
+                                    real* DDStart,
+                                    int size_Mat,
+                                    bool EvenOrOdd)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
-		LB_Kernel_Kum_1h_SP_27<<< grid.grid, grid.threads >>>(omega,
-													deltaPhi,
-													angularVelocity,
-													bcMatD,
-													neighborX,
-													neighborY,
-													neighborZ,
-													coordX,
-													coordY,
-													coordZ,
-													DDStart,
-													size_Mat,
-													EvenOrOdd);
-		getLastCudaError("LB_Kernel_Kum_New_SP_27 execution failed");
+        LB_Kernel_Kum_1h_SP_27<<< grid.grid, grid.threads >>>(omega,
+                                                    deltaPhi,
+                                                    angularVelocity,
+                                                    bcMatD,
+                                                    neighborX,
+                                                    neighborY,
+                                                    neighborZ,
+                                                    coordX,
+                                                    coordY,
+                                                    coordZ,
+                                                    DDStart,
+                                                    size_Mat,
+                                                    EvenOrOdd);
+        getLastCudaError("LB_Kernel_Kum_New_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void KernelCascadeSP27(  unsigned int numberOfThreads,
-									real s9,
-									unsigned int* bcMatD,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									real* DD,
-									int size_Mat,
-									bool EvenOrOdd)
+                                    real s9,
+                                    unsigned int* bcMatD,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    real* DD,
+                                    int size_Mat,
+                                    bool EvenOrOdd)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
-		LB_Kernel_Cascade_SP_27<<< grid.grid, grid.threads >>>(s9,
-													bcMatD,
-													neighborX,
-													neighborY,
-													neighborZ,
-													DD,
-													size_Mat,
-													EvenOrOdd);
-		getLastCudaError("LB_Kernel_Cascade_SP_27 execution failed");
+        LB_Kernel_Cascade_SP_27<<< grid.grid, grid.threads >>>(s9,
+                                                    bcMatD,
+                                                    neighborX,
+                                                    neighborY,
+                                                    neighborZ,
+                                                    DD,
+                                                    size_Mat,
+                                                    EvenOrOdd);
+        getLastCudaError("LB_Kernel_Cascade_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void KernelKumNewSP27(   unsigned int numberOfThreads,
-									real s9,
-									unsigned int* bcMatD,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									real* DD,
-									int size_Mat,
-									bool EvenOrOdd)
+                                    real s9,
+                                    unsigned int* bcMatD,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    real* DD,
+                                    int size_Mat,
+                                    bool EvenOrOdd)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
 
-		LB_Kernel_Kum_New_SP_27<<< grid.grid, grid.threads >>>(s9,
-													bcMatD,
-													neighborX,
-													neighborY,
-													neighborZ,
-													DD,
-													size_Mat,
-													EvenOrOdd);
-		getLastCudaError("LB_Kernel_Kum_New_SP_27 execution failed");
+        LB_Kernel_Kum_New_SP_27<<< grid.grid, grid.threads >>>(s9,
+                                                    bcMatD,
+                                                    neighborX,
+                                                    neighborY,
+                                                    neighborZ,
+                                                    DD,
+                                                    size_Mat,
+                                                    EvenOrOdd);
+        getLastCudaError("LB_Kernel_Kum_New_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void KernelKumNewCompSP27(unsigned int numberOfThreads,
-									real s9,
-									unsigned int* bcMatD,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									real* DD,
-									int size_Mat,
-									int size_Array,
-									int level,
-									real* forces,
-									bool EvenOrOdd)
-{
-	//int Grid = size_Array / numberOfThreads;
-	//dim3 grid(Grid, 1, 1);
-	//dim3 threads(numberOfThreads, 1, 1 );
+                                    real s9,
+                                    unsigned int* bcMatD,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    real* DD,
+                                    int size_Mat,
+                                    int size_Array,
+                                    int level,
+                                    real* forces,
+                                    bool EvenOrOdd)
+{
+    //int Grid = size_Array / numberOfThreads;
+    //dim3 grid(Grid, 1, 1);
+    //dim3 threads(numberOfThreads, 1, 1 );
 
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
-		//LB_Kernel_Kum_New_Comp_SP_27<<< grid.grid, grid.threads >>>(	s9,
-		//													bcMatD,
-		//													neighborX,
-		//													neighborY,
-		//													neighborZ,
-		//													DD,
-		//													size_Mat,
-		//													level,
-		//													forces,
-		//													EvenOrOdd);
-		//getLastCudaError("LB_Kernel_Kum_New_Comp_SP_27 execution failed");
+        //LB_Kernel_Kum_New_Comp_SP_27<<< grid.grid, grid.threads >>>(	s9,
+        //													bcMatD,
+        //													neighborX,
+        //													neighborY,
+        //													neighborZ,
+        //													DD,
+        //													size_Mat,
+        //													level,
+        //													forces,
+        //													EvenOrOdd);
+        //getLastCudaError("LB_Kernel_Kum_New_Comp_SP_27 execution failed");
 }
 
 //////////////////////////////////////////////////////////////////////////
 void CumulantOnePreconditionedErrorDiffusionChimCompSP27(unsigned int numberOfThreads,
-																	real s9,
-																	unsigned int* bcMatD,
-																	unsigned int* neighborX,
-																	unsigned int* neighborY,
-																	unsigned int* neighborZ,
-																	real* DD,
-																	int size_Mat,
-																	int size_Array,
-																	int level,
-																	real* forces,
-																	bool EvenOrOdd)
-{
-	//int Grid = size_Array / numberOfThreads;
-	//dim3 grid(Grid, 1, 1);
-	//dim3 threads(numberOfThreads, 1, 1 );
+                                                                    real s9,
+                                                                    unsigned int* bcMatD,
+                                                                    unsigned int* neighborX,
+                                                                    unsigned int* neighborY,
+                                                                    unsigned int* neighborZ,
+                                                                    real* DD,
+                                                                    int size_Mat,
+                                                                    int size_Array,
+                                                                    int level,
+                                                                    real* forces,
+                                                                    bool EvenOrOdd)
+{
+    //int Grid = size_Array / numberOfThreads;
+    //dim3 grid(Grid, 1, 1);
+    //dim3 threads(numberOfThreads, 1, 1 );
 
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
 
-	Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>(	s9,
-																						bcMatD,
-																						neighborX,
-																						neighborY,
-																						neighborZ,
-																						DD,
-																						size_Mat,
-																						level,
-																						forces,
-																						EvenOrOdd);
-		getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed");
+    Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>(	s9,
+                                                                                        bcMatD,
+                                                                                        neighborX,
+                                                                                        neighborY,
+                                                                                        neighborZ,
+                                                                                        DD,
+                                                                                        size_Mat,
+                                                                                        level,
+                                                                                        forces,
+                                                                                        EvenOrOdd);
+        getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void CumulantOnePreconditionedChimCompSP27(  unsigned int numberOfThreads,
-														real s9,
-														unsigned int* bcMatD,
-														unsigned int* neighborX,
-														unsigned int* neighborY,
-														unsigned int* neighborZ,
-														real* DD,
-														int size_Mat,
-														int size_Array,
-														int level,
-														real* forces,
-														bool EvenOrOdd)
-{
-	//int Grid = size_Array / numberOfThreads;
-	//dim3 grid(Grid, 1, 1);
-	//dim3 threads(numberOfThreads, 1, 1 );
+                                                        real s9,
+                                                        unsigned int* bcMatD,
+                                                        unsigned int* neighborX,
+                                                        unsigned int* neighborY,
+                                                        unsigned int* neighborZ,
+                                                        real* DD,
+                                                        int size_Mat,
+                                                        int size_Array,
+                                                        int level,
+                                                        real* forces,
+                                                        bool EvenOrOdd)
+{
+    //int Grid = size_Array / numberOfThreads;
+    //dim3 grid(Grid, 1, 1);
+    //dim3 threads(numberOfThreads, 1, 1 );
 
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
 
-	Cumulant_One_preconditioned_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>(	s9,
-																		bcMatD,
-																		neighborX,
-																		neighborY,
-																		neighborZ,
-																		DD,
-																		size_Mat,
-																		level,
-																		forces,
-																		EvenOrOdd);
-		getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed");
+    Cumulant_One_preconditioned_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>(	s9,
+                                                                        bcMatD,
+                                                                        neighborX,
+                                                                        neighborY,
+                                                                        neighborZ,
+                                                                        DD,
+                                                                        size_Mat,
+                                                                        level,
+                                                                        forces,
+                                                                        EvenOrOdd);
+        getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void CumulantOneChimCompSP27(unsigned int numberOfThreads,
-										real s9,
-										unsigned int* bcMatD,
-										unsigned int* neighborX,
-										unsigned int* neighborY,
-										unsigned int* neighborZ,
-										real* DD,
-										int size_Mat,
-										int size_Array,
-										int level,
-										real* forces,
-										bool EvenOrOdd)
-{
-	//int Grid = size_Array / numberOfThreads;
-	//dim3 grid(Grid, 1, 1);
-	//dim3 threads(numberOfThreads, 1, 1 );
+                                        real s9,
+                                        unsigned int* bcMatD,
+                                        unsigned int* neighborX,
+                                        unsigned int* neighborY,
+                                        unsigned int* neighborZ,
+                                        real* DD,
+                                        int size_Mat,
+                                        int size_Array,
+                                        int level,
+                                        real* forces,
+                                        bool EvenOrOdd)
+{
+    //int Grid = size_Array / numberOfThreads;
+    //dim3 grid(Grid, 1, 1);
+    //dim3 threads(numberOfThreads, 1, 1 );
 
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
 
-	Cumulant_One_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>(	s9,
-														bcMatD,
-														neighborX,
-														neighborY,
-														neighborZ,
-														DD,
-														size_Mat,
-														level,
-														forces,
-														EvenOrOdd);
-		getLastCudaError("Cumulant_One_chim_Comp_SP_27 execution failed");
+    Cumulant_One_chim_Comp_SP_27 <<< grid.grid, grid.threads >>>(	s9,
+                                                        bcMatD,
+                                                        neighborX,
+                                                        neighborY,
+                                                        neighborZ,
+                                                        DD,
+                                                        size_Mat,
+                                                        level,
+                                                        forces,
+                                                        EvenOrOdd);
+        getLastCudaError("Cumulant_One_chim_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void KernelKumIsoTestSP27(unsigned int numberOfThreads,
-									 real s9,
-									 unsigned int* bcMatD,
-									 unsigned int* neighborX,
-									 unsigned int* neighborY,
-									 unsigned int* neighborZ,
-									 real* DD,
-									 real* dxxUx,
-									 real* dyyUy,
-									 real* dzzUz,
-									 int size_Mat,
-									 bool EvenOrOdd)
+                                     real s9,
+                                     unsigned int* bcMatD,
+                                     unsigned int* neighborX,
+                                     unsigned int* neighborY,
+                                     unsigned int* neighborZ,
+                                     real* DD,
+                                     real* dxxUx,
+                                     real* dyyUy,
+                                     real* dzzUz,
+                                     int size_Mat,
+                                     bool EvenOrOdd)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
 
-	LB_Kernel_Kum_IsoTest_SP_27<<< grid.grid, grid.threads >>>(s9,
-													bcMatD,
-													neighborX,
-													neighborY,
-													neighborZ,
-													DD,
-													dxxUx,
-													dyyUy,
-													dzzUz,
-													size_Mat,
-													EvenOrOdd);
-	getLastCudaError("LB_Kernel_Kum_IsoTest_SP_27 execution failed");
+    LB_Kernel_Kum_IsoTest_SP_27<<< grid.grid, grid.threads >>>(s9,
+                                                    bcMatD,
+                                                    neighborX,
+                                                    neighborY,
+                                                    neighborZ,
+                                                    DD,
+                                                    dxxUx,
+                                                    dyyUy,
+                                                    dzzUz,
+                                                    size_Mat,
+                                                    EvenOrOdd);
+    getLastCudaError("LB_Kernel_Kum_IsoTest_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void KernelKumCompSP27(  unsigned int numberOfThreads,
-									real s9,
-									unsigned int* bcMatD,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									real* DD,
-									int size_Mat,
-									bool EvenOrOdd)
+                                    real s9,
+                                    unsigned int* bcMatD,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    real* DD,
+                                    int size_Mat,
+                                    bool EvenOrOdd)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
 
-		LB_Kernel_Kum_Comp_SP_27<<< grid.grid, grid.threads >>>(s9,
-													bcMatD,
-													neighborX,
-													neighborY,
-													neighborZ,
-													DD,
-													size_Mat,
-													EvenOrOdd);
-		getLastCudaError("LB_Kernel_Kum_Comp_SP_27 execution failed");
+        LB_Kernel_Kum_Comp_SP_27<<< grid.grid, grid.threads >>>(s9,
+                                                    bcMatD,
+                                                    neighborX,
+                                                    neighborY,
+                                                    neighborZ,
+                                                    DD,
+                                                    size_Mat,
+                                                    EvenOrOdd);
+        getLastCudaError("LB_Kernel_Kum_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void KernelPMCumOneCompSP27(unsigned int numberOfThreads,
-									   real omega,
-									   unsigned int* neighborX,
-									   unsigned int* neighborY,
-									   unsigned int* neighborZ,
-									   real* DD,
-									   int size_Mat,
-									   int level,
-									   real* forces,
-									   real porosity,
-									   real darcy,
-									   real forchheimer,
-									   unsigned int sizeOfPorousMedia,
-									   unsigned int* nodeIdsPorousMedia,
-									   bool EvenOrOdd)
+                                       real omega,
+                                       unsigned int* neighborX,
+                                       unsigned int* neighborY,
+                                       unsigned int* neighborZ,
+                                       real* DD,
+                                       int size_Mat,
+                                       int level,
+                                       real* forces,
+                                       real porosity,
+                                       real darcy,
+                                       real forchheimer,
+                                       unsigned int sizeOfPorousMedia,
+                                       unsigned int* nodeIdsPorousMedia,
+                                       bool EvenOrOdd)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
 
-	LB_Kernel_PM_Cum_One_Comp_SP_27 <<< grid.grid, grid.threads >>>(omega,
-														  neighborX,
-														  neighborY,
-														  neighborZ,
-														  DD,
-														  size_Mat,
-														  level,
-														  forces,
-														  porosity,
-														  darcy,
-														  forchheimer,
-														  sizeOfPorousMedia,
-														  nodeIdsPorousMedia,
-														  EvenOrOdd);
-	getLastCudaError("LB_Kernel_PM_Cum_One_Comp_SP_27 execution failed");
+    LB_Kernel_PM_Cum_One_Comp_SP_27 <<< grid.grid, grid.threads >>>(omega,
+                                                          neighborX,
+                                                          neighborY,
+                                                          neighborZ,
+                                                          DD,
+                                                          size_Mat,
+                                                          level,
+                                                          forces,
+                                                          porosity,
+                                                          darcy,
+                                                          forchheimer,
+                                                          sizeOfPorousMedia,
+                                                          nodeIdsPorousMedia,
+                                                          EvenOrOdd);
+    getLastCudaError("LB_Kernel_PM_Cum_One_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void KernelWaleBySoniMalavCumAA2016CompSP27(
-	unsigned int numberOfThreads,
-	real s9,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int* neighborWSB,
-	real* veloX,
-	real* veloY,
-	real* veloZ,
-	real* DD,
-	real* turbulentViscosity,
-	int size_Mat,
-	int size_Array,
-	int level,
-	real* forces,
-	bool EvenOrOdd)
-{
-	//int Grid = size_Array / numberOfThreads;
-	//dim3 grid(Grid, 1, 1);
-	//dim3 threads(numberOfThreads, 1, 1 );
+    unsigned int numberOfThreads,
+    real s9,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int* neighborWSB,
+    real* veloX,
+    real* veloY,
+    real* veloZ,
+    real* DD,
+    real* turbulentViscosity,
+    int size_Mat,
+    int size_Array,
+    int level,
+    real* forces,
+    bool EvenOrOdd)
+{
+    //int Grid = size_Array / numberOfThreads;
+    //dim3 grid(Grid, 1, 1);
+    //dim3 threads(numberOfThreads, 1, 1 );
 
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
 
-	LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 << < grid.grid, grid.threads >> >(
-		s9,
-		bcMatD,
-		neighborX,
-		neighborY,
-		neighborZ,
-		neighborWSB,
-		veloX,
-		veloY,
-		veloZ,
-		DD,
-		turbulentViscosity,
-		size_Mat,
-		level,
-		forces,
-		EvenOrOdd);
-	getLastCudaError("LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 execution failed");
+    LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 << < grid.grid, grid.threads >> >(
+        s9,
+        bcMatD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        neighborWSB,
+        veloX,
+        veloY,
+        veloZ,
+        DD,
+        turbulentViscosity,
+        size_Mat,
+        level,
+        forces,
+        EvenOrOdd);
+    getLastCudaError("LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void KernelADincomp7(   unsigned int numberOfThreads,
-								   real diffusivity,
-								   unsigned int* bcMatD,
-								   unsigned int* neighborX,
-								   unsigned int* neighborY,
-								   unsigned int* neighborZ,
-								   real* DD,
-								   real* DD7,
-								   int size_Mat,
-								   bool EvenOrOdd)
+                                   real diffusivity,
+                                   unsigned int* bcMatD,
+                                   unsigned int* neighborX,
+                                   unsigned int* neighborY,
+                                   unsigned int* neighborZ,
+                                   real* DD,
+                                   real* DD7,
+                                   int size_Mat,
+                                   bool EvenOrOdd)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
       LB_Kernel_AD_Incomp_7<<< grid.grid, grid.threads >>>( diffusivity,
-												  bcMatD,
-												  neighborX,
-												  neighborY,
-												  neighborZ,
-												  DD,
-												  DD7,
-												  size_Mat,
-												  EvenOrOdd);
+                                                  bcMatD,
+                                                  neighborX,
+                                                  neighborY,
+                                                  neighborZ,
+                                                  DD,
+                                                  DD7,
+                                                  size_Mat,
+                                                  EvenOrOdd);
       getLastCudaError("LB_Kernel_AD_Incomp_7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void KernelADincomp27( unsigned int numberOfThreads,
-								  real diffusivity,
-								  unsigned int* bcMatD,
-								  unsigned int* neighborX,
-								  unsigned int* neighborY,
-								  unsigned int* neighborZ,
-								  real* DD,
-								  real* DD27,
-								  int size_Mat,
-								  bool EvenOrOdd)
-{
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-	LB_Kernel_AD_Incomp_27<<< grid.grid, grid.threads >>>( diffusivity,
-													bcMatD,
-													neighborX,
-													neighborY,
-													neighborZ,
-													DD,
-													DD27,
-													size_Mat,
-													EvenOrOdd);
-	getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed");
+                                  real diffusivity,
+                                  unsigned int* bcMatD,
+                                  unsigned int* neighborX,
+                                  unsigned int* neighborY,
+                                  unsigned int* neighborZ,
+                                  real* DD,
+                                  real* DD27,
+                                  int size_Mat,
+                                  bool EvenOrOdd)
+{
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+
+    LB_Kernel_AD_Incomp_27<<< grid.grid, grid.threads >>>( diffusivity,
+                                                    bcMatD,
+                                                    neighborX,
+                                                    neighborY,
+                                                    neighborZ,
+                                                    DD,
+                                                    DD27,
+                                                    size_Mat,
+                                                    EvenOrOdd);
+    getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void Init27( int myid,
@@ -558,7 +558,7 @@ void Init27( int myid,
    dim3 threads       ( grid_nx, 1, 1 );
    dim3 grid          ( grid_ny, grid_nz );   // Gitter fuer Kollision und Propagation
 
-	LBInit27<<< grid, threads >>> (  myid,
+    LBInit27<<< grid, threads >>> (  myid,
                                        numprocs,
                                        u0,
                                        geoD,
@@ -573,7 +573,7 @@ void Init27( int myid,
                                        DD,
                                        level,
                                        maxlevel);
-	getLastCudaError("LBInit27 execution failed");
+    getLastCudaError("LBInit27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void InitNonEqPartSP27( unsigned int numberOfThreads,
@@ -591,9 +591,9 @@ void InitNonEqPartSP27( unsigned int numberOfThreads,
                                    real omega,
                                    bool EvenOrOdd)
 {
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
-	LBInitNonEqPartSP27<<< grid.grid, grid.threads >>>( neighborX,
+    LBInitNonEqPartSP27<<< grid.grid, grid.threads >>>( neighborX,
                                                 neighborY,
                                                 neighborZ,
                                                 neighborWSB,
@@ -606,7 +606,7 @@ void InitNonEqPartSP27( unsigned int numberOfThreads,
                                                 DD,
                                                 omega,
                                                 EvenOrOdd);
-	getLastCudaError("LBInitNonEqPartSP27 execution failed");
+    getLastCudaError("LBInitNonEqPartSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void InitThS7(     unsigned int numberOfThreads,
@@ -622,9 +622,9 @@ void InitThS7(     unsigned int numberOfThreads,
                               real* DD7,
                               bool EvenOrOdd)
 {
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
-	InitAD7<<< grid.grid, grid.threads >>>( neighborX,
+    InitAD7<<< grid.grid, grid.threads >>>( neighborX,
                                        neighborY,
                                        neighborZ,
                                        geoD,
@@ -635,7 +635,7 @@ void InitThS7(     unsigned int numberOfThreads,
                                        size_Mat,
                                        DD7,
                                        EvenOrOdd);
-	getLastCudaError("InitAD7 execution failed");
+    getLastCudaError("InitAD7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void InitADDev27( unsigned int numberOfThreads,
@@ -651,9 +651,9 @@ void InitADDev27( unsigned int numberOfThreads,
                            real* DD27,
                            bool EvenOrOdd)
 {
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
-	InitAD27<<< grid.grid, grid.threads >>>(neighborX,
+    InitAD27<<< grid.grid, grid.threads >>>(neighborX,
                                        neighborY,
                                        neighborZ,
                                        geoD,
@@ -664,45 +664,45 @@ void InitADDev27( unsigned int numberOfThreads,
                                        size_Mat,
                                        DD27,
                                        EvenOrOdd);
-	getLastCudaError("InitAD27 execution failed");
+    getLastCudaError("InitAD27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void PostProcessorF3_2018Fehlberg(
-	unsigned int numberOfThreads,
-	real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* rhoOut,
-	real* vxOut,
-	real* vyOut,
-	real* vzOut,
-	real* DDStart,
-	real* G6,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd)
-{
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-	LB_PostProcessor_F3_2018_Fehlberg <<< grid.grid, grid.threads >>> (   omega,
-																  bcMatD,
-																  neighborX,
-																  neighborY,
-																  neighborZ,
-																  rhoOut,
-																  vxOut,
-																  vyOut,
-																  vzOut,
-																  DDStart,
-																  G6,
-																  size_Mat,
-																  level,
-																  forces,
-																  EvenOrOdd);
-	getLastCudaError("LB_PostProcessor_F3_2018_Fehlberg execution failed");
+    unsigned int numberOfThreads,
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* rhoOut,
+    real* vxOut,
+    real* vyOut,
+    real* vzOut,
+    real* DDStart,
+    real* G6,
+    int size_Mat,
+    int level,
+    real* forces,
+    bool EvenOrOdd)
+{
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+
+    LB_PostProcessor_F3_2018_Fehlberg <<< grid.grid, grid.threads >>> (   omega,
+                                                                  bcMatD,
+                                                                  neighborX,
+                                                                  neighborY,
+                                                                  neighborZ,
+                                                                  rhoOut,
+                                                                  vxOut,
+                                                                  vyOut,
+                                                                  vzOut,
+                                                                  DDStart,
+                                                                  G6,
+                                                                  size_Mat,
+                                                                  level,
+                                                                  forces,
+                                                                  EvenOrOdd);
+    getLastCudaError("LB_PostProcessor_F3_2018_Fehlberg execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void CalcMac27( real* vxD,
@@ -723,7 +723,7 @@ void CalcMac27( real* vxD,
    dim3 threads       ( grid_nx, 1, 1 );
    dim3 grid          ( grid_ny, grid_nz );
 
-	LBCalcMac27<<< grid, threads >>> (  vxD,
+    LBCalcMac27<<< grid, threads >>> (  vxD,
                                           vyD,
                                           vzD,
                                           rhoD,
@@ -734,7 +734,7 @@ void CalcMac27( real* vxD,
                                           size_Mat,
                                           DD,
                                           isEvenTimestep);
-	getLastCudaError("LBCalcMac27 execution failed");
+    getLastCudaError("LBCalcMac27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void CalcMacSP27( real* vxD,
@@ -751,9 +751,9 @@ void CalcMacSP27( real* vxD,
                              real* DD,
                              bool isEvenTimestep)
 {
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
-	LBCalcMacSP27<<< grid.grid, grid.threads >>> (   vxD,
+    LBCalcMacSP27<<< grid.grid, grid.threads >>> (   vxD,
                                              vyD,
                                              vzD,
                                              rhoD,
@@ -765,38 +765,38 @@ void CalcMacSP27( real* vxD,
                                              size_Mat,
                                              DD,
                                              isEvenTimestep);
-	getLastCudaError("LBCalcMacSP27 execution failed");
+    getLastCudaError("LBCalcMacSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void CalcMacCompSP27( real* vxD,
-								 real* vyD,
-								 real* vzD,
-								 real* rhoD,
-								 real* pressD,
-								 unsigned int* geoD,
-								 unsigned int* neighborX,
-								 unsigned int* neighborY,
-								 unsigned int* neighborZ,
-								 unsigned int size_Mat,
-								 unsigned int numberOfThreads,
-								 real* DD,
-								 bool isEvenTimestep)
-{
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-	LBCalcMacCompSP27<<< grid.grid, grid.threads >>> (   vxD,
-												 vyD,
-												 vzD,
-												 rhoD,
-												 pressD,
-												 geoD,
-												 neighborX,
-												 neighborY,
-												 neighborZ,
-												 size_Mat,
-												 DD,
-												 isEvenTimestep);
-	getLastCudaError("LBCalcMacSP27 execution failed");
+                                 real* vyD,
+                                 real* vzD,
+                                 real* rhoD,
+                                 real* pressD,
+                                 unsigned int* geoD,
+                                 unsigned int* neighborX,
+                                 unsigned int* neighborY,
+                                 unsigned int* neighborZ,
+                                 unsigned int size_Mat,
+                                 unsigned int numberOfThreads,
+                                 real* DD,
+                                 bool isEvenTimestep)
+{
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+
+    LBCalcMacCompSP27<<< grid.grid, grid.threads >>> (   vxD,
+                                                 vyD,
+                                                 vzD,
+                                                 rhoD,
+                                                 pressD,
+                                                 geoD,
+                                                 neighborX,
+                                                 neighborY,
+                                                 neighborZ,
+                                                 size_Mat,
+                                                 DD,
+                                                 isEvenTimestep);
+    getLastCudaError("LBCalcMacCompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void CalcMacThS7(  real* Conc,
@@ -809,9 +809,9 @@ void CalcMacThS7(  real* Conc,
                               real* DD7,
                               bool isEvenTimestep)
 {
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
-	CalcConc7<<< grid.grid, grid.threads >>> (Conc,
+    CalcConc7<<< grid.grid, grid.threads >>> (Conc,
                                           geoD,
                                           neighborX,
                                           neighborY,
@@ -819,61 +819,61 @@ void CalcMacThS7(  real* Conc,
                                           size_Mat,
                                           DD7,
                                           isEvenTimestep);
-	getLastCudaError("CalcConc7 execution failed");
+    getLastCudaError("CalcConc7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void PlaneConcThS7(real* Conc,
-							  int* kPC,
-							  unsigned int numberOfPointskPC,
-							  unsigned int* geoD,
-							  unsigned int* neighborX,
-							  unsigned int* neighborY,
-							  unsigned int* neighborZ,
-							  unsigned int size_Mat,
+                              int* kPC,
+                              unsigned int numberOfPointskPC,
+                              unsigned int* geoD,
+                              unsigned int* neighborX,
+                              unsigned int* neighborY,
+                              unsigned int* neighborZ,
+                              unsigned int size_Mat,
                               unsigned int numberOfThreads,
-							  real* DD7,
-							  bool isEvenTimestep)
+                              real* DD7,
+                              bool isEvenTimestep)
 {
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskPC);
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskPC);
 
-	GetPlaneConc7<<< grid.grid, grid.threads >>> (	Conc,
-												kPC,
-												numberOfPointskPC,
-												geoD,
-												neighborX,
-												neighborY,
-												neighborZ,
-												size_Mat,
-												DD7,
-												isEvenTimestep);
-	getLastCudaError("GetPlaneConc7 execution failed");
+    GetPlaneConc7<<< grid.grid, grid.threads >>> ( Conc,
+                                                kPC,
+                                                numberOfPointskPC,
+                                                geoD,
+                                                neighborX,
+                                                neighborY,
+                                                neighborZ,
+                                                size_Mat,
+                                                DD7,
+                                                isEvenTimestep);
+    getLastCudaError("GetPlaneConc7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void PlaneConcThS27(real* Conc,
-							   int* kPC,
-							   unsigned int numberOfPointskPC,
-							   unsigned int* geoD,
-							   unsigned int* neighborX,
-							   unsigned int* neighborY,
-							   unsigned int* neighborZ,
-							   unsigned int size_Mat,
+                               int* kPC,
+                               unsigned int numberOfPointskPC,
+                               unsigned int* geoD,
+                               unsigned int* neighborX,
+                               unsigned int* neighborY,
+                               unsigned int* neighborZ,
+                               unsigned int size_Mat,
                                unsigned int numberOfThreads,
-							   real* DD27,
-							   bool isEvenTimestep)
+                               real* DD27,
+                               bool isEvenTimestep)
 {
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskPC);
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskPC);
 
-	GetPlaneConc27<<< grid.grid, grid.threads >>> (	Conc,
-												kPC,
-												numberOfPointskPC,
-												geoD,
-												neighborX,
-												neighborY,
-												neighborZ,
-												size_Mat,
-												DD27,
-												isEvenTimestep);
-	getLastCudaError("GetPlaneConc27 execution failed");
+    GetPlaneConc27<<< grid.grid, grid.threads >>> ( Conc,
+                                                kPC,
+                                                numberOfPointskPC,
+                                                geoD,
+                                                neighborX,
+                                                neighborY,
+                                                neighborZ,
+                                                size_Mat,
+                                                DD27,
+                                                isEvenTimestep);
+    getLastCudaError("GetPlaneConc27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void CalcConcentration27( unsigned int numberOfThreads,
@@ -886,9 +886,9 @@ void CalcConcentration27( unsigned int numberOfThreads,
                                      real* DD27,
                                      bool isEvenTimestep)
 {
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
-	CalcConc27<<< grid.grid, grid.threads >>> (  Conc,
+    CalcConc27<<< grid.grid, grid.threads >>> (  Conc,
                                              geoD,
                                              neighborX,
                                              neighborY,
@@ -896,7 +896,7 @@ void CalcConcentration27( unsigned int numberOfThreads,
                                              size_Mat,
                                              DD27,
                                              isEvenTimestep);
-	getLastCudaError("CalcConc27 execution failed");
+    getLastCudaError("CalcConc27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void CalcMedSP27(  real* vxD,
@@ -913,9 +913,9 @@ void CalcMedSP27(  real* vxD,
                               real* DD,
                               bool isEvenTimestep)
 {
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
-	LBCalcMedSP27<<< grid.grid, grid.threads >>> (   vxD,
+    LBCalcMedSP27<<< grid.grid, grid.threads >>> (   vxD,
                                              vyD,
                                              vzD,
                                              rhoD,
@@ -927,75 +927,75 @@ void CalcMedSP27(  real* vxD,
                                              size_Mat,
                                              DD,
                                              isEvenTimestep);
-	getLastCudaError("LBCalcMedSP27 execution failed");
+    getLastCudaError("LBCalcMedSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void CalcMedCompSP27(  real* vxD,
-								  real* vyD,
-								  real* vzD,
-								  real* rhoD,
-								  real* pressD,
-								  unsigned int* geoD,
-								  unsigned int* neighborX,
-								  unsigned int* neighborY,
-								  unsigned int* neighborZ,
-								  unsigned int size_Mat,
-								  unsigned int numberOfThreads,
-								  real* DD,
-								  bool isEvenTimestep)
-{
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-	LBCalcMedCompSP27<<< grid.grid, grid.threads >>> (   vxD,
-												 vyD,
-												 vzD,
-												 rhoD,
-												 pressD,
-												 geoD,
-												 neighborX,
-												 neighborY,
-												 neighborZ,
-												 size_Mat,
-												 DD,
-												 isEvenTimestep);
-	getLastCudaError("LBCalcMedSP27 execution failed");
+                                  real* vyD,
+                                  real* vzD,
+                                  real* rhoD,
+                                  real* pressD,
+                                  unsigned int* geoD,
+                                  unsigned int* neighborX,
+                                  unsigned int* neighborY,
+                                  unsigned int* neighborZ,
+                                  unsigned int size_Mat,
+                                  unsigned int numberOfThreads,
+                                  real* DD,
+                                  bool isEvenTimestep)
+{
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+
+    LBCalcMedCompSP27<<< grid.grid, grid.threads >>> (   vxD,
+                                                 vyD,
+                                                 vzD,
+                                                 rhoD,
+                                                 pressD,
+                                                 geoD,
+                                                 neighborX,
+                                                 neighborY,
+                                                 neighborZ,
+                                                 size_Mat,
+                                                 DD,
+                                                 isEvenTimestep);
+    getLastCudaError("LBCalcMedCompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void CalcMedCompAD27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	real* concD,
-	unsigned int* geoD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int size_Mat,
-	unsigned int numberOfThreads,
-	real* DD,
-	real* DD_AD,
-	bool isEvenTimestep)
-{
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-	LBCalcMedCompAD27 <<< grid.grid, grid.threads >>> (
-		vxD,
-		vyD,
-		vzD,
-		rhoD,
-		pressD,
-		concD,
-		geoD,
-		neighborX,
-		neighborY,
-		neighborZ,
-		size_Mat,
-		DD,
-		DD_AD,
-		isEvenTimestep);
-	getLastCudaError("LBCalcMedAD27 execution failed");
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    real* concD,
+    unsigned int* geoD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int size_Mat,
+    unsigned int numberOfThreads,
+    real* DD,
+    real* DD_AD,
+    bool isEvenTimestep)
+{
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+
+    LBCalcMedCompAD27 <<< grid.grid, grid.threads >>> (
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        concD,
+        geoD,
+        neighborX,
+        neighborY,
+        neighborZ,
+        size_Mat,
+        DD,
+        DD_AD,
+        isEvenTimestep);
+    getLastCudaError("LBCalcMedCompAD27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void CalcMacMedSP27(  real* vxD,
@@ -1012,9 +1012,9 @@ void CalcMacMedSP27(  real* vxD,
                                  unsigned int numberOfThreads,
                                  bool isEvenTimestep)
 {
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
-	LBCalcMacMedSP27<<< grid.grid, grid.threads >>> (   vxD,
+    LBCalcMacMedSP27<<< grid.grid, grid.threads >>> (   vxD,
                                                 vyD,
                                                 vzD,
                                                 rhoD,
@@ -1026,270 +1026,270 @@ void CalcMacMedSP27(  real* vxD,
                                                 tdiff,
                                                 size_Mat,
                                                 isEvenTimestep);
-	getLastCudaError("LBCalcMacMedSP27 execution failed");
+    getLastCudaError("LBCalcMacMedSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void ResetMedianValuesSP27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	unsigned int size_Mat,
-	unsigned int numberOfThreads,
-	bool isEvenTimestep)
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    unsigned int size_Mat,
+    unsigned int numberOfThreads,
+    bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
 
 
-	LBResetMedianValuesSP27 << < grid.grid, grid.threads >> > (
-		vxD,
-		vyD,
-		vzD,
-		rhoD,
-		pressD,
-		size_Mat,
-		isEvenTimestep);
-	getLastCudaError("LBResetMedianValuesSP27 execution failed");
+    LBResetMedianValuesSP27 << < grid.grid, grid.threads >> > (
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        size_Mat,
+        isEvenTimestep);
+    getLastCudaError("LBResetMedianValuesSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void ResetMedianValuesAD27(
-	real* vxD,
-	real* vyD,
-	real* vzD,
-	real* rhoD,
-	real* pressD,
-	real* concD,
-	unsigned int size_Mat,
-	unsigned int numberOfThreads,
-	bool isEvenTimestep)
-{
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-	LBResetMedianValuesAD27 << < grid.grid, grid.threads >> > (
-		vxD,
-		vyD,
-		vzD,
-		rhoD,
-		pressD,
-		concD,
-		size_Mat,
-		isEvenTimestep);
-	getLastCudaError("LBResetMedianValuesAD27 execution failed");
+    real* vxD,
+    real* vyD,
+    real* vzD,
+    real* rhoD,
+    real* pressD,
+    real* concD,
+    unsigned int size_Mat,
+    unsigned int numberOfThreads,
+    bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+
+    LBResetMedianValuesAD27 << < grid.grid, grid.threads >> > (
+        vxD,
+        vyD,
+        vzD,
+        rhoD,
+        pressD,
+        concD,
+        size_Mat,
+        isEvenTimestep);
+    getLastCudaError("LBResetMedianValuesAD27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ,
-										 real* kyzFromfcNEQ,
-										 real* kxzFromfcNEQ,
-										 real* kxxMyyFromfcNEQ,
-										 real* kxxMzzFromfcNEQ,
-										 unsigned int* geoD,
-										 unsigned int* neighborX,
-										 unsigned int* neighborY,
-										 unsigned int* neighborZ,
-										 unsigned int size_Mat,
-										 unsigned int numberOfThreads,
-										 real* DD,
-										 bool isEvenTimestep)
-{
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-	LBCalc2ndMomentsIncompSP27<<< grid.grid, grid.threads >>> (  kxyFromfcNEQ,
-														 kyzFromfcNEQ,
-														 kxzFromfcNEQ,
-														 kxxMyyFromfcNEQ,
-														 kxxMzzFromfcNEQ,
-														 geoD,
-														 neighborX,
-														 neighborY,
-														 neighborZ,
-														 size_Mat,
-														 DD,
-														 isEvenTimestep);
-	getLastCudaError("LBCalc2ndMomentsIncompSP27 execution failed");
+                                         real* kyzFromfcNEQ,
+                                         real* kxzFromfcNEQ,
+                                         real* kxxMyyFromfcNEQ,
+                                         real* kxxMzzFromfcNEQ,
+                                         unsigned int* geoD,
+                                         unsigned int* neighborX,
+                                         unsigned int* neighborY,
+                                         unsigned int* neighborZ,
+                                         unsigned int size_Mat,
+                                         unsigned int numberOfThreads,
+                                         real* DD,
+                                         bool isEvenTimestep)
+{
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+
+    LBCalc2ndMomentsIncompSP27<<< grid.grid, grid.threads >>> (  kxyFromfcNEQ,
+                                                         kyzFromfcNEQ,
+                                                         kxzFromfcNEQ,
+                                                         kxxMyyFromfcNEQ,
+                                                         kxxMzzFromfcNEQ,
+                                                         geoD,
+                                                         neighborX,
+                                                         neighborY,
+                                                         neighborZ,
+                                                         size_Mat,
+                                                         DD,
+                                                         isEvenTimestep);
+    getLastCudaError("LBCalc2ndMomentsIncompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void Calc2ndMomentsCompSP27( real* kxyFromfcNEQ,
-										real* kyzFromfcNEQ,
-										real* kxzFromfcNEQ,
-										real* kxxMyyFromfcNEQ,
-										real* kxxMzzFromfcNEQ,
-										unsigned int* geoD,
-										unsigned int* neighborX,
-										unsigned int* neighborY,
-										unsigned int* neighborZ,
-										unsigned int size_Mat,
-										unsigned int numberOfThreads,
-										real* DD,
-										bool isEvenTimestep)
-{
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-	LBCalc2ndMomentsCompSP27<<< grid.grid, grid.threads >>> (kxyFromfcNEQ,
-													 kyzFromfcNEQ,
-													 kxzFromfcNEQ,
-													 kxxMyyFromfcNEQ,
-													 kxxMzzFromfcNEQ,
-													 geoD,
-													 neighborX,
-													 neighborY,
-													 neighborZ,
-													 size_Mat,
-													 DD,
-													 isEvenTimestep);
-	getLastCudaError("LBCalc2ndMomentsCompSP27 execution failed");
+                                        real* kyzFromfcNEQ,
+                                        real* kxzFromfcNEQ,
+                                        real* kxxMyyFromfcNEQ,
+                                        real* kxxMzzFromfcNEQ,
+                                        unsigned int* geoD,
+                                        unsigned int* neighborX,
+                                        unsigned int* neighborY,
+                                        unsigned int* neighborZ,
+                                        unsigned int size_Mat,
+                                        unsigned int numberOfThreads,
+                                        real* DD,
+                                        bool isEvenTimestep)
+{
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+
+    LBCalc2ndMomentsCompSP27<<< grid.grid, grid.threads >>> (kxyFromfcNEQ,
+                                                     kyzFromfcNEQ,
+                                                     kxzFromfcNEQ,
+                                                     kxxMyyFromfcNEQ,
+                                                     kxxMzzFromfcNEQ,
+                                                     geoD,
+                                                     neighborX,
+                                                     neighborY,
+                                                     neighborZ,
+                                                     size_Mat,
+                                                     DD,
+                                                     isEvenTimestep);
+    getLastCudaError("LBCalc2ndMomentsCompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void Calc3rdMomentsIncompSP27(real* CUMbbb,
-										 real* CUMabc,
-										 real* CUMbac,
-										 real* CUMbca,
-										 real* CUMcba,
-										 real* CUMacb,
-										 real* CUMcab,
-										 unsigned int* geoD,
-										 unsigned int* neighborX,
-										 unsigned int* neighborY,
-										 unsigned int* neighborZ,
-										 unsigned int size_Mat,
-										 unsigned int numberOfThreads,
-										 real* DD,
-										 bool isEvenTimestep)
-{
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-	LBCalc3rdMomentsIncompSP27<<< grid.grid, grid.threads >>> (  CUMbbb,
-														 CUMabc,
-														 CUMbac,
-														 CUMbca,
-														 CUMcba,
-														 CUMacb,
-														 CUMcab,
-														 geoD,
-														 neighborX,
-														 neighborY,
-														 neighborZ,
-														 DD,
-														 size_Mat,
-														 isEvenTimestep);
-	getLastCudaError("LBCalc3rdMomentsIncompSP27 execution failed");
+                                         real* CUMabc,
+                                         real* CUMbac,
+                                         real* CUMbca,
+                                         real* CUMcba,
+                                         real* CUMacb,
+                                         real* CUMcab,
+                                         unsigned int* geoD,
+                                         unsigned int* neighborX,
+                                         unsigned int* neighborY,
+                                         unsigned int* neighborZ,
+                                         unsigned int size_Mat,
+                                         unsigned int numberOfThreads,
+                                         real* DD,
+                                         bool isEvenTimestep)
+{
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+
+    LBCalc3rdMomentsIncompSP27<<< grid.grid, grid.threads >>> (  CUMbbb,
+                                                         CUMabc,
+                                                         CUMbac,
+                                                         CUMbca,
+                                                         CUMcba,
+                                                         CUMacb,
+                                                         CUMcab,
+                                                         geoD,
+                                                         neighborX,
+                                                         neighborY,
+                                                         neighborZ,
+                                                         DD,
+                                                         size_Mat,
+                                                         isEvenTimestep);
+    getLastCudaError("LBCalc3rdMomentsIncompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void Calc3rdMomentsCompSP27( real* CUMbbb,
-										real* CUMabc,
-										real* CUMbac,
-										real* CUMbca,
-										real* CUMcba,
-										real* CUMacb,
-										real* CUMcab,
-										unsigned int* geoD,
-										unsigned int* neighborX,
-										unsigned int* neighborY,
-										unsigned int* neighborZ,
-										unsigned int size_Mat,
-										unsigned int numberOfThreads,
-										real* DD,
-										bool isEvenTimestep)
-{
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-	LBCalc3rdMomentsCompSP27<<< grid.grid, grid.threads >>> (CUMbbb,
-													 CUMabc,
-													 CUMbac,
-													 CUMbca,
-													 CUMcba,
-													 CUMacb,
-													 CUMcab,
-													 geoD,
-													 neighborX,
-													 neighborY,
-													 neighborZ,
-													 DD,
-													 size_Mat,
-													 isEvenTimestep);
-	getLastCudaError("LBCalc3rdMomentsCompSP27 execution failed");
+                                        real* CUMabc,
+                                        real* CUMbac,
+                                        real* CUMbca,
+                                        real* CUMcba,
+                                        real* CUMacb,
+                                        real* CUMcab,
+                                        unsigned int* geoD,
+                                        unsigned int* neighborX,
+                                        unsigned int* neighborY,
+                                        unsigned int* neighborZ,
+                                        unsigned int size_Mat,
+                                        unsigned int numberOfThreads,
+                                        real* DD,
+                                        bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+
+    LBCalc3rdMomentsCompSP27<<< grid.grid, grid.threads >>> (CUMbbb,
+                                                     CUMabc,
+                                                     CUMbac,
+                                                     CUMbca,
+                                                     CUMcba,
+                                                     CUMacb,
+                                                     CUMcab,
+                                                     geoD,
+                                                     neighborX,
+                                                     neighborY,
+                                                     neighborZ,
+                                                     DD,
+                                                     size_Mat,
+                                                     isEvenTimestep);
+    getLastCudaError("LBCalc3rdMomentsCompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void CalcHigherMomentsIncompSP27(real* CUMcbb,
-											real* CUMbcb,
-											real* CUMbbc,
-											real* CUMcca,
-											real* CUMcac,
-											real* CUMacc,
-											real* CUMbcc,
-											real* CUMcbc,
-											real* CUMccb,
-											real* CUMccc,
-											unsigned int* geoD,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											unsigned int numberOfThreads,
-											real* DD,
-											bool isEvenTimestep)
-{
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-	LBCalcHigherMomentsIncompSP27<<< grid.grid, grid.threads >>> (CUMcbb,
-														  CUMbcb,
-														  CUMbbc,
-														  CUMcca,
-														  CUMcac,
-														  CUMacc,
-														  CUMbcc,
-														  CUMcbc,
-														  CUMccb,
-														  CUMccc,
-														  geoD,
-														  neighborX,
-														  neighborY,
-														  neighborZ,
-														  DD,
-														  size_Mat,
-														  isEvenTimestep);
-	getLastCudaError("LBCalcHigherMomentsIncompSP27 execution failed");
+                                            real* CUMbcb,
+                                            real* CUMbbc,
+                                            real* CUMcca,
+                                            real* CUMcac,
+                                            real* CUMacc,
+                                            real* CUMbcc,
+                                            real* CUMcbc,
+                                            real* CUMccb,
+                                            real* CUMccc,
+                                            unsigned int* geoD,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned int size_Mat,
+                                            unsigned int numberOfThreads,
+                                            real* DD,
+                                            bool isEvenTimestep)
+{
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+
+    LBCalcHigherMomentsIncompSP27<<< grid.grid, grid.threads >>> (CUMcbb,
+                                                          CUMbcb,
+                                                          CUMbbc,
+                                                          CUMcca,
+                                                          CUMcac,
+                                                          CUMacc,
+                                                          CUMbcc,
+                                                          CUMcbc,
+                                                          CUMccb,
+                                                          CUMccc,
+                                                          geoD,
+                                                          neighborX,
+                                                          neighborY,
+                                                          neighborZ,
+                                                          DD,
+                                                          size_Mat,
+                                                          isEvenTimestep);
+    getLastCudaError("LBCalcHigherMomentsIncompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void CalcHigherMomentsCompSP27(  real* CUMcbb,
-											real* CUMbcb,
-											real* CUMbbc,
-											real* CUMcca,
-											real* CUMcac,
-											real* CUMacc,
-											real* CUMbcc,
-											real* CUMcbc,
-											real* CUMccb,
-											real* CUMccc,
-											unsigned int* geoD,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											unsigned int numberOfThreads,
-											real* DD,
-											bool isEvenTimestep)
-{
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
-
-	LBCalcHigherMomentsCompSP27<<< grid.grid, grid.threads >>> (  CUMcbb,
-														  CUMbcb,
-														  CUMbbc,
-														  CUMcca,
-														  CUMcac,
-														  CUMacc,
-														  CUMbcc,
-														  CUMcbc,
-														  CUMccb,
-														  CUMccc,
-														  geoD,
-														  neighborX,
-														  neighborY,
-														  neighborZ,
-														  DD,
-														  size_Mat,
-														  isEvenTimestep);
-	getLastCudaError("LBCalcHigherMomentsCompSP27 execution failed");
+                                            real* CUMbcb,
+                                            real* CUMbbc,
+                                            real* CUMcca,
+                                            real* CUMcac,
+                                            real* CUMacc,
+                                            real* CUMbcc,
+                                            real* CUMcbc,
+                                            real* CUMccb,
+                                            real* CUMccc,
+                                            unsigned int* geoD,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned int size_Mat,
+                                            unsigned int numberOfThreads,
+                                            real* DD,
+                                            bool isEvenTimestep)
+{
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
+
+    LBCalcHigherMomentsCompSP27<<< grid.grid, grid.threads >>> (  CUMcbb,
+                                                          CUMbcb,
+                                                          CUMbbc,
+                                                          CUMcca,
+                                                          CUMcac,
+                                                          CUMacc,
+                                                          CUMbcc,
+                                                          CUMcbc,
+                                                          CUMccb,
+                                                          CUMccc,
+                                                          geoD,
+                                                          neighborX,
+                                                          neighborY,
+                                                          neighborZ,
+                                                          DD,
+                                                          size_Mat,
+                                                          isEvenTimestep);
+    getLastCudaError("LBCalcHigherMomentsCompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void LBCalcMeasurePoints27(real* vxMP,
@@ -1309,9 +1309,9 @@ void LBCalcMeasurePoints27(real* vxMP,
                                       unsigned int numberOfThreads,
                                       bool isEvenTimestep)
 {
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskMP);
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfPointskMP);
 
-	LBCalcMeasurePoints<<< grid.grid, grid.threads >>> (vxMP,
+    LBCalcMeasurePoints<<< grid.grid, grid.threads >>> (vxMP,
                                                 vyMP,
                                                 vzMP,
                                                 rhoMP,
@@ -1326,7 +1326,7 @@ void LBCalcMeasurePoints27(real* vxMP,
                                                 size_Mat,
                                                 DD,
                                                 isEvenTimestep);
-	getLastCudaError("LBCalcMeasurePoints execution failed");
+    getLastCudaError("LBCalcMeasurePoints execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void BcPress27( int nx,
@@ -1342,10 +1342,10 @@ void BcPress27( int nx,
                            unsigned int size_Mat,
                            bool isEvenTimestep)
 {
-	dim3 threads       ( grid_nx, 1, 1 );
-	dim3 grid          ( grid_ny, 1 );
+    dim3 threads       ( grid_nx, 1, 1 );
+    dim3 grid          ( grid_ny, 1 );
 
-	LB_BC_Press_East27<<< grid, threads >>> ( nx,
+    LB_BC_Press_East27<<< grid, threads >>> ( nx,
                                                 ny,
                                                 tz,
                                                 bcMatD,
@@ -1355,7 +1355,7 @@ void BcPress27( int nx,
                                                 DD,
                                                 size_Mat,
                                                 isEvenTimestep);
-	getLastCudaError("LB_BC_Press_East27 execution failed");
+    getLastCudaError("LB_BC_Press_East27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void BcVel27(int nx,
@@ -1374,10 +1374,10 @@ void BcVel27(int nx,
                         real u0x,
                         real om)
 {
-	dim3 threads       ( grid_nx, 1, 1 );
-	dim3 grid          ( grid_ny, 1 );
+    dim3 threads       ( grid_nx, 1, 1 );
+    dim3 grid          ( grid_ny, 1 );
 
-	LB_BC_Vel_West_27<<< grid, threads >>> (  nx,
+    LB_BC_Vel_West_27<<< grid, threads >>> (  nx,
                                                 ny,
                                                 nz,
                                                 itz,
@@ -1392,7 +1392,7 @@ void BcVel27(int nx,
                                                 grid_nx,
                                                 grid_ny,
                                                 om);
-	getLastCudaError("LB_BC_Vel_West_27 execution failed");
+    getLastCudaError("LB_BC_Vel_West_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QADPressDev7( unsigned int numberOfThreads,
@@ -1411,7 +1411,7 @@ void QADPressDev7( unsigned int numberOfThreads,
                               unsigned int size_Mat,
                               bool isEvenTimestep)
 {
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
       QADPress7<<< grid.grid, grid.threads >>>( DD,
                                        DD7,
@@ -1427,7 +1427,7 @@ void QADPressDev7( unsigned int numberOfThreads,
                                        neighborZ,
                                        size_Mat,
                                        isEvenTimestep);
-	getLastCudaError("QADPress7 execution failed");
+    getLastCudaError("QADPress7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QADPressDev27(unsigned int numberOfThreads,
@@ -1446,7 +1446,7 @@ void QADPressDev27(unsigned int numberOfThreads,
                               unsigned int size_Mat,
                               bool isEvenTimestep)
 {
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
       QADPress27<<< grid.grid, grid.threads >>>(   DD,
                                           DD27,
@@ -1462,39 +1462,39 @@ void QADPressDev27(unsigned int numberOfThreads,
                                           neighborZ,
                                           size_Mat,
                                           isEvenTimestep);
-	getLastCudaError("QADPress27 execution failed");
+    getLastCudaError("QADPress27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QADPressNEQNeighborDev27(
-											unsigned int numberOfThreads,
-											real* DD,
-											real* DD27,
-											int* k_Q,
-											int* k_N,
-											int numberOfBCnodes,
-											unsigned int* neighborX,
-											unsigned int* neighborY,
-											unsigned int* neighborZ,
-											unsigned int size_Mat,
-											bool isEvenTimestep
-										)
-{
-
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-	QADPressNEQNeighbor27<<< grid.grid, grid.threads >>>(
-												DD,
-												DD27,
-												k_Q,
-												k_N,
-												numberOfBCnodes,
-												neighborX,
-												neighborY,
-												neighborZ,
-												size_Mat,
-												isEvenTimestep
-											  );
-   	getLastCudaError("QADPressNEQNeighbor27 execution failed");
+                                            unsigned int numberOfThreads,
+                                            real* DD,
+                                            real* DD27,
+                                            int* k_Q,
+                                            int* k_N,
+                                            int numberOfBCnodes,
+                                            unsigned int* neighborX,
+                                            unsigned int* neighborY,
+                                            unsigned int* neighborZ,
+                                            unsigned int size_Mat,
+                                            bool isEvenTimestep
+                                        )
+{
+
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
+
+    QADPressNEQNeighbor27<<< grid.grid, grid.threads >>>(
+                                                DD,
+                                                DD27,
+                                                k_Q,
+                                                k_N,
+                                                numberOfBCnodes,
+                                                neighborX,
+                                                neighborY,
+                                                neighborZ,
+                                                size_Mat,
+                                                isEvenTimestep
+                                              );
+       getLastCudaError("QADPressNEQNeighbor27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QADVelDev7(unsigned int numberOfThreads,
@@ -1513,9 +1513,9 @@ void QADVelDev7(unsigned int numberOfThreads,
                            unsigned int size_Mat,
                            bool isEvenTimestep)
 {
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
-      QADVel7<<< grid.grid, grid.threads >>> (  
+      QADVel7<<< grid.grid, grid.threads >>> (
                                        DD,
                                        DD7,
                                        temp,
@@ -1530,7 +1530,7 @@ void QADVelDev7(unsigned int numberOfThreads,
                                        neighborZ,
                                        size_Mat,
                                        isEvenTimestep);
-	getLastCudaError("QADVel7 execution failed");
+    getLastCudaError("QADVel7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QADVelDev27(  unsigned int numberOfThreads,
@@ -1583,7 +1583,7 @@ void QADDev7(unsigned int numberOfThreads,
                         unsigned int size_Mat,
                         bool isEvenTimestep)
 {
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
       QAD7<<< grid.grid, grid.threads >>> (     DD,
                                        DD7,
@@ -1637,74 +1637,74 @@ void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
 
 //////////////////////////////////////////////////////////////////////////
 void ADSlipVelDevComp(
-	uint numberOfThreads,
-	real * normalX,
-	real * normalY,
-	real * normalZ,
-	real * distributions,
-	real * distributionsAD,
-	int* QindexArray,
-	real * Qarrays,
-	uint numberOfBCnodes,
-	real omegaDiffusivity,
-	uint * neighborX,
-	uint * neighborY,
-	uint * neighborZ,
-	uint size_Mat,
-	bool isEvenTimestep)
-{
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-	AD_SlipVelDeviceComp << < grid.grid, grid.threads >> > (
-		normalX,
-		normalY,
-		normalZ,
-		distributions,
-		distributionsAD,
-		QindexArray,
-		Qarrays,
-		numberOfBCnodes,
-		omegaDiffusivity,
-		neighborX,
-		neighborY,
-		neighborZ,
-		size_Mat,
-		isEvenTimestep);
-	getLastCudaError("AD_SlipVelDeviceComp execution failed");
+    uint numberOfThreads,
+    real * normalX,
+    real * normalY,
+    real * normalZ,
+    real * distributions,
+    real * distributionsAD,
+    int* QindexArray,
+    real * Qarrays,
+    uint numberOfBCnodes,
+    real omegaDiffusivity,
+    uint * neighborX,
+    uint * neighborY,
+    uint * neighborZ,
+    uint size_Mat,
+    bool isEvenTimestep)
+{
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
+
+    AD_SlipVelDeviceComp << < grid.grid, grid.threads >> > (
+        normalX,
+        normalY,
+        normalZ,
+        distributions,
+        distributionsAD,
+        QindexArray,
+        Qarrays,
+        numberOfBCnodes,
+        omegaDiffusivity,
+        neighborX,
+        neighborY,
+        neighborZ,
+        size_Mat,
+        isEvenTimestep);
+    getLastCudaError("AD_SlipVelDeviceComp execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 
 void QADDirichletDev27( unsigned int numberOfThreads,
-								   real* DD,
-								   real* DD27,
-								   real* temp,
-								   real diffusivity,
-								   int* k_Q,
-								   real* QQ,
-								   unsigned int numberOfBCnodes,
-								   real om1,
-								   unsigned int* neighborX,
-								   unsigned int* neighborY,
-								   unsigned int* neighborZ,
-								   unsigned int size_Mat,
-								   bool isEvenTimestep)
-{
-   	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
+                                   real* DD,
+                                   real* DD27,
+                                   real* temp,
+                                   real diffusivity,
+                                   int* k_Q,
+                                   real* QQ,
+                                   unsigned int numberOfBCnodes,
+                                   real om1,
+                                   unsigned int* neighborX,
+                                   unsigned int* neighborY,
+                                   unsigned int* neighborZ,
+                                   unsigned int size_Mat,
+                                   bool isEvenTimestep)
+{
+       vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
       QADDirichlet27<<< grid.grid, grid.threads >>> (
-											   DD,
-											   DD27,
-											   temp,
-											   diffusivity,
-											   k_Q,
-											   QQ,
-											   numberOfBCnodes,
-											   om1,
-											   neighborX,
-											   neighborY,
-											   neighborZ,
-											   size_Mat,
-											   isEvenTimestep);
+                                               DD,
+                                               DD27,
+                                               temp,
+                                               diffusivity,
+                                               k_Q,
+                                               QQ,
+                                               numberOfBCnodes,
+                                               om1,
+                                               neighborX,
+                                               neighborY,
+                                               neighborZ,
+                                               size_Mat,
+                                               isEvenTimestep);
       getLastCudaError("QADDirichletDev27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
@@ -1742,212 +1742,212 @@ void QADBBDev27(unsigned int numberOfThreads,
 }
 //////////////////////////////////////////////////////////////////////////
 void QNoSlipADincompDev7(unsigned int numberOfThreads,
-									real* DD,
-									real* DD7,
-									real* temp,
-									real diffusivity,
-									int* k_Q,
-									real* QQ,
-									unsigned int numberOfBCnodes,
-									real om1,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int size_Mat,
-									bool isEvenTimestep)
+                                    real* DD,
+                                    real* DD7,
+                                    real* temp,
+                                    real diffusivity,
+                                    int* k_Q,
+                                    real* QQ,
+                                    unsigned int numberOfBCnodes,
+                                    real om1,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    unsigned int size_Mat,
+                                    bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
       QNoSlipADincomp7<<< grid.grid, grid.threads >>> (
-											   DD,
-											   DD7,
-											   temp,
-											   diffusivity,
-											   k_Q,
-											   QQ,
-											   numberOfBCnodes,
-											   om1,
-											   neighborX,
-											   neighborY,
-											   neighborZ,
-											   size_Mat,
-											   isEvenTimestep);
+                                               DD,
+                                               DD7,
+                                               temp,
+                                               diffusivity,
+                                               k_Q,
+                                               QQ,
+                                               numberOfBCnodes,
+                                               om1,
+                                               neighborX,
+                                               neighborY,
+                                               neighborZ,
+                                               size_Mat,
+                                               isEvenTimestep);
       getLastCudaError("QNoSlipADincomp7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QNoSlipADincompDev27(  unsigned int numberOfThreads,
-									   real* DD,
-									   real* DD27,
-									   real* temp,
-									   real diffusivity,
-									   int* k_Q,
-									   real* QQ,
-									   unsigned int numberOfBCnodes,
-									   real om1,
-									   unsigned int* neighborX,
-									   unsigned int* neighborY,
-									   unsigned int* neighborZ,
-									   unsigned int size_Mat,
-									   bool isEvenTimestep)
+                                       real* DD,
+                                       real* DD27,
+                                       real* temp,
+                                       real diffusivity,
+                                       int* k_Q,
+                                       real* QQ,
+                                       unsigned int numberOfBCnodes,
+                                       real om1,
+                                       unsigned int* neighborX,
+                                       unsigned int* neighborY,
+                                       unsigned int* neighborZ,
+                                       unsigned int size_Mat,
+                                       bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
       QNoSlipADincomp27<<< grid.grid, grid.threads >>> (
-											   DD,
-											   DD27,
-											   temp,
-											   diffusivity,
-											   k_Q,
-											   QQ,
-											   numberOfBCnodes,
-											   om1,
-											   neighborX,
-											   neighborY,
-											   neighborZ,
-											   size_Mat,
-											   isEvenTimestep);
+                                               DD,
+                                               DD27,
+                                               temp,
+                                               diffusivity,
+                                               k_Q,
+                                               QQ,
+                                               numberOfBCnodes,
+                                               om1,
+                                               neighborX,
+                                               neighborY,
+                                               neighborZ,
+                                               size_Mat,
+                                               isEvenTimestep);
       getLastCudaError("QNoSlipADincomp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QADVeloIncompDev7( unsigned int numberOfThreads,
-								   real* DD,
-								   real* DD7,
-								   real* temp,
-								   real* velo,
-								   real diffusivity,
-								   int* k_Q,
-								   real* QQ,
-								   unsigned int numberOfBCnodes,
-								   real om1,
-								   unsigned int* neighborX,
-								   unsigned int* neighborY,
-								   unsigned int* neighborZ,
-								   unsigned int size_Mat,
-								   bool isEvenTimestep)
+                                   real* DD,
+                                   real* DD7,
+                                   real* temp,
+                                   real* velo,
+                                   real diffusivity,
+                                   int* k_Q,
+                                   real* QQ,
+                                   unsigned int numberOfBCnodes,
+                                   real om1,
+                                   unsigned int* neighborX,
+                                   unsigned int* neighborY,
+                                   unsigned int* neighborZ,
+                                   unsigned int size_Mat,
+                                   bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
       QADVeloIncomp7<<< grid.grid, grid.threads >>> ( DD,
-	  										   DD7,
-											   temp,
-											   velo,
-											   diffusivity,
-											   k_Q,
-											   QQ,
-											   numberOfBCnodes,
-											   om1,
-											   neighborX,
-											   neighborY,
-											   neighborZ,
-											   size_Mat,
-											   isEvenTimestep);
+                                                 DD7,
+                                               temp,
+                                               velo,
+                                               diffusivity,
+                                               k_Q,
+                                               QQ,
+                                               numberOfBCnodes,
+                                               om1,
+                                               neighborX,
+                                               neighborY,
+                                               neighborZ,
+                                               size_Mat,
+                                               isEvenTimestep);
       getLastCudaError("QADVeloIncomp7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QADVeloIncompDev27(   unsigned int numberOfThreads,
-									  real* DD,
-									  real* DD27,
-									  real* temp,
-									  real* velo,
-									  real diffusivity,
-									  int* k_Q,
-									  real* QQ,
-									  unsigned int numberOfBCnodes,
-									  real om1,
-									  unsigned int* neighborX,
-									  unsigned int* neighborY,
-									  unsigned int* neighborZ,
-									  unsigned int size_Mat,
-									  bool isEvenTimestep)
+                                      real* DD,
+                                      real* DD27,
+                                      real* temp,
+                                      real* velo,
+                                      real diffusivity,
+                                      int* k_Q,
+                                      real* QQ,
+                                      unsigned int numberOfBCnodes,
+                                      real om1,
+                                      unsigned int* neighborX,
+                                      unsigned int* neighborY,
+                                      unsigned int* neighborZ,
+                                      unsigned int size_Mat,
+                                      bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
       QADVeloIncomp27<<< grid.grid, grid.threads >>> (
-											  DD,
-											  DD27,
-											  temp,
-											  velo,
-											  diffusivity,
-											  k_Q,
-											  QQ,
-											  numberOfBCnodes,
-											  om1,
-											  neighborX,
-											  neighborY,
-											  neighborZ,
-											  size_Mat,
-											  isEvenTimestep);
+                                              DD,
+                                              DD27,
+                                              temp,
+                                              velo,
+                                              diffusivity,
+                                              k_Q,
+                                              QQ,
+                                              numberOfBCnodes,
+                                              om1,
+                                              neighborX,
+                                              neighborY,
+                                              neighborZ,
+                                              size_Mat,
+                                              isEvenTimestep);
       getLastCudaError("QADVeloIncomp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QADPressIncompDev7( unsigned int numberOfThreads,
-									  real* DD,
-									  real* DD7,
-									  real* temp,
-									  real* velo,
-									  real diffusivity,
-									  int* k_Q,
-									  real* QQ,
-									  unsigned int numberOfBCnodes,
-									  real om1,
-									  unsigned int* neighborX,
-									  unsigned int* neighborY,
-									  unsigned int* neighborZ,
-									  unsigned int size_Mat,
-									  bool isEvenTimestep)
+                                      real* DD,
+                                      real* DD7,
+                                      real* temp,
+                                      real* velo,
+                                      real diffusivity,
+                                      int* k_Q,
+                                      real* QQ,
+                                      unsigned int numberOfBCnodes,
+                                      real om1,
+                                      unsigned int* neighborX,
+                                      unsigned int* neighborY,
+                                      unsigned int* neighborZ,
+                                      unsigned int size_Mat,
+                                      bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
       QADPressIncomp7<<< grid.grid, grid.threads >>>(
-											   DD,
-											   DD7,
-											   temp,
-											   velo,
-											   diffusivity,
-											   k_Q,
-											   QQ,
-											   numberOfBCnodes,
-											   om1,
-											   neighborX,
-											   neighborY,
-											   neighborZ,
-											   size_Mat,
-											   isEvenTimestep);
+                                               DD,
+                                               DD7,
+                                               temp,
+                                               velo,
+                                               diffusivity,
+                                               k_Q,
+                                               QQ,
+                                               numberOfBCnodes,
+                                               om1,
+                                               neighborX,
+                                               neighborY,
+                                               neighborZ,
+                                               size_Mat,
+                                               isEvenTimestep);
       getLastCudaError("QADPressIncomp7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QADPressIncompDev27(  unsigned int numberOfThreads,
-									  real* DD,
-									  real* DD27,
-									  real* temp,
-									  real* velo,
-									  real diffusivity,
-									  int* k_Q,
-									  real* QQ,
-									  unsigned int numberOfBCnodes,
-									  real om1,
-									  unsigned int* neighborX,
-									  unsigned int* neighborY,
-									  unsigned int* neighborZ,
-									  unsigned int size_Mat,
-									  bool isEvenTimestep)
+                                      real* DD,
+                                      real* DD27,
+                                      real* temp,
+                                      real* velo,
+                                      real diffusivity,
+                                      int* k_Q,
+                                      real* QQ,
+                                      unsigned int numberOfBCnodes,
+                                      real om1,
+                                      unsigned int* neighborX,
+                                      unsigned int* neighborY,
+                                      unsigned int* neighborZ,
+                                      unsigned int size_Mat,
+                                      bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
-      QADPressIncomp27<<< grid.grid, grid.threads >>>(DD, 
-	  										  DD27, 
-											  temp,
-											  velo,
-											  diffusivity,
-											  k_Q,
-											  QQ,
-											  numberOfBCnodes,
-											  om1,
-											  neighborX,
-											  neighborY,
-											  neighborZ,
-											  size_Mat,
-											  isEvenTimestep);
+      QADPressIncomp27<<< grid.grid, grid.threads >>>(DD,
+                                                DD27,
+                                              temp,
+                                              velo,
+                                              diffusivity,
+                                              k_Q,
+                                              QQ,
+                                              numberOfBCnodes,
+                                              om1,
+                                              neighborX,
+                                              neighborY,
+                                              neighborZ,
+                                              size_Mat,
+                                              isEvenTimestep);
       getLastCudaError("QADPressIncomp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
@@ -1991,44 +1991,44 @@ void QDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions*
 }
 //////////////////////////////////////////////////////////////////////////
 void QDevCompThinWalls27(unsigned int numberOfThreads,
-									real* DD,
-									int* k_Q,
-									real* QQ,
-									unsigned int numberOfBCnodes,
-									real om1,
-									unsigned int* geom,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int* neighborWSB,
-									unsigned int size_Mat,
-									bool isEvenTimestep)
+                                    real* DD,
+                                    int* k_Q,
+                                    real* QQ,
+                                    unsigned int numberOfBCnodes,
+                                    real om1,
+                                    unsigned int* geom,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    unsigned int* neighborWSB,
+                                    unsigned int size_Mat,
+                                    bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
    QDeviceCompThinWallsPartOne27 <<< grid.grid, grid.threads >>> (DD,
-														 k_Q,
-														 QQ,
-														 numberOfBCnodes,
-														 om1,
-														 neighborX,
-														 neighborY,
-														 neighborZ,
-														 size_Mat,
-														 isEvenTimestep);
+                                                         k_Q,
+                                                         QQ,
+                                                         numberOfBCnodes,
+                                                         om1,
+                                                         neighborX,
+                                                         neighborY,
+                                                         neighborZ,
+                                                         size_Mat,
+                                                         isEvenTimestep);
    getLastCudaError("QDeviceCompThinWallsPartOne27 execution failed");
 
    QThinWallsPartTwo27 <<< grid.grid, grid.threads >>> ( DD,
-												k_Q,
-												QQ,
-												numberOfBCnodes,
-												geom,
-												neighborX,
-												neighborY,
-												neighborZ,
-												neighborWSB,
-												size_Mat,
-												isEvenTimestep);
+                                                k_Q,
+                                                QQ,
+                                                numberOfBCnodes,
+                                                geom,
+                                                neighborX,
+                                                neighborY,
+                                                neighborZ,
+                                                neighborWSB,
+                                                size_Mat,
+                                                isEvenTimestep);
    getLastCudaError("QThinWallsPartTwo27 execution failed");
 
 }
@@ -2053,58 +2053,58 @@ void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, QforBoundaryC
 }
 //////////////////////////////////////////////////////////////////////////
 void QDevIncompHighNu27( unsigned int numberOfThreads,
-									real* DD,
-									int* k_Q,
-									real* QQ,
-									unsigned int numberOfBCnodes,
-									real om1,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int size_Mat,
-									bool isEvenTimestep)
+                                    real* DD,
+                                    int* k_Q,
+                                    real* QQ,
+                                    unsigned int numberOfBCnodes,
+                                    real om1,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    unsigned int size_Mat,
+                                    bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
       QDeviceIncompHighNu27<<< grid.grid, grid.threads >>> (
-												   DD,
-												   k_Q,
-												   QQ,
-												   numberOfBCnodes,
-												   om1,
-												   neighborX,
-												   neighborY,
-												   neighborZ,
-												   size_Mat,
-												   isEvenTimestep);
+                                                   DD,
+                                                   k_Q,
+                                                   QQ,
+                                                   numberOfBCnodes,
+                                                   om1,
+                                                   neighborX,
+                                                   neighborY,
+                                                   neighborZ,
+                                                   size_Mat,
+                                                   isEvenTimestep);
       getLastCudaError("QDeviceIncompHighNu27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QDevCompHighNu27(   unsigned int numberOfThreads,
-									real* DD,
-									int* k_Q,
-									real* QQ,
-									unsigned int numberOfBCnodes,
-									real om1,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int size_Mat,
-									bool isEvenTimestep)
+                                    real* DD,
+                                    int* k_Q,
+                                    real* QQ,
+                                    unsigned int numberOfBCnodes,
+                                    real om1,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    unsigned int size_Mat,
+                                    bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
       QDeviceCompHighNu27<<< grid.grid, grid.threads >>> (
-												   DD,
-												   k_Q,
-												   QQ,
-												   numberOfBCnodes,
-												   om1,
-												   neighborX,
-												   neighborY,
-												   neighborZ,
-												   size_Mat,
-												   isEvenTimestep);
+                                                   DD,
+                                                   k_Q,
+                                                   QQ,
+                                                   numberOfBCnodes,
+                                                   om1,
+                                                   neighborX,
+                                                   neighborY,
+                                                   neighborZ,
+                                                   size_Mat,
+                                                   isEvenTimestep);
       getLastCudaError("QDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
@@ -2130,59 +2130,59 @@ void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, QforBoundaryCo
 }
 //////////////////////////////////////////////////////////////////////////
 void QVelDeviceCouette27(unsigned int numberOfThreads,
-									real* vx,
-									real* vy,
-									real* vz,
-									real* DD,
-									int* k_Q,
-									real* QQ,
-									unsigned int numberOfBCnodes,
-									real om1,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int size_Mat,
-									bool isEvenTimestep)
+                                    real* vx,
+                                    real* vy,
+                                    real* vz,
+                                    real* DD,
+                                    int* k_Q,
+                                    real* QQ,
+                                    unsigned int numberOfBCnodes,
+                                    real om1,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    unsigned int size_Mat,
+                                    bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
       QVelDevCouette27<<< grid.grid, grid.threads >>> ( vx,
-												vy,
-												vz,
-												DD,
-												k_Q,
-												QQ,
-												numberOfBCnodes,
-												om1,
-												neighborX,
-												neighborY,
-												neighborZ,
-												size_Mat,
-												isEvenTimestep);
+                                                vy,
+                                                vz,
+                                                DD,
+                                                k_Q,
+                                                QQ,
+                                                numberOfBCnodes,
+                                                om1,
+                                                neighborX,
+                                                neighborY,
+                                                neighborZ,
+                                                size_Mat,
+                                                isEvenTimestep);
       getLastCudaError("QVelDevicePlainBB27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QVelDevice1h27(   unsigned int numberOfThreads,
-								  int nx,
-								  int ny,
-								  real* vx,
-								  real* vy,
-								  real* vz,
-								  real* DD,
-								  int* k_Q,
-								  real* QQ,
-								  unsigned int numberOfBCnodes,
-								  real om1,
-								  real Phi,
-								  real angularVelocity,
-								  unsigned int* neighborX,
-								  unsigned int* neighborY,
-								  unsigned int* neighborZ,
-								  real* coordX,
-								  real* coordY,
-								  real* coordZ,
-								  unsigned int size_Mat,
-								  bool isEvenTimestep)
+                                  int nx,
+                                  int ny,
+                                  real* vx,
+                                  real* vy,
+                                  real* vz,
+                                  real* DD,
+                                  int* k_Q,
+                                  real* QQ,
+                                  unsigned int numberOfBCnodes,
+                                  real om1,
+                                  real Phi,
+                                  real angularVelocity,
+                                  unsigned int* neighborX,
+                                  unsigned int* neighborY,
+                                  unsigned int* neighborZ,
+                                  real* coordX,
+                                  real* coordY,
+                                  real* coordZ,
+                                  unsigned int size_Mat,
+                                  bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
@@ -2196,14 +2196,14 @@ void QVelDevice1h27(   unsigned int numberOfThreads,
                                           QQ,
                                           numberOfBCnodes,
                                           om1,
-										  Phi,
-										  angularVelocity,
+                                          Phi,
+                                          angularVelocity,
                                           neighborX,
                                           neighborY,
                                           neighborZ,
-										  coordX,
-										  coordY,
-										  coordZ,
+                                          coordX,
+                                          coordY,
+                                          coordZ,
                                           size_Mat,
                                           isEvenTimestep);
       getLastCudaError("QVelDevice27 execution failed");
@@ -2234,36 +2234,36 @@ void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions*
 }
 //////////////////////////////////////////////////////////////////////////
 void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
-									  real* vx,
-									  real* vy,
-									  real* vz,
-									  real* DD,
-									  int* k_Q,
-									  real* QQ,
-									  unsigned int numberOfBCnodes,
-									  real om1,
-									  unsigned int* neighborX,
-									  unsigned int* neighborY,
-									  unsigned int* neighborZ,
-									  unsigned int size_Mat,
-									  bool isEvenTimestep)
+                                      real* vx,
+                                      real* vy,
+                                      real* vz,
+                                      real* DD,
+                                      int* k_Q,
+                                      real* QQ,
+                                      unsigned int numberOfBCnodes,
+                                      real om1,
+                                      unsigned int* neighborX,
+                                      unsigned int* neighborY,
+                                      unsigned int* neighborZ,
+                                      unsigned int size_Mat,
+                                      bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
       QVelDeviceCompPlusSlip27<<< grid.grid, grid.threads >>> (
-													  vx,
-													  vy,
-													  vz,
-													  DD,
-													  k_Q,
-													  QQ,
-													  numberOfBCnodes,
-													  om1,
-													  neighborX,
-													  neighborY,
-													  neighborZ,
-													  size_Mat,
-													  isEvenTimestep);
+                                                      vx,
+                                                      vy,
+                                                      vz,
+                                                      DD,
+                                                      k_Q,
+                                                      QQ,
+                                                      numberOfBCnodes,
+                                                      om1,
+                                                      neighborX,
+                                                      neighborY,
+                                                      neighborZ,
+                                                      size_Mat,
+                                                      isEvenTimestep);
       getLastCudaError("QVelDeviceCompPlusSlip27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
@@ -2277,7 +2277,7 @@ void QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditio
             boundaryCondition->Vy,
             boundaryCondition->Vz,
             parameterDevice->distributions.f[0],
-            boundaryCondition->k,        
+            boundaryCondition->k,
             boundaryCondition->q27[0],
             boundaryCondition->numberOfBCnodes,
             parameterDevice->omega,
@@ -2290,40 +2290,40 @@ void QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditio
 }
 //////////////////////////////////////////////////////////////////////////
 void QVelDevCompThinWalls27(unsigned int numberOfThreads,
-							           real* vx,
-							           real* vy,
-							           real* vz,
-							           real* DD,
-							           int* k_Q,
-							           real* QQ,
-							           unsigned int numberOfBCnodes,
-							           real om1,
-									     unsigned int* geom,
-							           unsigned int* neighborX,
-							           unsigned int* neighborY,
-							           unsigned int* neighborZ,
-									     unsigned int* neighborWSB,
-							           unsigned int size_Mat,
-							           bool isEvenTimestep)
+                                       real* vx,
+                                       real* vy,
+                                       real* vz,
+                                       real* DD,
+                                       int* k_Q,
+                                       real* QQ,
+                                       unsigned int numberOfBCnodes,
+                                       real om1,
+                                         unsigned int* geom,
+                                       unsigned int* neighborX,
+                                       unsigned int* neighborY,
+                                       unsigned int* neighborZ,
+                                         unsigned int* neighborWSB,
+                                       unsigned int size_Mat,
+                                       bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
    QVelDeviceCompThinWallsPartOne27<<< grid.grid, grid.threads >>> (vx,
-											                  vy,
-											                  vz,
-											                  DD,
-											                  k_Q,
-											                  QQ,
-											                  numberOfBCnodes,
-											                  om1,
-											                  neighborX,
-											                  neighborY,
-											                  neighborZ,
-											                  size_Mat,
-											                  isEvenTimestep);
+                                                              vy,
+                                                              vz,
+                                                              DD,
+                                                              k_Q,
+                                                              QQ,
+                                                              numberOfBCnodes,
+                                                              om1,
+                                                              neighborX,
+                                                              neighborY,
+                                                              neighborZ,
+                                                              size_Mat,
+                                                              isEvenTimestep);
    getLastCudaError("QVelDeviceCompThinWallsPartOne27 execution failed");
 
-	QThinWallsPartTwo27 <<< grid.grid, grid.threads >>> (
+    QThinWallsPartTwo27 <<< grid.grid, grid.threads >>> (
        DD,
        k_Q,
        QQ,
@@ -2361,135 +2361,135 @@ void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice, QforBoundar
 }
 //////////////////////////////////////////////////////////////////////////
 void QVelDevIncompHighNu27(unsigned int numberOfThreads,
-									  real* vx,
-									  real* vy,
-									  real* vz,
-									  real* DD,
-									  int* k_Q,
-									  real* QQ,
-									  unsigned int numberOfBCnodes,
-									  real om1,
-									  unsigned int* neighborX,
-									  unsigned int* neighborY,
-									  unsigned int* neighborZ,
-									  unsigned int size_Mat,
-									  bool isEvenTimestep)
+                                      real* vx,
+                                      real* vy,
+                                      real* vz,
+                                      real* DD,
+                                      int* k_Q,
+                                      real* QQ,
+                                      unsigned int numberOfBCnodes,
+                                      real om1,
+                                      unsigned int* neighborX,
+                                      unsigned int* neighborY,
+                                      unsigned int* neighborZ,
+                                      unsigned int size_Mat,
+                                      bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
       QVelDeviceIncompHighNu27<<< grid.grid, grid.threads >>> (
-													  vx,
-													  vy,
-													  vz,
-													  DD,
-													  k_Q,
-													  QQ,
-													  numberOfBCnodes,
-													  om1,
-													  neighborX,
-													  neighborY,
-													  neighborZ,
-													  size_Mat,
-													  isEvenTimestep);
+                                                      vx,
+                                                      vy,
+                                                      vz,
+                                                      DD,
+                                                      k_Q,
+                                                      QQ,
+                                                      numberOfBCnodes,
+                                                      om1,
+                                                      neighborX,
+                                                      neighborY,
+                                                      neighborZ,
+                                                      size_Mat,
+                                                      isEvenTimestep);
       getLastCudaError("QVelDeviceIncompHighNu27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QVelDevCompHighNu27(  unsigned int numberOfThreads,
-									  real* vx,
-									  real* vy,
-									  real* vz,
-									  real* DD,
-									  int* k_Q,
-									  real* QQ,
-									  unsigned int numberOfBCnodes,
-									  real om1,
-									  unsigned int* neighborX,
-									  unsigned int* neighborY,
-									  unsigned int* neighborZ,
-									  unsigned int size_Mat,
-									  bool isEvenTimestep)
+                                      real* vx,
+                                      real* vy,
+                                      real* vz,
+                                      real* DD,
+                                      int* k_Q,
+                                      real* QQ,
+                                      unsigned int numberOfBCnodes,
+                                      real om1,
+                                      unsigned int* neighborX,
+                                      unsigned int* neighborY,
+                                      unsigned int* neighborZ,
+                                      unsigned int size_Mat,
+                                      bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
       QVelDeviceCompHighNu27<<< grid.grid, grid.threads >>> (
-													  vx,
-													  vy,
-													  vz,
-													  DD,
-													  k_Q,
-													  QQ,
-													  numberOfBCnodes,
-													  om1,
-													  neighborX,
-													  neighborY,
-													  neighborZ,
-													  size_Mat,
-													  isEvenTimestep);
+                                                      vx,
+                                                      vy,
+                                                      vz,
+                                                      DD,
+                                                      k_Q,
+                                                      QQ,
+                                                      numberOfBCnodes,
+                                                      om1,
+                                                      neighborX,
+                                                      neighborY,
+                                                      neighborZ,
+                                                      size_Mat,
+                                                      isEvenTimestep);
       getLastCudaError("QVelDeviceComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QVeloDevEQ27(unsigned int numberOfThreads,
-							 real* VeloX,
-							 real* VeloY,
-							 real* VeloZ,
-							 real* DD,
-							 int* k_Q,
-							 int numberOfBCnodes,
-							 real om1,
-							 unsigned int* neighborX,
-							 unsigned int* neighborY,
-							 unsigned int* neighborZ,
-							 unsigned int size_Mat,
-							 bool isEvenTimestep)
+                             real* VeloX,
+                             real* VeloY,
+                             real* VeloZ,
+                             real* DD,
+                             int* k_Q,
+                             int numberOfBCnodes,
+                             real om1,
+                             unsigned int* neighborX,
+                             unsigned int* neighborY,
+                             unsigned int* neighborZ,
+                             unsigned int size_Mat,
+                             bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
    QVeloDeviceEQ27<<< grid.grid, grid.threads >>> (VeloX,
-											 VeloY,
-											 VeloZ,
-											 DD,
-											 k_Q,
-											 numberOfBCnodes,
-											 om1,
-											 neighborX,
-											 neighborY,
-											 neighborZ,
-											 size_Mat,
-											 isEvenTimestep);
+                                             VeloY,
+                                             VeloZ,
+                                             DD,
+                                             k_Q,
+                                             numberOfBCnodes,
+                                             om1,
+                                             neighborX,
+                                             neighborY,
+                                             neighborZ,
+                                             size_Mat,
+                                             isEvenTimestep);
       getLastCudaError("QVeloDeviceEQ27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QVeloStreetDevEQ27(
-	uint  numberOfThreads,
-	real* veloXfraction,
-	real* veloYfraction,
-	int*  naschVelo,
-	real* DD,
-	int*  naschIndex,
-	int   numberOfStreetNodes,
-	real  velocityRatio,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	uint  size_Mat,
-	bool  isEvenTimestep)
+    uint  numberOfThreads,
+    real* veloXfraction,
+    real* veloYfraction,
+    int*  naschVelo,
+    real* DD,
+    int*  naschIndex,
+    int   numberOfStreetNodes,
+    real  velocityRatio,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    uint  size_Mat,
+    bool  isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfStreetNodes);
 
-	QVeloStreetDeviceEQ27 << < grid.grid, grid.threads >> > (
-		veloXfraction,
-		veloYfraction,
-		naschVelo,
-		DD,
-		naschIndex,
-		numberOfStreetNodes,
-		velocityRatio,
-		neighborX,
-		neighborY,
-		neighborZ,
-		size_Mat,
-		isEvenTimestep);
-	getLastCudaError("QVeloStreetDeviceEQ27 execution failed");
+    QVeloStreetDeviceEQ27 << < grid.grid, grid.threads >> > (
+        veloXfraction,
+        veloYfraction,
+        naschVelo,
+        DD,
+        naschIndex,
+        numberOfStreetNodes,
+        velocityRatio,
+        neighborX,
+        neighborY,
+        neighborZ,
+        size_Mat,
+        isEvenTimestep);
+    getLastCudaError("QVeloStreetDeviceEQ27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QSlipDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
@@ -2515,7 +2515,7 @@ void QSlipDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, Q
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
-   
+
    QSlipDeviceComp27TurbViscosity<<< grid, threads >>> (
          parameterDevice->distributions.f[0],
          boundaryCondition->k,
@@ -2555,7 +2555,7 @@ void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditi
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
-   
+
    QSlipDeviceComp27<<< grid, threads >>> (
          parameterDevice->distributions.f[0],
          boundaryCondition->k,
@@ -2590,68 +2590,68 @@ void BBSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryCondit
 }
 //////////////////////////////////////////////////////////////////////////
 void QSlipGeomDevComp27(unsigned int numberOfThreads,
-								   real* DD,
-								   int* k_Q,
-								   real* QQ,
-								   unsigned int numberOfBCnodes,
-								   real om1,
-								   real* NormalX,
-								   real* NormalY,
-								   real* NormalZ,
-								   unsigned int* neighborX,
-								   unsigned int* neighborY,
-								   unsigned int* neighborZ,
-								   unsigned int size_Mat,
-								   bool isEvenTimestep)
-{
-	vf::cuda::CudaGrid grid(numberOfThreads, numberOfBCnodes);
+                                   real* DD,
+                                   int* k_Q,
+                                   real* QQ,
+                                   unsigned int numberOfBCnodes,
+                                   real om1,
+                                   real* NormalX,
+                                   real* NormalY,
+                                   real* NormalZ,
+                                   unsigned int* neighborX,
+                                   unsigned int* neighborY,
+                                   unsigned int* neighborZ,
+                                   unsigned int size_Mat,
+                                   bool isEvenTimestep)
+{
+    vf::cuda::CudaGrid grid(numberOfThreads, numberOfBCnodes);
 
    QSlipGeomDeviceComp27<<< grid.grid, grid.threads >>> (DD,
-												   k_Q,
-												   QQ,
-												   numberOfBCnodes,
-												   om1,
-												   NormalX,
-												   NormalY,
-												   NormalZ,
-												   neighborX,
-												   neighborY,
-												   neighborZ,
-												   size_Mat,
-												   isEvenTimestep);
+                                                   k_Q,
+                                                   QQ,
+                                                   numberOfBCnodes,
+                                                   om1,
+                                                   NormalX,
+                                                   NormalY,
+                                                   NormalZ,
+                                                   neighborX,
+                                                   neighborY,
+                                                   neighborZ,
+                                                   size_Mat,
+                                                   isEvenTimestep);
    getLastCudaError("QSlipGeomDeviceComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void QSlipNormDevComp27(unsigned int numberOfThreads,
-								   real* DD,
-								   int* k_Q,
-								   real* QQ,
-								   unsigned int numberOfBCnodes,
-								   real om1,
-								   real* NormalX,
-								   real* NormalY,
-								   real* NormalZ,
-								   unsigned int* neighborX,
-								   unsigned int* neighborY,
-								   unsigned int* neighborZ,
-								   unsigned int size_Mat,
-								   bool isEvenTimestep)
+                                   real* DD,
+                                   int* k_Q,
+                                   real* QQ,
+                                   unsigned int numberOfBCnodes,
+                                   real om1,
+                                   real* NormalX,
+                                   real* NormalY,
+                                   real* NormalZ,
+                                   unsigned int* neighborX,
+                                   unsigned int* neighborY,
+                                   unsigned int* neighborZ,
+                                   unsigned int size_Mat,
+                                   bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
    QSlipNormDeviceComp27<<< grid.grid, grid.threads >>> (DD,
-												   k_Q,
-												   QQ,
-												   numberOfBCnodes,
-												   om1,
-												   NormalX,
-												   NormalY,
-												   NormalZ,
-												   neighborX,
-												   neighborY,
-												   neighborZ,
-												   size_Mat,
-												   isEvenTimestep);
+                                                   k_Q,
+                                                   QQ,
+                                                   numberOfBCnodes,
+                                                   om1,
+                                                   NormalX,
+                                                   NormalY,
+                                                   NormalZ,
+                                                   neighborX,
+                                                   neighborY,
+                                                   neighborZ,
+                                                   size_Mat,
+                                                   isEvenTimestep);
       getLastCudaError("QSlipGeomDeviceComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
@@ -2796,36 +2796,36 @@ void QPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions
 //////////////////////////////////////////////////////////////////////////
 void QPressDevAntiBB27(  unsigned int numberOfThreads,
                                     real* rhoBC,
-									real* vx,
-									real* vy,
-									real* vz,
-									real* DD,
-									int* k_Q,
-									real* QQ,
-									int numberOfBCnodes,
-									real om1,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int size_Mat,
-									bool isEvenTimestep)
+                                    real* vx,
+                                    real* vy,
+                                    real* vz,
+                                    real* DD,
+                                    int* k_Q,
+                                    real* QQ,
+                                    int numberOfBCnodes,
+                                    real om1,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    unsigned int size_Mat,
+                                    bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
    QPressDeviceAntiBB27<<< grid.grid, grid.threads >>>( rhoBC,
-												vx,
-												vy,
-												vz,
-												DD,
-												k_Q,
-												QQ,
-												numberOfBCnodes,
-												om1,
-												neighborX,
-												neighborY,
-												neighborZ,
-												size_Mat,
-												isEvenTimestep);
+                                                vx,
+                                                vy,
+                                                vz,
+                                                DD,
+                                                k_Q,
+                                                QQ,
+                                                numberOfBCnodes,
+                                                om1,
+                                                neighborX,
+                                                neighborY,
+                                                neighborZ,
+                                                size_Mat,
+                                                isEvenTimestep);
    getLastCudaError("QPressDeviceAntiBB27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
@@ -3108,32 +3108,32 @@ void BBDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* bo
 }
 //////////////////////////////////////////////////////////////////////////
 void QPressDev27_IntBB(  unsigned int numberOfThreads,
-									real* rho,
-									real* DD,
-									int* k_Q,
-									real* QQ,
-									unsigned int numberOfBCnodes,
-									real om1,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int size_Mat,
-									bool isEvenTimestep)
+                                    real* rho,
+                                    real* DD,
+                                    int* k_Q,
+                                    real* QQ,
+                                    unsigned int numberOfBCnodes,
+                                    real om1,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    unsigned int size_Mat,
+                                    bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
-	QPressDevice27_IntBB<<< grid.grid, grid.threads >>> (rho,
-													DD,
-													k_Q,
-													QQ,
-													numberOfBCnodes,
-													om1,
-													neighborX,
-													neighborY,
-													neighborZ,
-													size_Mat,
-													isEvenTimestep);
-	getLastCudaError("QPressDevice27_IntBB execution failed");
+    QPressDevice27_IntBB<<< grid.grid, grid.threads >>> (rho,
+                                                    DD,
+                                                    k_Q,
+                                                    QQ,
+                                                    numberOfBCnodes,
+                                                    om1,
+                                                    neighborX,
+                                                    neighborY,
+                                                    neighborZ,
+                                                    size_Mat,
+                                                    isEvenTimestep);
+    getLastCudaError("QPressDevice27_IntBB execution failed");
 }
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
 //////////////////////////////////////////////////////////////////////////
@@ -3208,146 +3208,146 @@ void VelSchlaffer27(  unsigned int numberOfThreads,
       getLastCudaError("VelSchlaff27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void QPrecursorDevCompZeroPress(LBMSimulationParameter* parameterDevice, 
-								QforPrecursorBoundaryConditions* boundaryCondition, 
-								real timeRatio, 
-								real velocityRatio)
-{
-
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
-
-	QPrecursorDeviceCompZeroPress<<< grid.grid, grid.threads >>>(boundaryCondition->k,
-																boundaryCondition->numberOfBCnodes,
-																boundaryCondition->numberOfPrecursorNodes,
-																boundaryCondition->sizeQ,
-																parameterDevice->omega,
-																parameterDevice->distributions.f[0],
-																boundaryCondition->q27[0],
-																parameterDevice->neighborX,
-																parameterDevice->neighborY,
-																parameterDevice->neighborZ,
-																boundaryCondition->planeNeighbor0PP,
-																boundaryCondition->planeNeighbor0PM,
-																boundaryCondition->planeNeighbor0MP,
-																boundaryCondition->planeNeighbor0MM,
-																boundaryCondition->weights0PP,
-																boundaryCondition->weights0PM,
-																boundaryCondition->weights0MP,
-																boundaryCondition->weights0MM,
-																boundaryCondition->last,
-																boundaryCondition->current,
-																boundaryCondition->velocityX,
-																boundaryCondition->velocityY,
-																boundaryCondition->velocityZ,
-																timeRatio,
-																velocityRatio,
-																parameterDevice->numberOfNodes,
-																parameterDevice->isEvenTimestep);
-	getLastCudaError("QPrecursorDeviceCompZeroPress execution failed"); 
+void QPrecursorDevCompZeroPress(LBMSimulationParameter* parameterDevice, // host wrapper: launches QPrecursorDeviceCompZeroPress
+                                QforPrecursorBoundaryConditions* boundaryCondition,
+                                real timeRatio,
+                                real velocityRatio)
+{
+
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); // launch configuration built from the thread count and the number of boundary nodes
+
+    QPrecursorDeviceCompZeroPress<<< grid.grid, grid.threads >>>(boundaryCondition->k,
+                                                                boundaryCondition->numberOfBCnodes,
+                                                                boundaryCondition->numberOfPrecursorNodes,
+                                                                boundaryCondition->sizeQ,
+                                                                parameterDevice->omega,
+                                                                parameterDevice->distributions.f[0],
+                                                                boundaryCondition->q27[0],
+                                                                parameterDevice->neighborX,
+                                                                parameterDevice->neighborY,
+                                                                parameterDevice->neighborZ,
+                                                                boundaryCondition->planeNeighbor0PP,
+                                                                boundaryCondition->planeNeighbor0PM,
+                                                                boundaryCondition->planeNeighbor0MP,
+                                                                boundaryCondition->planeNeighbor0MM,
+                                                                boundaryCondition->weights0PP,
+                                                                boundaryCondition->weights0PM,
+                                                                boundaryCondition->weights0MP,
+                                                                boundaryCondition->weights0MM,
+                                                                boundaryCondition->last,
+                                                                boundaryCondition->current,
+                                                                boundaryCondition->velocityX,
+                                                                boundaryCondition->velocityY,
+                                                                boundaryCondition->velocityZ,
+                                                                timeRatio, // both ratios are forwarded to the kernel unchanged
+                                                                velocityRatio,
+                                                                parameterDevice->numberOfNodes,
+                                                                parameterDevice->isEvenTimestep);
+    getLastCudaError("QPrecursorDeviceCompZeroPress execution failed"); // error check tagged with the name of the kernel launched above
 
 }
 //////////////////////////////////////////////////////////////////////////
 void PrecursorDevEQ27( LBMSimulationParameter* parameterDevice,
-						QforPrecursorBoundaryConditions* boundaryCondition,
-						real timeRatio,
-						real velocityRatio)
-{
-
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
-
-	PrecursorDeviceEQ27<<< grid.grid, grid.threads >>>(boundaryCondition->k,
-													boundaryCondition->numberOfBCnodes,
-													boundaryCondition->numberOfPrecursorNodes,
-													parameterDevice->omega,
-													parameterDevice->distributions.f[0],
-													parameterDevice->neighborX,
-													parameterDevice->neighborX,
-													parameterDevice->neighborX,
-													boundaryCondition->planeNeighbor0PP,
-													boundaryCondition->planeNeighbor0PM,
-													boundaryCondition->planeNeighbor0MP,
-													boundaryCondition->planeNeighbor0MM,
-													boundaryCondition->weights0PP,
-													boundaryCondition->weights0PM,
-													boundaryCondition->weights0MP,
-													boundaryCondition->weights0MM,
-													boundaryCondition->last,
-													boundaryCondition->current,
-													boundaryCondition->velocityX,
-													boundaryCondition->velocityY,
-													boundaryCondition->velocityZ,
-													timeRatio,
-													velocityRatio,
-													parameterDevice->numberOfNodes,
-													parameterDevice->isEvenTimestep);
-	getLastCudaError("PrecursorDeviceEQ27 execution failed"); 
+                        QforPrecursorBoundaryConditions* boundaryCondition,
+                        real timeRatio,
+                        real velocityRatio)
+{
+
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); // launch configuration built from the thread count and the number of boundary nodes
+
+    PrecursorDeviceEQ27<<< grid.grid, grid.threads >>>(boundaryCondition->k,
+                                                    boundaryCondition->numberOfBCnodes,
+                                                    boundaryCondition->numberOfPrecursorNodes,
+                                                    parameterDevice->omega,
+                                                    parameterDevice->distributions.f[0],
+                                                    parameterDevice->neighborX, // neighbor lists go in X, Y, Z order, as in the sibling precursor launches
+                                                    parameterDevice->neighborY, // fix: neighborX was passed here
+                                                    parameterDevice->neighborZ, // fix: neighborX was passed here
+                                                    boundaryCondition->planeNeighbor0PP,
+                                                    boundaryCondition->planeNeighbor0PM,
+                                                    boundaryCondition->planeNeighbor0MP,
+                                                    boundaryCondition->planeNeighbor0MM,
+                                                    boundaryCondition->weights0PP,
+                                                    boundaryCondition->weights0PM,
+                                                    boundaryCondition->weights0MP,
+                                                    boundaryCondition->weights0MM,
+                                                    boundaryCondition->last,
+                                                    boundaryCondition->current,
+                                                    boundaryCondition->velocityX,
+                                                    boundaryCondition->velocityY,
+                                                    boundaryCondition->velocityZ,
+                                                    timeRatio,
+                                                    velocityRatio,
+                                                    parameterDevice->numberOfNodes,
+                                                    parameterDevice->isEvenTimestep);
+    getLastCudaError("PrecursorDeviceEQ27 execution failed"); // error check tagged with the name of the kernel launched above
 
 }
 //////////////////////////////////////////////////////////////////////////
 void PrecursorDevDistributions( LBMSimulationParameter* parameterDevice,
-								QforPrecursorBoundaryConditions* boundaryCondition,
-								real timeRatio,
-								real velocityRatio)
-{
-
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
-
-	PrecursorDeviceDistributions<<< grid.grid, grid.threads >>>(boundaryCondition->k,
-															boundaryCondition->numberOfBCnodes,
-															boundaryCondition->numberOfPrecursorNodes,
-															parameterDevice->distributions.f[0],
-															parameterDevice->neighborX,
-															parameterDevice->neighborY,
-															parameterDevice->neighborZ,
-															boundaryCondition->planeNeighbor0PP,
-															boundaryCondition->planeNeighbor0PM,
-															boundaryCondition->planeNeighbor0MP,
-															boundaryCondition->planeNeighbor0MM,
-															boundaryCondition->weights0PP,
-															boundaryCondition->weights0PM,
-															boundaryCondition->weights0MP,
-															boundaryCondition->weights0MM,
-															boundaryCondition->last,
-															boundaryCondition->current,
-															timeRatio,
-															parameterDevice->numberOfNodes,
-															parameterDevice->isEvenTimestep);
-	getLastCudaError("QPrecursorDeviceCompZeroPress execution failed"); 
+                                QforPrecursorBoundaryConditions* boundaryCondition,
+                                real timeRatio,
+                                real velocityRatio) // accepted but not forwarded to the kernel launch below
+{
+
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); // launch configuration built from the thread count and the number of boundary nodes
+
+    PrecursorDeviceDistributions<<< grid.grid, grid.threads >>>(boundaryCondition->k,
+                                                            boundaryCondition->numberOfBCnodes,
+                                                            boundaryCondition->numberOfPrecursorNodes,
+                                                            parameterDevice->distributions.f[0],
+                                                            parameterDevice->neighborX,
+                                                            parameterDevice->neighborY,
+                                                            parameterDevice->neighborZ,
+                                                            boundaryCondition->planeNeighbor0PP,
+                                                            boundaryCondition->planeNeighbor0PM,
+                                                            boundaryCondition->planeNeighbor0MP,
+                                                            boundaryCondition->planeNeighbor0MM,
+                                                            boundaryCondition->weights0PP,
+                                                            boundaryCondition->weights0PM,
+                                                            boundaryCondition->weights0MP,
+                                                            boundaryCondition->weights0MM,
+                                                            boundaryCondition->last,
+                                                            boundaryCondition->current,
+                                                            timeRatio,
+                                                            parameterDevice->numberOfNodes,
+                                                            parameterDevice->isEvenTimestep);
+    getLastCudaError("PrecursorDeviceDistributions execution failed"); // fix: message previously named QPrecursorDeviceCompZeroPress
 
 }
 
 //////////////////////////////////////////////////////////////////////////
 void QPrecursorDevDistributions( LBMSimulationParameter* parameterDevice,
-								QforPrecursorBoundaryConditions* boundaryCondition,
-								real timeRatio,
-								real velocityRatio)
-{
-
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
-
-	QPrecursorDeviceDistributions<<< grid.grid, grid.threads >>>(boundaryCondition->k,
-																boundaryCondition->q27[0],
-																boundaryCondition->sizeQ,
-																boundaryCondition->numberOfBCnodes,
-																boundaryCondition->numberOfPrecursorNodes,
-																parameterDevice->distributions.f[0],
-																parameterDevice->neighborX,
-																parameterDevice->neighborY,
-																parameterDevice->neighborZ,
-																boundaryCondition->planeNeighbor0PP,
-																boundaryCondition->planeNeighbor0PM,
-																boundaryCondition->planeNeighbor0MP,
-																boundaryCondition->planeNeighbor0MM,
-																boundaryCondition->weights0PP,
-																boundaryCondition->weights0PM,
-																boundaryCondition->weights0MP,
-																boundaryCondition->weights0MM,
-																boundaryCondition->last,
-																boundaryCondition->current,
-																timeRatio,
-																parameterDevice->numberOfNodes,
-																parameterDevice->isEvenTimestep);
-	getLastCudaError("QPrecursorDeviceCompZeroPress execution failed"); 
+                                QforPrecursorBoundaryConditions* boundaryCondition,
+                                real timeRatio,
+                                real velocityRatio) // accepted but not forwarded to the kernel launch below
+{
+
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes); // launch configuration built from the thread count and the number of boundary nodes
+
+    QPrecursorDeviceDistributions<<< grid.grid, grid.threads >>>(boundaryCondition->k,
+                                                                boundaryCondition->q27[0],
+                                                                boundaryCondition->sizeQ,
+                                                                boundaryCondition->numberOfBCnodes,
+                                                                boundaryCondition->numberOfPrecursorNodes,
+                                                                parameterDevice->distributions.f[0],
+                                                                parameterDevice->neighborX,
+                                                                parameterDevice->neighborY,
+                                                                parameterDevice->neighborZ,
+                                                                boundaryCondition->planeNeighbor0PP,
+                                                                boundaryCondition->planeNeighbor0PM,
+                                                                boundaryCondition->planeNeighbor0MP,
+                                                                boundaryCondition->planeNeighbor0MM,
+                                                                boundaryCondition->weights0PP,
+                                                                boundaryCondition->weights0PM,
+                                                                boundaryCondition->weights0MP,
+                                                                boundaryCondition->weights0MM,
+                                                                boundaryCondition->last,
+                                                                boundaryCondition->current,
+                                                                timeRatio,
+                                                                parameterDevice->numberOfNodes,
+                                                                parameterDevice->isEvenTimestep);
+    getLastCudaError("QPrecursorDeviceDistributions execution failed"); // fix: message previously named QPrecursorDeviceCompZeroPress
 
 }
 //////////////////////////////////////////////////////////////////////////
@@ -3360,7 +3360,7 @@ extern "C" void PropVelo(   unsigned int numberOfThreads,
                             real* uy,
                             real* uz,
                             int* k_Q,
-							unsigned int size_Prop,
+                            unsigned int size_Prop,
                             unsigned int size_Mat,
                             unsigned int* bcMatD,
                             real* DD,
@@ -3375,10 +3375,10 @@ extern "C" void PropVelo(   unsigned int numberOfThreads,
                                        ux,
                                        uy,
                                        uz,
-									   k_Q,
-									   size_Prop,
+                                       k_Q,
+                                       size_Prop,
                                        size_Mat,
-									   bcMatD,
+                                       bcMatD,
                                        DD,
                                        EvenOrOdd);
       getLastCudaError("PropellerBC execution failed");
@@ -3408,7 +3408,7 @@ void ScaleCF27( real* DC,
                         unsigned int numberOfThreads)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
-   
+
       scaleCF27<<< grid.grid, grid.threads >>> ( DC,
                                              DF,
                                              neighborCX,
@@ -3638,263 +3638,263 @@ void ScaleCF_Fix_27(  real* DC,
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleCF_Fix_comp_27( real* DC,
-									 real* DF,
-									 unsigned int* neighborCX,
-									 unsigned int* neighborCY,
-									 unsigned int* neighborCZ,
-									 unsigned int* neighborFX,
-									 unsigned int* neighborFY,
-									 unsigned int* neighborFZ,
-									 unsigned int size_MatC,
-									 unsigned int size_MatF,
-									 bool isEvenTimestep,
-									 unsigned int* posCSWB,
-									 unsigned int* posFSWB,
-									 unsigned int kCF,
-									 real omCoarse,
-									 real omFine,
-									 real nu,
-									 unsigned int nxC,
-									 unsigned int nyC,
-									 unsigned int nxF,
-									 unsigned int nyF,
-									 unsigned int numberOfThreads,
-									 OffCF offCF)
+                                     real* DF,
+                                     unsigned int* neighborCX,
+                                     unsigned int* neighborCY,
+                                     unsigned int* neighborCZ,
+                                     unsigned int* neighborFX,
+                                     unsigned int* neighborFY,
+                                     unsigned int* neighborFZ,
+                                     unsigned int size_MatC,
+                                     unsigned int size_MatF,
+                                     bool isEvenTimestep,
+                                     unsigned int* posCSWB,
+                                     unsigned int* posFSWB,
+                                     unsigned int kCF, // also passed to CudaGrid to size the launch
+                                     real omCoarse,
+                                     real omFine,
+                                     real nu,
+                                     unsigned int nxC,
+                                     unsigned int nyC,
+                                     unsigned int nxF,
+                                     unsigned int nyF,
+                                     unsigned int numberOfThreads, // used only for the CudaGrid launch configuration; not passed to the kernel
+                                     OffCF offCF)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
 
       scaleCF_Fix_comp_27<<< grid.grid, grid.threads >>>(   DC,
-														DF,
-														neighborCX,
-														neighborCY,
-														neighborCZ,
-														neighborFX,
-														neighborFY,
-														neighborFZ,
-														size_MatC,
-														size_MatF,
-														isEvenTimestep,
-														posCSWB,
-														posFSWB,
-														kCF,
-														omCoarse,
-														omFine,
-														nu,
-														nxC,
-														nyC,
-														nxF,
-														nyF,
-														offCF);
+                                                        DF,
+                                                        neighborCX,
+                                                        neighborCY,
+                                                        neighborCZ,
+                                                        neighborFX,
+                                                        neighborFY,
+                                                        neighborFZ,
+                                                        size_MatC,
+                                                        size_MatF,
+                                                        isEvenTimestep,
+                                                        posCSWB,
+                                                        posFSWB,
+                                                        kCF,
+                                                        omCoarse,
+                                                        omFine,
+                                                        nu,
+                                                        nxC,
+                                                        nyC,
+                                                        nxF,
+                                                        nyF,
+                                                        offCF); // NOTE(review): the error string on the next line says scaleCF_Fix_27, but the kernel launched is scaleCF_Fix_comp_27
       getLastCudaError("scaleCF_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleCF_0817_comp_27(real* DC,
-									 real* DF,
-									 unsigned int* neighborCX,
-									 unsigned int* neighborCY,
-									 unsigned int* neighborCZ,
-									 unsigned int* neighborFX,
-									 unsigned int* neighborFY,
-									 unsigned int* neighborFZ,
-									 unsigned int size_MatC,
-									 unsigned int size_MatF,
-									 bool isEvenTimestep,
-									 unsigned int* posCSWB,
-									 unsigned int* posFSWB,
-									 unsigned int kCF,
-									 real omCoarse,
-									 real omFine,
-									 real nu,
-									 unsigned int nxC,
-									 unsigned int nyC,
-									 unsigned int nxF,
-									 unsigned int nyF,
-									 unsigned int numberOfThreads,
-									 OffCF offCF,
+                                     real* DF,
+                                     unsigned int* neighborCX,
+                                     unsigned int* neighborCY,
+                                     unsigned int* neighborCZ,
+                                     unsigned int* neighborFX,
+                                     unsigned int* neighborFY,
+                                     unsigned int* neighborFZ,
+                                     unsigned int size_MatC,
+                                     unsigned int size_MatF,
+                                     bool isEvenTimestep,
+                                     unsigned int* posCSWB,
+                                     unsigned int* posFSWB,
+                                     unsigned int kCF, // also passed to CudaGrid to size the launch
+                                     real omCoarse,
+                                     real omFine,
+                                     real nu,
+                                     unsigned int nxC,
+                                     unsigned int nyC,
+                                     unsigned int nxF,
+                                     unsigned int nyF,
+                                     unsigned int numberOfThreads, // used only for the CudaGrid launch configuration; not passed to the kernel
+                                     OffCF offCF, // the stream parameter that follows is used in the launch configuration, not as a kernel argument
                             CUstream_st *stream)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
 
       scaleCF_0817_comp_27<<< grid.grid, grid.threads, 0, stream >>>(  DC,
-														DF,
-														neighborCX,
-														neighborCY,
-														neighborCZ,
-														neighborFX,
-														neighborFY,
-														neighborFZ,
-														size_MatC,
-														size_MatF,
-														isEvenTimestep,
-														posCSWB,
-														posFSWB,
-														kCF,
-														omCoarse,
-														omFine,
-														nu,
-														nxC,
-														nyC,
-														nxF,
-														nyF,
-														offCF);
+                                                        DF,
+                                                        neighborCX,
+                                                        neighborCY,
+                                                        neighborCZ,
+                                                        neighborFX,
+                                                        neighborFY,
+                                                        neighborFZ,
+                                                        size_MatC,
+                                                        size_MatF,
+                                                        isEvenTimestep,
+                                                        posCSWB,
+                                                        posFSWB,
+                                                        kCF,
+                                                        omCoarse,
+                                                        omFine,
+                                                        nu,
+                                                        nxC,
+                                                        nyC,
+                                                        nxF,
+                                                        nyF,
+                                                        offCF); // NOTE(review): the error string on the next line says scaleCF_0817_27, but the kernel launched is scaleCF_0817_comp_27
       getLastCudaError("scaleCF_0817_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleCF_comp_D3Q27F3_2018(real* DC,
-										  real* DF,
-										  real* G6,
-										  unsigned int* neighborCX,
-										  unsigned int* neighborCY,
-										  unsigned int* neighborCZ,
-										  unsigned int* neighborFX,
-										  unsigned int* neighborFY,
-										  unsigned int* neighborFZ,
-										  unsigned int size_MatC,
-										  unsigned int size_MatF,
-										  bool isEvenTimestep,
-										  unsigned int* posCSWB,
-										  unsigned int* posFSWB,
-										  unsigned int kCF,
-										  real omCoarse,
-										  real omFine,
-										  real nu,
-										  unsigned int nxC,
-										  unsigned int nyC,
-										  unsigned int nxF,
-										  unsigned int nyF,
-										  unsigned int numberOfThreads,
-										  OffCF offCF)
+                                          real* DF,
+                                          real* G6,
+                                          unsigned int* neighborCX,
+                                          unsigned int* neighborCY,
+                                          unsigned int* neighborCZ,
+                                          unsigned int* neighborFX,
+                                          unsigned int* neighborFY,
+                                          unsigned int* neighborFZ,
+                                          unsigned int size_MatC,
+                                          unsigned int size_MatF,
+                                          bool isEvenTimestep,
+                                          unsigned int* posCSWB,
+                                          unsigned int* posFSWB,
+                                          unsigned int kCF, // also passed to CudaGrid to size the launch
+                                          real omCoarse,
+                                          real omFine,
+                                          real nu,
+                                          unsigned int nxC,
+                                          unsigned int nyC,
+                                          unsigned int nxF,
+                                          unsigned int nyF,
+                                          unsigned int numberOfThreads, // used only for the CudaGrid launch configuration; not passed to the kernel
+                                          OffCF offCF)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
 
       scaleCF_comp_D3Q27F3_2018 <<< grid.grid, grid.threads >>>(DC,
-															DF,
-															G6,
-															neighborCX,
-															neighborCY,
-															neighborCZ,
-															neighborFX,
-															neighborFY,
-															neighborFZ,
-															size_MatC,
-															size_MatF,
-															isEvenTimestep,
-															posCSWB,
-															posFSWB,
-															kCF,
-															omCoarse,
-															omFine,
-															nu,
-															nxC,
-															nyC,
-															nxF,
-															nyF,
-															offCF);
+                                                            DF,
+                                                            G6,
+                                                            neighborCX,
+                                                            neighborCY,
+                                                            neighborCZ,
+                                                            neighborFX,
+                                                            neighborFY,
+                                                            neighborFZ,
+                                                            size_MatC,
+                                                            size_MatF,
+                                                            isEvenTimestep,
+                                                            posCSWB,
+                                                            posFSWB,
+                                                            kCF,
+                                                            omCoarse,
+                                                            omFine,
+                                                            nu,
+                                                            nxC,
+                                                            nyC,
+                                                            nxF,
+                                                            nyF,
+                                                            offCF); // every parameter except numberOfThreads is forwarded, in declaration order
       getLastCudaError("scaleCF_comp_D3Q27F3_2018 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleCF_comp_D3Q27F3(real* DC,
-									 real* DF,
-									 real* G6,
-									 unsigned int* neighborCX,
-									 unsigned int* neighborCY,
-									 unsigned int* neighborCZ,
-									 unsigned int* neighborFX,
-									 unsigned int* neighborFY,
-									 unsigned int* neighborFZ,
-									 unsigned int size_MatC,
-									 unsigned int size_MatF,
-									 bool isEvenTimestep,
-									 unsigned int* posCSWB,
-									 unsigned int* posFSWB,
-									 unsigned int kCF,
-									 real omCoarse,
-									 real omFine,
-									 real nu,
-									 unsigned int nxC,
-									 unsigned int nyC,
-									 unsigned int nxF,
-									 unsigned int nyF,
-									 unsigned int numberOfThreads,
-									 OffCF offCF,
+                                     real* DF,
+                                     real* G6,
+                                     unsigned int* neighborCX,
+                                     unsigned int* neighborCY,
+                                     unsigned int* neighborCZ,
+                                     unsigned int* neighborFX,
+                                     unsigned int* neighborFY,
+                                     unsigned int* neighborFZ,
+                                     unsigned int size_MatC,
+                                     unsigned int size_MatF,
+                                     bool isEvenTimestep,
+                                     unsigned int* posCSWB,
+                                     unsigned int* posFSWB,
+                                     unsigned int kCF,
+                                     real omCoarse,
+                                     real omFine,
+                                     real nu,
+                                     unsigned int nxC,
+                                     unsigned int nyC,
+                                     unsigned int nxF,
+                                     unsigned int nyF,
+                                     unsigned int numberOfThreads,
+                                     OffCF offCF,
                             CUstream_st *stream)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
 
       scaleCF_comp_D3Q27F3 <<< grid.grid, grid.threads, 0, stream >>>( DC,
-														DF,
-														G6,
-														neighborCX,
-														neighborCY,
-														neighborCZ,
-														neighborFX,
-														neighborFY,
-														neighborFZ,
-														size_MatC,
-														size_MatF,
-														isEvenTimestep,
-														posCSWB,
-														posFSWB,
-														kCF,
-														omCoarse,
-														omFine,
-														nu,
-														nxC,
-														nyC,
-														nxF,
-														nyF,
-														offCF);
+                                                        DF,
+                                                        G6,
+                                                        neighborCX,
+                                                        neighborCY,
+                                                        neighborCZ,
+                                                        neighborFX,
+                                                        neighborFY,
+                                                        neighborFZ,
+                                                        size_MatC,
+                                                        size_MatF,
+                                                        isEvenTimestep,
+                                                        posCSWB,
+                                                        posFSWB,
+                                                        kCF,
+                                                        omCoarse,
+                                                        omFine,
+                                                        nu,
+                                                        nxC,
+                                                        nyC,
+                                                        nxF,
+                                                        nyF,
+                                                        offCF);
       getLastCudaError("scaleCF_comp_D3Q27F3 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleCF_staggered_time_comp_27(  real* DC,
-												 real* DF,
-												 unsigned int* neighborCX,
-												 unsigned int* neighborCY,
-												 unsigned int* neighborCZ,
-												 unsigned int* neighborFX,
-												 unsigned int* neighborFY,
-												 unsigned int* neighborFZ,
-												 unsigned int size_MatC,
-												 unsigned int size_MatF,
-												 bool isEvenTimestep,
-												 unsigned int* posCSWB,
-												 unsigned int* posFSWB,
-												 unsigned int kCF,
-												 real omCoarse,
-												 real omFine,
-												 real nu,
-												 unsigned int nxC,
-												 unsigned int nyC,
-												 unsigned int nxF,
-												 unsigned int nyF,
-												 unsigned int numberOfThreads,
-												 OffCF offCF)
+                                                 real* DF,
+                                                 unsigned int* neighborCX,
+                                                 unsigned int* neighborCY,
+                                                 unsigned int* neighborCZ,
+                                                 unsigned int* neighborFX,
+                                                 unsigned int* neighborFY,
+                                                 unsigned int* neighborFZ,
+                                                 unsigned int size_MatC,
+                                                 unsigned int size_MatF,
+                                                 bool isEvenTimestep,
+                                                 unsigned int* posCSWB,
+                                                 unsigned int* posFSWB,
+                                                 unsigned int kCF,
+                                                 real omCoarse,
+                                                 real omFine,
+                                                 real nu,
+                                                 unsigned int nxC,
+                                                 unsigned int nyC,
+                                                 unsigned int nxF,
+                                                 unsigned int nyF,
+                                                 unsigned int numberOfThreads,
+                                                 OffCF offCF)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
 
       scaleCF_staggered_time_comp_27<<< grid.grid, grid.threads >>>(    DC,
-																	DF,
-																	neighborCX,
-																	neighborCY,
-																	neighborCZ,
-																	neighborFX,
-																	neighborFY,
-																	neighborFZ,
-																	size_MatC,
-																	size_MatF,
-																	isEvenTimestep,
-																	posCSWB,
-																	posFSWB,
-																	kCF,
-																	omCoarse,
-																	omFine,
-																	nu,
-																	nxC,
-																	nyC,
-																	nxF,
-																	nyF,
-																	offCF);
+                                                                    DF,
+                                                                    neighborCX,
+                                                                    neighborCY,
+                                                                    neighborCZ,
+                                                                    neighborFX,
+                                                                    neighborFY,
+                                                                    neighborFZ,
+                                                                    size_MatC,
+                                                                    size_MatF,
+                                                                    isEvenTimestep,
+                                                                    posCSWB,
+                                                                    posFSWB,
+                                                                    kCF,
+                                                                    omCoarse,
+                                                                    omFine,
+                                                                    nu,
+                                                                    nxC,
+                                                                    nyC,
+                                                                    nxF,
+                                                                    nyF,
+                                                                    offCF);
       getLastCudaError("scaleCF_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
@@ -3958,157 +3958,157 @@ void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulati
 
 //////////////////////////////////////////////////////////////////////////
 void ScaleCF_RhoSq_3rdMom_comp_27(real* DC,
-											 real* DF,
-											 unsigned int* neighborCX,
-											 unsigned int* neighborCY,
-											 unsigned int* neighborCZ,
-											 unsigned int* neighborFX,
-											 unsigned int* neighborFY,
-											 unsigned int* neighborFZ,
-											 unsigned int size_MatC,
-											 unsigned int size_MatF,
-											 bool isEvenTimestep,
-											 unsigned int* posCSWB,
-											 unsigned int* posFSWB,
-											 unsigned int kCF,
-											 real omCoarse,
-											 real omFine,
-											 real nu,
-											 unsigned int nxC,
-											 unsigned int nyC,
-											 unsigned int nxF,
-											 unsigned int nyF,
-											 unsigned int numberOfThreads,
-											 OffCF offCF,
+                                             real* DF,
+                                             unsigned int* neighborCX,
+                                             unsigned int* neighborCY,
+                                             unsigned int* neighborCZ,
+                                             unsigned int* neighborFX,
+                                             unsigned int* neighborFY,
+                                             unsigned int* neighborFZ,
+                                             unsigned int size_MatC,
+                                             unsigned int size_MatF,
+                                             bool isEvenTimestep,
+                                             unsigned int* posCSWB,
+                                             unsigned int* posFSWB,
+                                             unsigned int kCF,
+                                             real omCoarse,
+                                             real omFine,
+                                             real nu,
+                                             unsigned int nxC,
+                                             unsigned int nyC,
+                                             unsigned int nxF,
+                                             unsigned int nyF,
+                                             unsigned int numberOfThreads,
+                                             OffCF offCF,
                                   CUstream_st *stream)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
 
       scaleCF_RhoSq_3rdMom_comp_27<<< grid.grid, grid.threads, 0, stream >>>(  DC,
-																DF,
-																neighborCX,
-																neighborCY,
-																neighborCZ,
-																neighborFX,
-																neighborFY,
-																neighborFZ,
-																size_MatC,
-																size_MatF,
-																isEvenTimestep,
-																posCSWB,
-																posFSWB,
-																kCF,
-																omCoarse,
-																omFine,
-																nu,
-																nxC,
-																nyC,
-																nxF,
-																nyF,
-																offCF);
+                                                                DF,
+                                                                neighborCX,
+                                                                neighborCY,
+                                                                neighborCZ,
+                                                                neighborFX,
+                                                                neighborFY,
+                                                                neighborFZ,
+                                                                size_MatC,
+                                                                size_MatF,
+                                                                isEvenTimestep,
+                                                                posCSWB,
+                                                                posFSWB,
+                                                                kCF,
+                                                                omCoarse,
+                                                                omFine,
+                                                                nu,
+                                                                nxC,
+                                                                nyC,
+                                                                nxF,
+                                                                nyF,
+                                                                offCF);
       getLastCudaError("scaleCF_RhoSq_3rdMom_comp_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleCF_AA2016_comp_27(real* DC,
-									   real* DF,
-									   unsigned int* neighborCX,
-									   unsigned int* neighborCY,
-									   unsigned int* neighborCZ,
-									   unsigned int* neighborFX,
-									   unsigned int* neighborFY,
-									   unsigned int* neighborFZ,
-									   unsigned int size_MatC,
-									   unsigned int size_MatF,
-									   bool isEvenTimestep,
-									   unsigned int* posCSWB,
-									   unsigned int* posFSWB,
-									   unsigned int kCF,
-									   real omCoarse,
-									   real omFine,
-									   real nu,
-									   unsigned int nxC,
-									   unsigned int nyC,
-									   unsigned int nxF,
-									   unsigned int nyF,
-									   unsigned int numberOfThreads,
-									   OffCF offCF,
+                                       real* DF,
+                                       unsigned int* neighborCX,
+                                       unsigned int* neighborCY,
+                                       unsigned int* neighborCZ,
+                                       unsigned int* neighborFX,
+                                       unsigned int* neighborFY,
+                                       unsigned int* neighborFZ,
+                                       unsigned int size_MatC,
+                                       unsigned int size_MatF,
+                                       bool isEvenTimestep,
+                                       unsigned int* posCSWB,
+                                       unsigned int* posFSWB,
+                                       unsigned int kCF,
+                                       real omCoarse,
+                                       real omFine,
+                                       real nu,
+                                       unsigned int nxC,
+                                       unsigned int nyC,
+                                       unsigned int nxF,
+                                       unsigned int nyF,
+                                       unsigned int numberOfThreads,
+                                       OffCF offCF,
                               CUstream_st *stream)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
 
       scaleCF_AA2016_comp_27<<< grid.grid, grid.threads, 0, stream >>>(DC,
-														DF,
-														neighborCX,
-														neighborCY,
-														neighborCZ,
-														neighborFX,
-														neighborFY,
-														neighborFZ,
-														size_MatC,
-														size_MatF,
-														isEvenTimestep,
-														posCSWB,
-														posFSWB,
-														kCF,
-														omCoarse,
-														omFine,
-														nu,
-														nxC,
-														nyC,
-														nxF,
-														nyF,
-														offCF);
+                                                        DF,
+                                                        neighborCX,
+                                                        neighborCY,
+                                                        neighborCZ,
+                                                        neighborFX,
+                                                        neighborFY,
+                                                        neighborFZ,
+                                                        size_MatC,
+                                                        size_MatF,
+                                                        isEvenTimestep,
+                                                        posCSWB,
+                                                        posFSWB,
+                                                        kCF,
+                                                        omCoarse,
+                                                        omFine,
+                                                        nu,
+                                                        nxC,
+                                                        nyC,
+                                                        nxF,
+                                                        nyF,
+                                                        offCF);
       getLastCudaError("scaleCF_AA2016_comp_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleCF_NSPress_27(  real* DC,
-									 real* DF,
-									 unsigned int* neighborCX,
-									 unsigned int* neighborCY,
-									 unsigned int* neighborCZ,
-									 unsigned int* neighborFX,
-									 unsigned int* neighborFY,
-									 unsigned int* neighborFZ,
-									 unsigned int size_MatC,
-									 unsigned int size_MatF,
-									 bool isEvenTimestep,
-									 unsigned int* posCSWB,
-									 unsigned int* posFSWB,
-									 unsigned int kCF,
-									 real omCoarse,
-									 real omFine,
-									 real nu,
-									 unsigned int nxC,
-									 unsigned int nyC,
-									 unsigned int nxF,
-									 unsigned int nyF,
-									 unsigned int numberOfThreads,
-									 OffCF offCF)
+                                     real* DF,
+                                     unsigned int* neighborCX,
+                                     unsigned int* neighborCY,
+                                     unsigned int* neighborCZ,
+                                     unsigned int* neighborFX,
+                                     unsigned int* neighborFY,
+                                     unsigned int* neighborFZ,
+                                     unsigned int size_MatC,
+                                     unsigned int size_MatF,
+                                     bool isEvenTimestep,
+                                     unsigned int* posCSWB,
+                                     unsigned int* posFSWB,
+                                     unsigned int kCF,
+                                     real omCoarse,
+                                     real omFine,
+                                     real nu,
+                                     unsigned int nxC,
+                                     unsigned int nyC,
+                                     unsigned int nxF,
+                                     unsigned int nyF,
+                                     unsigned int numberOfThreads,
+                                     OffCF offCF)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
 
       scaleCF_NSPress_27<<< grid.grid, grid.threads >>>(DC,
-													DF,
-													neighborCX,
-													neighborCY,
-													neighborCZ,
-													neighborFX,
-													neighborFY,
-													neighborFZ,
-													size_MatC,
-													size_MatF,
-													isEvenTimestep,
-													posCSWB,
-													posFSWB,
-													kCF,
-													omCoarse,
-													omFine,
-													nu,
-													nxC,
-													nyC,
-													nxF,
-													nyF,
-													offCF);
+                                                    DF,
+                                                    neighborCX,
+                                                    neighborCY,
+                                                    neighborCZ,
+                                                    neighborFX,
+                                                    neighborFY,
+                                                    neighborFZ,
+                                                    size_MatC,
+                                                    size_MatF,
+                                                    isEvenTimestep,
+                                                    posCSWB,
+                                                    posFSWB,
+                                                    kCF,
+                                                    omCoarse,
+                                                    omFine,
+                                                    nu,
+                                                    nxC,
+                                                    nyC,
+                                                    nxF,
+                                                    nyF,
+                                                    offCF);
       getLastCudaError("scaleCF_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
@@ -4219,7 +4219,7 @@ void ScaleCFThS27( real* DC,
                               real nu,
                               real diffusivity_fine,
                               unsigned int numberOfThreads,
-							  OffCF offCF)
+                              OffCF offCF)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kCF);
 
@@ -4241,7 +4241,7 @@ void ScaleCFThS27( real* DC,
                                                 kCF,
                                                 nu,
                                                 diffusivity_fine,
-										        offCF);
+                                                offCF);
       getLastCudaError("scaleCFThS27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
@@ -4268,7 +4268,7 @@ void ScaleFC27( real* DC,
                            unsigned int nyF,
                            unsigned int numberOfThreads)
 {
-   
+
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
 
       scaleFC27<<< grid.grid, grid.threads >>> ( DC,
@@ -4500,263 +4500,263 @@ void ScaleFC_Fix_27(real* DC,
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleFC_Fix_comp_27(  real* DC,
-									  real* DF,
-									  unsigned int* neighborCX,
-									  unsigned int* neighborCY,
-									  unsigned int* neighborCZ,
-									  unsigned int* neighborFX,
-									  unsigned int* neighborFY,
-									  unsigned int* neighborFZ,
-									  unsigned int size_MatC,
-									  unsigned int size_MatF,
-									  bool isEvenTimestep,
-									  unsigned int* posC,
-									  unsigned int* posFSWB,
-									  unsigned int kFC,
-									  real omCoarse,
-									  real omFine,
-									  real nu,
-									  unsigned int nxC,
-									  unsigned int nyC,
-									  unsigned int nxF,
-									  unsigned int nyF,
-									  unsigned int numberOfThreads,
-									  OffFC offFC)
+                                      real* DF,
+                                      unsigned int* neighborCX,
+                                      unsigned int* neighborCY,
+                                      unsigned int* neighborCZ,
+                                      unsigned int* neighborFX,
+                                      unsigned int* neighborFY,
+                                      unsigned int* neighborFZ,
+                                      unsigned int size_MatC,
+                                      unsigned int size_MatF,
+                                      bool isEvenTimestep,
+                                      unsigned int* posC,
+                                      unsigned int* posFSWB,
+                                      unsigned int kFC,
+                                      real omCoarse,
+                                      real omFine,
+                                      real nu,
+                                      unsigned int nxC,
+                                      unsigned int nyC,
+                                      unsigned int nxF,
+                                      unsigned int nyF,
+                                      unsigned int numberOfThreads,
+                                      OffFC offFC)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
 
       scaleFC_Fix_comp_27<<< grid.grid, grid.threads >>> ( DC,
-													   DF,
-													   neighborCX,
-													   neighborCY,
-													   neighborCZ,
-													   neighborFX,
-													   neighborFY,
-													   neighborFZ,
-													   size_MatC,
-													   size_MatF,
-													   isEvenTimestep,
-													   posC,
-													   posFSWB,
-													   kFC,
-													   omCoarse,
-													   omFine,
-													   nu,
-													   nxC,
-													   nyC,
-													   nxF,
-													   nyF,
-													   offFC);
+                                                       DF,
+                                                       neighborCX,
+                                                       neighborCY,
+                                                       neighborCZ,
+                                                       neighborFX,
+                                                       neighborFY,
+                                                       neighborFZ,
+                                                       size_MatC,
+                                                       size_MatF,
+                                                       isEvenTimestep,
+                                                       posC,
+                                                       posFSWB,
+                                                       kFC,
+                                                       omCoarse,
+                                                       omFine,
+                                                       nu,
+                                                       nxC,
+                                                       nyC,
+                                                       nxF,
+                                                       nyF,
+                                                       offFC);
       getLastCudaError("scaleFC_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleFC_0817_comp_27( real* DC,
-									  real* DF,
-									  unsigned int* neighborCX,
-									  unsigned int* neighborCY,
-									  unsigned int* neighborCZ,
-									  unsigned int* neighborFX,
-									  unsigned int* neighborFY,
-									  unsigned int* neighborFZ,
-									  unsigned int size_MatC,
-									  unsigned int size_MatF,
-									  bool isEvenTimestep,
-									  unsigned int* posC,
-									  unsigned int* posFSWB,
-									  unsigned int kFC,
-									  real omCoarse,
-									  real omFine,
-									  real nu,
-									  unsigned int nxC,
-									  unsigned int nyC,
-									  unsigned int nxF,
-									  unsigned int nyF,
-									  unsigned int numberOfThreads,
-									  OffFC offFC,
+                                      real* DF,
+                                      unsigned int* neighborCX,
+                                      unsigned int* neighborCY,
+                                      unsigned int* neighborCZ,
+                                      unsigned int* neighborFX,
+                                      unsigned int* neighborFY,
+                                      unsigned int* neighborFZ,
+                                      unsigned int size_MatC,
+                                      unsigned int size_MatF,
+                                      bool isEvenTimestep,
+                                      unsigned int* posC,
+                                      unsigned int* posFSWB,
+                                      unsigned int kFC,
+                                      real omCoarse,
+                                      real omFine,
+                                      real nu,
+                                      unsigned int nxC,
+                                      unsigned int nyC,
+                                      unsigned int nxF,
+                                      unsigned int nyF,
+                                      unsigned int numberOfThreads,
+                                      OffFC offFC,
                              CUstream_st *stream)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
 
       scaleFC_0817_comp_27<<< grid.grid, grid.threads, 0, stream >>> (DC,
-													   DF,
-													   neighborCX,
-													   neighborCY,
-													   neighborCZ,
-													   neighborFX,
-													   neighborFY,
-													   neighborFZ,
-													   size_MatC,
-													   size_MatF,
-													   isEvenTimestep,
-													   posC,
-													   posFSWB,
-													   kFC,
-													   omCoarse,
-													   omFine,
-													   nu,
-													   nxC,
-													   nyC,
-													   nxF,
-													   nyF,
-													   offFC);
+                                                       DF,
+                                                       neighborCX,
+                                                       neighborCY,
+                                                       neighborCZ,
+                                                       neighborFX,
+                                                       neighborFY,
+                                                       neighborFZ,
+                                                       size_MatC,
+                                                       size_MatF,
+                                                       isEvenTimestep,
+                                                       posC,
+                                                       posFSWB,
+                                                       kFC,
+                                                       omCoarse,
+                                                       omFine,
+                                                       nu,
+                                                       nxC,
+                                                       nyC,
+                                                       nxF,
+                                                       nyF,
+                                                       offFC);
       getLastCudaError("scaleFC_0817_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleFC_comp_D3Q27F3_2018( real* DC,
-										   real* DF,
-										   real* G6,
-										   unsigned int* neighborCX,
-										   unsigned int* neighborCY,
-										   unsigned int* neighborCZ,
-										   unsigned int* neighborFX,
-										   unsigned int* neighborFY,
-										   unsigned int* neighborFZ,
-										   unsigned int size_MatC,
-										   unsigned int size_MatF,
-										   bool isEvenTimestep,
-										   unsigned int* posC,
-										   unsigned int* posFSWB,
-										   unsigned int kFC,
-										   real omCoarse,
-										   real omFine,
-										   real nu,
-										   unsigned int nxC,
-										   unsigned int nyC,
-										   unsigned int nxF,
-										   unsigned int nyF,
-										   unsigned int numberOfThreads,
-										   OffFC offFC)
+                                           real* DF,
+                                           real* G6,
+                                           unsigned int* neighborCX,
+                                           unsigned int* neighborCY,
+                                           unsigned int* neighborCZ,
+                                           unsigned int* neighborFX,
+                                           unsigned int* neighborFY,
+                                           unsigned int* neighborFZ,
+                                           unsigned int size_MatC,
+                                           unsigned int size_MatF,
+                                           bool isEvenTimestep,
+                                           unsigned int* posC,
+                                           unsigned int* posFSWB,
+                                           unsigned int kFC,
+                                           real omCoarse,
+                                           real omFine,
+                                           real nu,
+                                           unsigned int nxC,
+                                           unsigned int nyC,
+                                           unsigned int nxF,
+                                           unsigned int nyF,
+                                           unsigned int numberOfThreads,
+                                           OffFC offFC)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
 
      scaleFC_comp_D3Q27F3_2018 <<< grid.grid, grid.threads >>> (DC,
-															DF,
-															G6,
-															neighborCX,
-															neighborCY,
-															neighborCZ,
-															neighborFX,
-															neighborFY,
-															neighborFZ,
-															size_MatC,
-															size_MatF,
-															isEvenTimestep,
-															posC,
-															posFSWB,
-															kFC,
-															omCoarse,
-															omFine,
-															nu,
-															nxC,
-															nyC,
-															nxF,
-															nyF,
-															offFC);
+                                                            DF,
+                                                            G6,
+                                                            neighborCX,
+                                                            neighborCY,
+                                                            neighborCZ,
+                                                            neighborFX,
+                                                            neighborFY,
+                                                            neighborFZ,
+                                                            size_MatC,
+                                                            size_MatF,
+                                                            isEvenTimestep,
+                                                            posC,
+                                                            posFSWB,
+                                                            kFC,
+                                                            omCoarse,
+                                                            omFine,
+                                                            nu,
+                                                            nxC,
+                                                            nyC,
+                                                            nxF,
+                                                            nyF,
+                                                            offFC);
       getLastCudaError("scaleFC_comp_D3Q27F3_2018 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleFC_comp_D3Q27F3( real* DC,
-									  real* DF,
-									  real* G6,
-									  unsigned int* neighborCX,
-									  unsigned int* neighborCY,
-									  unsigned int* neighborCZ,
-									  unsigned int* neighborFX,
-									  unsigned int* neighborFY,
-									  unsigned int* neighborFZ,
-									  unsigned int size_MatC,
-									  unsigned int size_MatF,
-									  bool isEvenTimestep,
-									  unsigned int* posC,
-									  unsigned int* posFSWB,
-									  unsigned int kFC,
-									  real omCoarse,
-									  real omFine,
-									  real nu,
-									  unsigned int nxC,
-									  unsigned int nyC,
-									  unsigned int nxF,
-									  unsigned int nyF,
-									  unsigned int numberOfThreads,
-									  OffFC offFC,
+                                      real* DF,
+                                      real* G6,
+                                      unsigned int* neighborCX,
+                                      unsigned int* neighborCY,
+                                      unsigned int* neighborCZ,
+                                      unsigned int* neighborFX,
+                                      unsigned int* neighborFY,
+                                      unsigned int* neighborFZ,
+                                      unsigned int size_MatC,
+                                      unsigned int size_MatF,
+                                      bool isEvenTimestep,
+                                      unsigned int* posC,
+                                      unsigned int* posFSWB,
+                                      unsigned int kFC,
+                                      real omCoarse,
+                                      real omFine,
+                                      real nu,
+                                      unsigned int nxC,
+                                      unsigned int nyC,
+                                      unsigned int nxF,
+                                      unsigned int nyF,
+                                      unsigned int numberOfThreads,
+                                      OffFC offFC,
                              CUstream_st *stream)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
 
      scaleFC_comp_D3Q27F3 <<< grid.grid, grid.threads, 0, stream >>> (DC,
-													   DF,
-													   G6,
-													   neighborCX,
-													   neighborCY,
-													   neighborCZ,
-													   neighborFX,
-													   neighborFY,
-													   neighborFZ,
-													   size_MatC,
-													   size_MatF,
-													   isEvenTimestep,
-													   posC,
-													   posFSWB,
-													   kFC,
-													   omCoarse,
-													   omFine,
-													   nu,
-													   nxC,
-													   nyC,
-													   nxF,
-													   nyF,
-													   offFC);
+                                                       DF,
+                                                       G6,
+                                                       neighborCX,
+                                                       neighborCY,
+                                                       neighborCZ,
+                                                       neighborFX,
+                                                       neighborFY,
+                                                       neighborFZ,
+                                                       size_MatC,
+                                                       size_MatF,
+                                                       isEvenTimestep,
+                                                       posC,
+                                                       posFSWB,
+                                                       kFC,
+                                                       omCoarse,
+                                                       omFine,
+                                                       nu,
+                                                       nxC,
+                                                       nyC,
+                                                       nxF,
+                                                       nyF,
+                                                       offFC);
       getLastCudaError("scaleFC_0817_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleFC_staggered_time_comp_27(   real* DC,
-												  real* DF,
-												  unsigned int* neighborCX,
-												  unsigned int* neighborCY,
-												  unsigned int* neighborCZ,
-												  unsigned int* neighborFX,
-												  unsigned int* neighborFY,
-												  unsigned int* neighborFZ,
-												  unsigned int size_MatC,
-												  unsigned int size_MatF,
-												  bool isEvenTimestep,
-												  unsigned int* posC,
-												  unsigned int* posFSWB,
-												  unsigned int kFC,
-												  real omCoarse,
-												  real omFine,
-												  real nu,
-												  unsigned int nxC,
-												  unsigned int nyC,
-												  unsigned int nxF,
-												  unsigned int nyF,
-												  unsigned int numberOfThreads,
-												  OffFC offFC)
+                                                  real* DF,
+                                                  unsigned int* neighborCX,
+                                                  unsigned int* neighborCY,
+                                                  unsigned int* neighborCZ,
+                                                  unsigned int* neighborFX,
+                                                  unsigned int* neighborFY,
+                                                  unsigned int* neighborFZ,
+                                                  unsigned int size_MatC,
+                                                  unsigned int size_MatF,
+                                                  bool isEvenTimestep,
+                                                  unsigned int* posC,
+                                                  unsigned int* posFSWB,
+                                                  unsigned int kFC,
+                                                  real omCoarse,
+                                                  real omFine,
+                                                  real nu,
+                                                  unsigned int nxC,
+                                                  unsigned int nyC,
+                                                  unsigned int nxF,
+                                                  unsigned int nyF,
+                                                  unsigned int numberOfThreads,
+                                                  OffFC offFC)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
 
       scaleFC_staggered_time_comp_27<<< grid.grid, grid.threads >>> (  DC,
-																   DF,
-																   neighborCX,
-																   neighborCY,
-																   neighborCZ,
-																   neighborFX,
-																   neighborFY,
-																   neighborFZ,
-																   size_MatC,
-																   size_MatF,
-																   isEvenTimestep,
-																   posC,
-																   posFSWB,
-																   kFC,
-																   omCoarse,
-																   omFine,
-																   nu,
-																   nxC,
-																   nyC,
-																   nxF,
-																   nyF,
-																   offFC);
+                                                                   DF,
+                                                                   neighborCX,
+                                                                   neighborCY,
+                                                                   neighborCZ,
+                                                                   neighborFX,
+                                                                   neighborFY,
+                                                                   neighborFZ,
+                                                                   size_MatC,
+                                                                   size_MatF,
+                                                                   isEvenTimestep,
+                                                                   posC,
+                                                                   posFSWB,
+                                                                   kFC,
+                                                                   omCoarse,
+                                                                   omFine,
+                                                                   nu,
+                                                                   nxC,
+                                                                   nyC,
+                                                                   nxF,
+                                                                   nyF,
+                                                                   offFC);
       getLastCudaError("scaleFC_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
@@ -4818,157 +4818,157 @@ void ScaleFC_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulati
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleFC_RhoSq_3rdMom_comp_27( real* DC,
-											  real* DF,
-											  unsigned int* neighborCX,
-											  unsigned int* neighborCY,
-											  unsigned int* neighborCZ,
-											  unsigned int* neighborFX,
-											  unsigned int* neighborFY,
-											  unsigned int* neighborFZ,
-											  unsigned int size_MatC,
-											  unsigned int size_MatF,
-											  bool isEvenTimestep,
-											  unsigned int* posC,
-											  unsigned int* posFSWB,
-											  unsigned int kFC,
-											  real omCoarse,
-											  real omFine,
-											  real nu,
-											  unsigned int nxC,
-											  unsigned int nyC,
-											  unsigned int nxF,
-											  unsigned int nyF,
-											  unsigned int numberOfThreads,
-											  OffFC offFC,
+                                              real* DF,
+                                              unsigned int* neighborCX,
+                                              unsigned int* neighborCY,
+                                              unsigned int* neighborCZ,
+                                              unsigned int* neighborFX,
+                                              unsigned int* neighborFY,
+                                              unsigned int* neighborFZ,
+                                              unsigned int size_MatC,
+                                              unsigned int size_MatF,
+                                              bool isEvenTimestep,
+                                              unsigned int* posC,
+                                              unsigned int* posFSWB,
+                                              unsigned int kFC,
+                                              real omCoarse,
+                                              real omFine,
+                                              real nu,
+                                              unsigned int nxC,
+                                              unsigned int nyC,
+                                              unsigned int nxF,
+                                              unsigned int nyF,
+                                              unsigned int numberOfThreads,
+                                              OffFC offFC,
                                    CUstream_st *stream)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
 
       scaleFC_RhoSq_3rdMom_comp_27<<< grid.grid, grid.threads, 0, stream >>>(DC,
-															  DF,
-															  neighborCX,
-															  neighborCY,
-															  neighborCZ,
-															  neighborFX,
-															  neighborFY,
-															  neighborFZ,
-															  size_MatC,
-															  size_MatF,
-															  isEvenTimestep,
-															  posC,
-															  posFSWB,
-															  kFC,
-															  omCoarse,
-															  omFine,
-															  nu,
-															  nxC,
-															  nyC,
-															  nxF,
-															  nyF,
-															  offFC);
+                                                              DF,
+                                                              neighborCX,
+                                                              neighborCY,
+                                                              neighborCZ,
+                                                              neighborFX,
+                                                              neighborFY,
+                                                              neighborFZ,
+                                                              size_MatC,
+                                                              size_MatF,
+                                                              isEvenTimestep,
+                                                              posC,
+                                                              posFSWB,
+                                                              kFC,
+                                                              omCoarse,
+                                                              omFine,
+                                                              nu,
+                                                              nxC,
+                                                              nyC,
+                                                              nxF,
+                                                              nyF,
+                                                              offFC);
       getLastCudaError("scaleFC_RhoSq_3rdMom_comp_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleFC_AA2016_comp_27( real* DC,
-										real* DF,
-										unsigned int* neighborCX,
-										unsigned int* neighborCY,
-										unsigned int* neighborCZ,
-										unsigned int* neighborFX,
-										unsigned int* neighborFY,
-										unsigned int* neighborFZ,
-										unsigned int size_MatC,
-										unsigned int size_MatF,
-										bool isEvenTimestep,
-										unsigned int* posC,
-										unsigned int* posFSWB,
-										unsigned int kFC,
-										real omCoarse,
-										real omFine,
-										real nu,
-										unsigned int nxC,
-										unsigned int nyC,
-										unsigned int nxF,
-										unsigned int nyF,
-										unsigned int numberOfThreads,
-										OffFC offFC,
+                                        real* DF,
+                                        unsigned int* neighborCX,
+                                        unsigned int* neighborCY,
+                                        unsigned int* neighborCZ,
+                                        unsigned int* neighborFX,
+                                        unsigned int* neighborFY,
+                                        unsigned int* neighborFZ,
+                                        unsigned int size_MatC,
+                                        unsigned int size_MatF,
+                                        bool isEvenTimestep,
+                                        unsigned int* posC,
+                                        unsigned int* posFSWB,
+                                        unsigned int kFC,
+                                        real omCoarse,
+                                        real omFine,
+                                        real nu,
+                                        unsigned int nxC,
+                                        unsigned int nyC,
+                                        unsigned int nxF,
+                                        unsigned int nyF,
+                                        unsigned int numberOfThreads,
+                                        OffFC offFC,
                               CUstream_st *stream)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
 
       scaleFC_AA2016_comp_27<<< grid.grid, grid.threads, 0, stream >>>(DC,
-														DF,
-														neighborCX,
-														neighborCY,
-														neighborCZ,
-														neighborFX,
-														neighborFY,
-														neighborFZ,
-														size_MatC,
-														size_MatF,
-														isEvenTimestep,
-														posC,
-														posFSWB,
-														kFC,
-														omCoarse,
-														omFine,
-														nu,
-														nxC,
-														nyC,
-														nxF,
-														nyF,
-														offFC);
+                                                        DF,
+                                                        neighborCX,
+                                                        neighborCY,
+                                                        neighborCZ,
+                                                        neighborFX,
+                                                        neighborFY,
+                                                        neighborFZ,
+                                                        size_MatC,
+                                                        size_MatF,
+                                                        isEvenTimestep,
+                                                        posC,
+                                                        posFSWB,
+                                                        kFC,
+                                                        omCoarse,
+                                                        omFine,
+                                                        nu,
+                                                        nxC,
+                                                        nyC,
+                                                        nxF,
+                                                        nyF,
+                                                        offFC);
       getLastCudaError("scaleFC_AA2016_comp_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void ScaleFC_NSPress_27(real* DC,
-								  real* DF,
-								  unsigned int* neighborCX,
-								  unsigned int* neighborCY,
-								  unsigned int* neighborCZ,
-								  unsigned int* neighborFX,
-								  unsigned int* neighborFY,
-								  unsigned int* neighborFZ,
-								  unsigned int size_MatC,
-								  unsigned int size_MatF,
-								  bool isEvenTimestep,
-								  unsigned int* posC,
-								  unsigned int* posFSWB,
-								  unsigned int kFC,
-								  real omCoarse,
-								  real omFine,
-								  real nu,
-								  unsigned int nxC,
-								  unsigned int nyC,
-								  unsigned int nxF,
-								  unsigned int nyF,
-								  unsigned int numberOfThreads,
-								  OffFC offFC)
+                                  real* DF,
+                                  unsigned int* neighborCX,
+                                  unsigned int* neighborCY,
+                                  unsigned int* neighborCZ,
+                                  unsigned int* neighborFX,
+                                  unsigned int* neighborFY,
+                                  unsigned int* neighborFZ,
+                                  unsigned int size_MatC,
+                                  unsigned int size_MatF,
+                                  bool isEvenTimestep,
+                                  unsigned int* posC,
+                                  unsigned int* posFSWB,
+                                  unsigned int kFC,
+                                  real omCoarse,
+                                  real omFine,
+                                  real nu,
+                                  unsigned int nxC,
+                                  unsigned int nyC,
+                                  unsigned int nxF,
+                                  unsigned int nyF,
+                                  unsigned int numberOfThreads,
+                                  OffFC offFC)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
 
       scaleFC_NSPress_27<<< grid.grid, grid.threads >>> (  DC,
-													   DF,
-													   neighborCX,
-													   neighborCY,
-													   neighborCZ,
-													   neighborFX,
-													   neighborFY,
-													   neighborFZ,
-													   size_MatC,
-													   size_MatF,
-													   isEvenTimestep,
-													   posC,
-													   posFSWB,
-													   kFC,
-													   omCoarse,
-													   omFine,
-													   nu,
-													   nxC,
-													   nyC,
-													   nxF,
-													   nyF,
-													   offFC);
+                                                       DF,
+                                                       neighborCX,
+                                                       neighborCY,
+                                                       neighborCZ,
+                                                       neighborFX,
+                                                       neighborFY,
+                                                       neighborFZ,
+                                                       size_MatC,
+                                                       size_MatF,
+                                                       isEvenTimestep,
+                                                       posC,
+                                                       posFSWB,
+                                                       kFC,
+                                                       omCoarse,
+                                                       omFine,
+                                                       nu,
+                                                       nxC,
+                                                       nyC,
+                                                       nxF,
+                                                       nyF,
+                                                       offFC);
       getLastCudaError("scaleFC_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
@@ -5079,7 +5079,7 @@ void ScaleFCThS27( real* DC,
                               real nu,
                               real diffusivity_coarse,
                               unsigned int numberOfThreads,
-							  OffFC offFC)
+                              OffFC offFC)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, kFC);
 
@@ -5101,501 +5101,501 @@ void ScaleFCThS27( real* DC,
                                                 kFC,
                                                 nu,
                                                 diffusivity_coarse,
-												offFC);
+                                                offFC);
       getLastCudaError("scaleFCThS27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void DragLiftPostD27(real* DD,
-								int* k_Q,
-								real* QQ,
-								int numberOfBCnodes,
-								double *DragX,
-								double *DragY,
-								double *DragZ,
-								unsigned int* neighborX,
-								unsigned int* neighborY,
-								unsigned int* neighborZ,
-								unsigned int size_Mat,
-								bool isEvenTimestep,
-								unsigned int numberOfThreads)
+                                int* k_Q,
+                                real* QQ,
+                                int numberOfBCnodes,
+                                double *DragX,
+                                double *DragY,
+                                double *DragZ,
+                                unsigned int* neighborX,
+                                unsigned int* neighborY,
+                                unsigned int* neighborZ,
+                                unsigned int size_Mat,
+                                bool isEvenTimestep,
+                                unsigned int numberOfThreads)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
-	DragLiftPost27<<< grid.grid, grid.threads >>>(DD,
-										k_Q,
-										QQ,
-										numberOfBCnodes,
-										DragX,
-										DragY,
-										DragZ,
-										neighborX,
-										neighborY,
-										neighborZ,
-										size_Mat,
-										isEvenTimestep);
-	getLastCudaError("DragLift27 execution failed");
+    DragLiftPost27<<< grid.grid, grid.threads >>>(DD,
+                                        k_Q,
+                                        QQ,
+                                        numberOfBCnodes,
+                                        DragX,
+                                        DragY,
+                                        DragZ,
+                                        neighborX,
+                                        neighborY,
+                                        neighborZ,
+                                        size_Mat,
+                                        isEvenTimestep);
+    getLastCudaError("DragLift27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void DragLiftPreD27( real* DD,
-								int* k_Q,
-								real* QQ,
-								int numberOfBCnodes,
-								double *DragX,
-								double *DragY,
-								double *DragZ,
-								unsigned int* neighborX,
-								unsigned int* neighborY,
-								unsigned int* neighborZ,
-								unsigned int size_Mat,
-								bool isEvenTimestep,
-								unsigned int numberOfThreads)
-{
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
-
-	DragLiftPre27<<< grid.grid, grid.threads >>>( DD,
-										k_Q,
-										QQ,
-										numberOfBCnodes,
-										DragX,
-										DragY,
-										DragZ,
-										neighborX,
-										neighborY,
-										neighborZ,
-										size_Mat,
-										isEvenTimestep);
-	getLastCudaError("DragLift27 execution failed");
+                                int* k_Q,
+                                real* QQ,
+                                int numberOfBCnodes,
+                                double *DragX,
+                                double *DragY,
+                                double *DragZ,
+                                unsigned int* neighborX,
+                                unsigned int* neighborY,
+                                unsigned int* neighborZ,
+                                unsigned int size_Mat,
+                                bool isEvenTimestep,
+                                unsigned int numberOfThreads)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
+
+    DragLiftPre27<<< grid.grid, grid.threads >>>( DD,
+                                        k_Q,
+                                        QQ,
+                                        numberOfBCnodes,
+                                        DragX,
+                                        DragY,
+                                        DragZ,
+                                        neighborX,
+                                        neighborY,
+                                        neighborZ,
+                                        size_Mat,
+                                        isEvenTimestep);
+    getLastCudaError("DragLift27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void CalcCPtop27(real* DD,
-							int* cpIndex,
-							int nonCp,
-							double *cpPress,
-							unsigned int* neighborX,
-							unsigned int* neighborY,
-							unsigned int* neighborZ,
-							unsigned int size_Mat,
-							bool isEvenTimestep,
-							unsigned int numberOfThreads)
-{
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonCp);
-
-	CalcCP27<<< grid.grid, grid.threads >>>(DD,
-								  cpIndex,
-								  nonCp,
-								  cpPress,
-								  neighborX,
-								  neighborY,
-								  neighborZ,
-								  size_Mat,
-								  isEvenTimestep);
-	getLastCudaError("CalcCP27 execution failed");
+                            int* cpIndex,
+                            int nonCp,
+                            double *cpPress,
+                            unsigned int* neighborX,
+                            unsigned int* neighborY,
+                            unsigned int* neighborZ,
+                            unsigned int size_Mat,
+                            bool isEvenTimestep,
+                            unsigned int numberOfThreads)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonCp);
+
+    CalcCP27<<< grid.grid, grid.threads >>>(DD,
+                                  cpIndex,
+                                  nonCp,
+                                  cpPress,
+                                  neighborX,
+                                  neighborY,
+                                  neighborZ,
+                                  size_Mat,
+                                  isEvenTimestep);
+    getLastCudaError("CalcCP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void CalcCPbottom27( real* DD,
-								int* cpIndex,
-								int nonCp,
-								double *cpPress,
-								unsigned int* neighborX,
-								unsigned int* neighborY,
-								unsigned int* neighborZ,
-								unsigned int size_Mat,
-								bool isEvenTimestep,
-								unsigned int numberOfThreads)
-{
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonCp);
-
-	CalcCP27<<< grid.grid, grid.threads >>>(DD,
-								  cpIndex,
-								  nonCp,
-								  cpPress,
-								  neighborX,
-								  neighborY,
-								  neighborZ,
-								  size_Mat,
-								  isEvenTimestep);
-	getLastCudaError("CalcCP27 execution failed");
+                                int* cpIndex,
+                                int nonCp,
+                                double *cpPress,
+                                unsigned int* neighborX,
+                                unsigned int* neighborY,
+                                unsigned int* neighborZ,
+                                unsigned int size_Mat,
+                                bool isEvenTimestep,
+                                unsigned int numberOfThreads)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonCp);
+
+    CalcCP27<<< grid.grid, grid.threads >>>(DD,
+                                  cpIndex,
+                                  nonCp,
+                                  cpPress,
+                                  neighborX,
+                                  neighborY,
+                                  neighborZ,
+                                  size_Mat,
+                                  isEvenTimestep);
+    getLastCudaError("CalcCP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void GetSendFsPreDev27(real* DD,
-								  real* bufferFs,
-								  int* sendIndex,
-								  int buffmax,
-								  unsigned int* neighborX,
-								  unsigned int* neighborY,
-								  unsigned int* neighborZ,
-								  unsigned int size_Mat,
-								  bool isEvenTimestep,
-								  unsigned int numberOfThreads,
-								  cudaStream_t stream)
-{
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
-
-	getSendFsPre27<<< grid.grid, grid.threads, 0, stream >>>(DD,
-										bufferFs,
-										sendIndex,
-										buffmax,
-										neighborX,
-										neighborY,
-										neighborZ,
-										size_Mat,
-										isEvenTimestep);
-	getLastCudaError("getSendFsPre27 execution failed");
+                                  real* bufferFs,
+                                  int* sendIndex,
+                                  int buffmax,
+                                  unsigned int* neighborX,
+                                  unsigned int* neighborY,
+                                  unsigned int* neighborZ,
+                                  unsigned int size_Mat,
+                                  bool isEvenTimestep,
+                                  unsigned int numberOfThreads,
+                                  cudaStream_t stream)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
+
+    getSendFsPre27<<< grid.grid, grid.threads, 0, stream >>>(DD,
+                                        bufferFs,
+                                        sendIndex,
+                                        buffmax,
+                                        neighborX,
+                                        neighborY,
+                                        neighborZ,
+                                        size_Mat,
+                                        isEvenTimestep);
+    getLastCudaError("getSendFsPre27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void GetSendFsPostDev27(real* DD,
-								   real* bufferFs,
-								   int* sendIndex,
-								   int buffmax,
-								   unsigned int* neighborX,
-								   unsigned int* neighborY,
-								   unsigned int* neighborZ,
-								   unsigned int size_Mat,
-								   bool isEvenTimestep,
-								   unsigned int numberOfThreads,
-								   cudaStream_t stream)
-{
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
-
-	getSendFsPost27<<< grid.grid, grid.threads, 0, stream >>>(DD,
-										 bufferFs,
-										 sendIndex,
-										 buffmax,
-										 neighborX,
-										 neighborY,
-										 neighborZ,
-										 size_Mat,
-										 isEvenTimestep);
-	getLastCudaError("getSendFsPost27 execution failed");
+                                   real* bufferFs,
+                                   int* sendIndex,
+                                   int buffmax,
+                                   unsigned int* neighborX,
+                                   unsigned int* neighborY,
+                                   unsigned int* neighborZ,
+                                   unsigned int size_Mat,
+                                   bool isEvenTimestep,
+                                   unsigned int numberOfThreads,
+                                   cudaStream_t stream)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
+
+    getSendFsPost27<<< grid.grid, grid.threads, 0, stream >>>(DD,
+                                         bufferFs,
+                                         sendIndex,
+                                         buffmax,
+                                         neighborX,
+                                         neighborY,
+                                         neighborZ,
+                                         size_Mat,
+                                         isEvenTimestep);
+    getLastCudaError("getSendFsPost27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void SetRecvFsPreDev27(real* DD,
-								  real* bufferFs,
-								  int* recvIndex,
-								  int buffmax,
-								  unsigned int* neighborX,
-								  unsigned int* neighborY,
-								  unsigned int* neighborZ,
-								  unsigned int size_Mat,
-								  bool isEvenTimestep,
-								  unsigned int numberOfThreads,
-	                              cudaStream_t stream)
-{
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
-
-	setRecvFsPre27<<< grid.grid, grid.threads, 0, stream >>>(DD,
-										bufferFs,
-										recvIndex,
-										buffmax,
-										neighborX,
-										neighborY,
-										neighborZ,
-										size_Mat,
-										isEvenTimestep);
-	getLastCudaError("setRecvFsPre27 execution failed");
+                                  real* bufferFs,
+                                  int* recvIndex,
+                                  int buffmax,
+                                  unsigned int* neighborX,
+                                  unsigned int* neighborY,
+                                  unsigned int* neighborZ,
+                                  unsigned int size_Mat,
+                                  bool isEvenTimestep,
+                                  unsigned int numberOfThreads,
+                                  cudaStream_t stream)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
+
+    setRecvFsPre27<<< grid.grid, grid.threads, 0, stream >>>(DD,
+                                        bufferFs,
+                                        recvIndex,
+                                        buffmax,
+                                        neighborX,
+                                        neighborY,
+                                        neighborZ,
+                                        size_Mat,
+                                        isEvenTimestep);
+    getLastCudaError("setRecvFsPre27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void SetRecvFsPostDev27(real* DD,
-								   real* bufferFs,
-								   int* recvIndex,
-								   int buffmax,
-								   unsigned int* neighborX,
-								   unsigned int* neighborY,
-								   unsigned int* neighborZ,
-								   unsigned int size_Mat,
-								   bool isEvenTimestep,
-	                               unsigned int numberOfThreads,
-	                               cudaStream_t stream)
-{
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
-
-	setRecvFsPost27<<< grid.grid, grid.threads, 0, stream >>>(DD,
-										 bufferFs,
-										 recvIndex,
-										 buffmax,
-										 neighborX,
-										 neighborY,
-										 neighborZ,
-										 size_Mat,
-										 isEvenTimestep);
-	getLastCudaError("setRecvFsPost27 execution failed");
+                                   real* bufferFs,
+                                   int* recvIndex,
+                                   int buffmax,
+                                   unsigned int* neighborX,
+                                   unsigned int* neighborY,
+                                   unsigned int* neighborZ,
+                                   unsigned int size_Mat,
+                                   bool isEvenTimestep,
+                                   unsigned int numberOfThreads,
+                                   cudaStream_t stream)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
+
+    setRecvFsPost27<<< grid.grid, grid.threads, 0, stream >>>(DD,
+                                         bufferFs,
+                                         recvIndex,
+                                         buffmax,
+                                         neighborX,
+                                         neighborY,
+                                         neighborZ,
+                                         size_Mat,
+                                         isEvenTimestep);
+    getLastCudaError("setRecvFsPost27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void getSendGsDevF3(
-	real* G6,
-	real* bufferGs,
-	int* sendIndex,
-	int buffmax,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int size_Mat,
-	bool isEvenTimestep,
-	unsigned int numberOfThreads)
-{
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
-
-	getSendGsF3 <<< grid.grid, grid.threads >>> (
-		G6,
-		bufferGs,
-		sendIndex,
-		buffmax,
-		neighborX,
-		neighborY,
-		neighborZ,
-		size_Mat,
-		isEvenTimestep);
-	getLastCudaError("getSendGsF3 execution failed");
+    real* G6,
+    real* bufferGs,
+    int* sendIndex,
+    int buffmax,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int size_Mat,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
+
+    getSendGsF3 <<< grid.grid, grid.threads >>> (
+        G6,
+        bufferGs,
+        sendIndex,
+        buffmax,
+        neighborX,
+        neighborY,
+        neighborZ,
+        size_Mat,
+        isEvenTimestep);
+    getLastCudaError("getSendGsF3 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void setRecvGsDevF3(
-	real* G6,
-	real* bufferGs,
-	int* recvIndex,
-	int buffmax,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	unsigned int size_Mat,
-	bool isEvenTimestep,
-	unsigned int numberOfThreads)
-{
-	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
-
-	setRecvGsF3 <<< grid.grid, grid.threads >>> (
-		G6,
-		bufferGs,
-		recvIndex,
-		buffmax,
-		neighborX,
-		neighborY,
-		neighborZ,
-		size_Mat,
-		isEvenTimestep);
-	getLastCudaError("setRecvGsF3 execution failed");
+    real* G6,
+    real* bufferGs,
+    int* recvIndex,
+    int buffmax,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    unsigned int size_Mat,
+    bool isEvenTimestep,
+    unsigned int numberOfThreads)
+{
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, buffmax);
+
+    setRecvGsF3 <<< grid.grid, grid.threads >>> (
+        G6,
+        bufferGs,
+        recvIndex,
+        buffmax,
+        neighborX,
+        neighborY,
+        neighborZ,
+        size_Mat,
+        isEvenTimestep);
+    getLastCudaError("setRecvGsF3 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void WallFuncDev27(unsigned int numberOfThreads,
-							  real* vx,
-							  real* vy,
-							  real* vz,
-							  real* DD,
-							  int* k_Q,
-							  real* QQ,
-							  unsigned int numberOfBCnodes,
-							  real om1,
-							  unsigned int* neighborX,
-							  unsigned int* neighborY,
-							  unsigned int* neighborZ,
-							  unsigned int size_Mat,
-							  bool isEvenTimestep)
+                              real* vx,
+                              real* vy,
+                              real* vz,
+                              real* DD,
+                              int* k_Q,
+                              real* QQ,
+                              unsigned int numberOfBCnodes,
+                              real om1,
+                              unsigned int* neighborX,
+                              unsigned int* neighborY,
+                              unsigned int* neighborZ,
+                              unsigned int size_Mat,
+                              bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfBCnodes);
 
       WallFunction27<<< grid.grid, grid.threads >>> (
-											  vx,
-											  vy,
-											  vz,
-											  DD,
-											  k_Q,
-											  QQ,
-											  numberOfBCnodes,
-											  om1,
-											  neighborX,
-											  neighborY,
-											  neighborZ,
-											  size_Mat,
-											  isEvenTimestep);
+                                              vx,
+                                              vy,
+                                              vz,
+                                              DD,
+                                              k_Q,
+                                              QQ,
+                                              numberOfBCnodes,
+                                              om1,
+                                              neighborX,
+                                              neighborY,
+                                              neighborZ,
+                                              size_Mat,
+                                              isEvenTimestep);
       getLastCudaError("WallFunction27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
-										  real* vxD,
-										  real* vyD,
-										  real* vzD,
-										  real* vxWall,
-										  real* vyWall,
-										  real* vzWall,
-										  int numberOfWallNodes,
-										  int* kWallNodes,
-										  real* rhoD,
-										  real* pressD,
-										  unsigned int* geoD,
-										  unsigned int* neighborX,
-										  unsigned int* neighborY,
-										  unsigned int* neighborZ,
-										  unsigned int size_Mat,
-										  real* DD,
-										  bool isEvenTimestep)
+                                          real* vxD,
+                                          real* vyD,
+                                          real* vzD,
+                                          real* vxWall,
+                                          real* vyWall,
+                                          real* vzWall,
+                                          int numberOfWallNodes,
+                                          int* kWallNodes,
+                                          real* rhoD,
+                                          real* pressD,
+                                          unsigned int* geoD,
+                                          unsigned int* neighborX,
+                                          unsigned int* neighborY,
+                                          unsigned int* neighborZ,
+                                          unsigned int size_Mat,
+                                          real* DD,
+                                          bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfWallNodes);
 
       LBSetOutputWallVelocitySP27<<< grid.grid, grid.threads >>> (	vxD,
-															vyD,
-															vzD,
-															vxWall,
-															vyWall,
-															vzWall,
-															numberOfWallNodes,
-															kWallNodes,
-															rhoD,
-															pressD,
-															geoD,
-															neighborX,
-															neighborY,
-															neighborZ,
-															size_Mat,
-															DD,
-															isEvenTimestep);
+                                                            vyD,
+                                                            vzD,
+                                                            vxWall,
+                                                            vyWall,
+                                                            vzWall,
+                                                            numberOfWallNodes,
+                                                            kWallNodes,
+                                                            rhoD,
+                                                            pressD,
+                                                            geoD,
+                                                            neighborX,
+                                                            neighborY,
+                                                            neighborZ,
+                                                            size_Mat,
+                                                            DD,
+                                                            isEvenTimestep);
       getLastCudaError("LBSetOutputWallVelocitySP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void GetVelotoForce27(unsigned int numberOfThreads,
-								 real* DD,
-								 int* bcIndex,
-								 int nonAtBC,
-								 real* Vx,
-								 real* Vy,
-								 real* Vz,
-								 unsigned int* neighborX,
-								 unsigned int* neighborY,
-								 unsigned int* neighborZ,
-								 unsigned int size_Mat,
-								 bool isEvenTimestep)
+                                 real* DD,
+                                 int* bcIndex,
+                                 int nonAtBC,
+                                 real* Vx,
+                                 real* Vy,
+                                 real* Vz,
+                                 unsigned int* neighborX,
+                                 unsigned int* neighborY,
+                                 unsigned int* neighborZ,
+                                 unsigned int size_Mat,
+                                 bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, nonAtBC);
 
       GetVeloforForcing27<<< grid.grid, grid.threads >>> (DD,
-												bcIndex,
-												nonAtBC,
-												Vx,
-												Vy,
-												Vz,
-												neighborX,
-												neighborY,
-												neighborZ,
-												size_Mat,
-												isEvenTimestep);
+                                                bcIndex,
+                                                nonAtBC,
+                                                Vx,
+                                                Vy,
+                                                Vz,
+                                                neighborX,
+                                                neighborY,
+                                                neighborZ,
+                                                size_Mat,
+                                                isEvenTimestep);
       getLastCudaError("GetVeloforForcing27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void InitParticlesDevice(real* coordX,
-									real* coordY,
-									real* coordZ,
-									real* coordParticleXlocal,
-									real* coordParticleYlocal,
-									real* coordParticleZlocal,
-									real* coordParticleXglobal,
-									real* coordParticleYglobal,
-									real* coordParticleZglobal,
-									real* veloParticleX,
-									real* veloParticleY,
-									real* veloParticleZ,
-									real* randArray,
-									unsigned int* particleID,
-									unsigned int* cellBaseID,
-									unsigned int* bcMatD,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int* neighborWSB,
-									int level,
-									unsigned int numberOfParticles,
-									unsigned int size_Mat,
-									unsigned int numberOfThreads)
+                                    real* coordY,
+                                    real* coordZ,
+                                    real* coordParticleXlocal,
+                                    real* coordParticleYlocal,
+                                    real* coordParticleZlocal,
+                                    real* coordParticleXglobal,
+                                    real* coordParticleYglobal,
+                                    real* coordParticleZglobal,
+                                    real* veloParticleX,
+                                    real* veloParticleY,
+                                    real* veloParticleZ,
+                                    real* randArray,
+                                    unsigned int* particleID,
+                                    unsigned int* cellBaseID,
+                                    unsigned int* bcMatD,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    unsigned int* neighborWSB,
+                                    int level,
+                                    unsigned int numberOfParticles,
+                                    unsigned int size_Mat,
+                                    unsigned int numberOfThreads)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfParticles);
 
    InitParticles<<< grid.grid, grid.threads >>> (coordX,
-										coordY,
-										coordZ,
-										coordParticleXlocal,
-										coordParticleYlocal,
-										coordParticleZlocal,
-										coordParticleXglobal,
-										coordParticleYglobal,
-										coordParticleZglobal,
-										veloParticleX,
-										veloParticleY,
-										veloParticleZ,
-										randArray,
-										particleID,
-										cellBaseID,
-										bcMatD,
-										neighborX,
-										neighborY,
-										neighborZ,
-										neighborWSB,
-										level,
-										numberOfParticles,
-										size_Mat);
+                                        coordY,
+                                        coordZ,
+                                        coordParticleXlocal,
+                                        coordParticleYlocal,
+                                        coordParticleZlocal,
+                                        coordParticleXglobal,
+                                        coordParticleYglobal,
+                                        coordParticleZglobal,
+                                        veloParticleX,
+                                        veloParticleY,
+                                        veloParticleZ,
+                                        randArray,
+                                        particleID,
+                                        cellBaseID,
+                                        bcMatD,
+                                        neighborX,
+                                        neighborY,
+                                        neighborZ,
+                                        neighborWSB,
+                                        level,
+                                        numberOfParticles,
+                                        size_Mat);
       getLastCudaError("InitParticles execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void MoveParticlesDevice(real* coordX,
-									real* coordY,
-									real* coordZ,
-									real* coordParticleXlocal,
-									real* coordParticleYlocal,
-									real* coordParticleZlocal,
-									real* coordParticleXglobal,
-									real* coordParticleYglobal,
-									real* coordParticleZglobal,
-									real* veloParticleX,
-									real* veloParticleY,
-									real* veloParticleZ,
-									real* DD,
-									real  omega,
-									unsigned int* particleID,
-									unsigned int* cellBaseID,
-									unsigned int* bcMatD,
-									unsigned int* neighborX,
-									unsigned int* neighborY,
-									unsigned int* neighborZ,
-									unsigned int* neighborWSB,
-							        int level,
-									unsigned int timestep,
-									unsigned int numberOfTimesteps,
-									unsigned int numberOfParticles,
-									unsigned int size_Mat,
-									unsigned int numberOfThreads,
-									bool isEvenTimestep)
+                                    real* coordY,
+                                    real* coordZ,
+                                    real* coordParticleXlocal,
+                                    real* coordParticleYlocal,
+                                    real* coordParticleZlocal,
+                                    real* coordParticleXglobal,
+                                    real* coordParticleYglobal,
+                                    real* coordParticleZglobal,
+                                    real* veloParticleX,
+                                    real* veloParticleY,
+                                    real* veloParticleZ,
+                                    real* DD,
+                                    real  omega,
+                                    unsigned int* particleID,
+                                    unsigned int* cellBaseID,
+                                    unsigned int* bcMatD,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    unsigned int* neighborWSB,
+                                    int level,
+                                    unsigned int timestep,
+                                    unsigned int numberOfTimesteps,
+                                    unsigned int numberOfParticles,
+                                    unsigned int size_Mat,
+                                    unsigned int numberOfThreads,
+                                    bool isEvenTimestep)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, numberOfParticles);
 
    MoveParticles<<< grid.grid, grid.threads >>> (coordX,
-										coordY,
-										coordZ,
-										coordParticleXlocal,
-										coordParticleYlocal,
-										coordParticleZlocal,
-										coordParticleXglobal,
-										coordParticleYglobal,
-										coordParticleZglobal,
-										veloParticleX,
-										veloParticleY,
-										veloParticleZ,
-										DD,
-										omega,
-										particleID,
-										cellBaseID,
-										bcMatD,
-										neighborX,
-										neighborY,
-										neighborZ,
-										neighborWSB,
-										level,
-										timestep,
-										numberOfTimesteps,
-										numberOfParticles,
-										size_Mat,
-										isEvenTimestep);
+                                        coordY,
+                                        coordZ,
+                                        coordParticleXlocal,
+                                        coordParticleYlocal,
+                                        coordParticleZlocal,
+                                        coordParticleXglobal,
+                                        coordParticleYglobal,
+                                        coordParticleZglobal,
+                                        veloParticleX,
+                                        veloParticleY,
+                                        veloParticleZ,
+                                        DD,
+                                        omega,
+                                        particleID,
+                                        cellBaseID,
+                                        bcMatD,
+                                        neighborX,
+                                        neighborY,
+                                        neighborZ,
+                                        neighborWSB,
+                                        level,
+                                        timestep,
+                                        numberOfTimesteps,
+                                        numberOfParticles,
+                                        size_Mat,
+                                        isEvenTimestep);
       getLastCudaError("MoveParticles execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
 void initRandomDevice(curandState* state,
-								 unsigned int size_Mat,
-								 unsigned int numberOfThreads)
+                                 unsigned int size_Mat,
+                                 unsigned int numberOfThreads)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
    initRandom<<< grid.grid, grid.threads >>> (state);
@@ -5603,9 +5603,9 @@ void initRandomDevice(curandState* state,
 }
 //////////////////////////////////////////////////////////////////////////
 void generateRandomValuesDevice( curandState* state,
-											unsigned int size_Mat,
-											real* randArray,
-											unsigned int numberOfThreads)
+                                            unsigned int size_Mat,
+                                            real* randArray,
+                                            unsigned int numberOfThreads)
 {
    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(numberOfThreads, size_Mat);
    generateRandomValues<<< grid.grid, grid.threads >>> (state,randArray);
@@ -5636,7 +5636,7 @@ void CalcTurbulenceIntensityDevice(
      vxx,
      vyy,
      vzz,
-	 vxy,
+     vxy,
      vxz,
      vyz,
      vx_mean,
@@ -5652,16 +5652,3 @@ void CalcTurbulenceIntensityDevice(
 
    getLastCudaError("CalcTurbulenceIntensity execution failed");
 }
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu
index 78b190e37ebaa395c89aae3b47cd4cc4f3147306..b17ffefd13a8a3a6048dde69ffb1db6c5def23e1 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu
@@ -1,4 +1,4 @@
-#include "LBM/LB.h" 
+#include "LBM/LB.h"
 #include <lbm/constants/NumericConstants.h>
 #include <lbm/constants/D3Q27.h>
 #include <lbm/MacroscopicQuantities.h>
@@ -16,18 +16,18 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
                                                 real omega,
                                                 real* distributions,
                                                 real* subgridDistances,
-                                                uint* neighborX, 
-                                                uint* neighborY, 
+                                                uint* neighborX,
+                                                uint* neighborY,
                                                 uint* neighborZ,
-                                                uint* neighbors0PP, 
+                                                uint* neighbors0PP,
                                                 uint* neighbors0PM,
                                                 uint* neighbors0MP,
                                                 uint* neighbors0MM,
-                                                real* weights0PP, 
+                                                real* weights0PP,
                                                 real* weights0PM,
                                                 real* weights0MP,
                                                 real* weights0MM,
-                                                real* vLast, 
+                                                real* vLast,
                                                 real* vCurrent,
                                                 real velocityX,
                                                 real velocityY,
@@ -43,8 +43,8 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
 
     ////////////////////////////////////////////////////////////////////////////////
     // interpolation of velocity
-    real vxLastInterpd, vyLastInterpd, vzLastInterpd; 
-    real vxNextInterpd, vyNextInterpd, vzNextInterpd; 
+    real vxLastInterpd, vyLastInterpd, vzLastInterpd;
+    real vxNextInterpd, vyNextInterpd, vzNextInterpd;
 
     uint kNeighbor0PP = neighbors0PP[k];
     real d0PP = weights0PP[k];
@@ -90,7 +90,7 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
 
     // if(k==16300)s printf("%f %f %f\n", vxLastInterpd, vyLastInterpd, vzLastInterpd);
     real VeloX = (velocityX + (1.f-timeRatio)*vxLastInterpd + timeRatio*vxNextInterpd)/velocityRatio;
-    real VeloY = (velocityY + (1.f-timeRatio)*vyLastInterpd + timeRatio*vyNextInterpd)/velocityRatio; 
+    real VeloY = (velocityY + (1.f-timeRatio)*vyLastInterpd + timeRatio*vyNextInterpd)/velocityRatio;
     real VeloZ = (velocityZ + (1.f-timeRatio)*vzLastInterpd + timeRatio*vzNextInterpd)/velocityRatio;
     // From here on just a copy of QVelDeviceCompZeroPress
     ////////////////////////////////////////////////////////////////////////////////
@@ -156,29 +156,29 @@ __global__ void QPrecursorDeviceCompZeroPress( 	int* subgridDistanceIndices,
     real f_PPP = (dist.f[DIR_MMM])[kMMM];
     real f_MPP = (dist.f[DIR_PMM])[kPMM];
     real f_PMP = (dist.f[DIR_MPM])[kMPM];
-    
+
     SubgridDistances27 subgridD;
     getPointersToSubgridDistances(subgridD, subgridDistances, numberOfBCnodes);
-    
+
     ////////////////////////////////////////////////////////////////////////////////
       real drho   =  f_PMP + f_MPP + f_PPP + f_MMP + f_PMM + f_MPM + f_PPM + f_MMM +
-                     f_0PM + f_0PP + f_0MP + f_0MM + f_P0M + f_M0P + f_P0P + f_M0M + f_PM0 + f_MP0 + f_PP0 + f_MM0 + 
-                     f_00P + f_00M + f_0P0 + f_0M0 + f_P00 + f_M00 + ((dist.f[DIR_000])[k000]); 
+                     f_0PM + f_0PP + f_0MP + f_0MM + f_P0M + f_M0P + f_P0P + f_M0M + f_PM0 + f_MP0 + f_PP0 + f_MM0 +
+                     f_00P + f_00M + f_0P0 + f_0M0 + f_P00 + f_M00 + ((dist.f[DIR_000])[k000]);
 
       real vx1 =  (((f_PMP - f_MPM) - (f_MPP - f_PMM)) + ((f_PPP - f_MMM) - (f_MMP - f_PPM)) +
                       ((f_P0M - f_M0P)   + (f_P0P - f_M0M))   + ((f_PM0 - f_MP0)   + (f_PP0 - f_MM0)) +
-                      (f_P00 - f_M00)) / (c1o1 + drho); 
-         
+                      (f_P00 - f_M00)) / (c1o1 + drho);
+
 
       real vx2 =   ((-(f_PMP - f_MPM) + (f_MPP - f_PMM)) + ((f_PPP - f_MMM) - (f_MMP - f_PPM)) +
                        ((f_0PM - f_0MP)   + (f_0PP - f_0MM))    + (-(f_PM0 - f_MP0)  + (f_PP0 - f_MM0)) +
-                       (f_0P0 - f_0M0)) / (c1o1 + drho); 
+                       (f_0P0 - f_0M0)) / (c1o1 + drho);
 
       real vx3 =   (((f_PMP - f_MPM) + (f_MPP - f_PMM)) + ((f_PPP - f_MMM) + (f_MMP - f_PPM)) +
                        (-(f_0PM - f_0MP)  + (f_0PP - f_0MM))   + ((f_P0P - f_M0M)   - (f_P0M - f_M0P)) +
-                       (f_00P - f_00M)) / (c1o1 + drho); 
+                       (f_00P - f_00M)) / (c1o1 + drho);
+
 
-    
     // if(k==16383 || k==0) printf("k %d kQ %d drho = %f u %f v %f w %f\n",k, KQK, drho, vx1, vx2, vx3);
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3) * (c1o1 + drho);
     //////////////////////////////////////////////////////////////////////////
@@ -429,18 +429,18 @@ __global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
                                         int numberOfPrecursorNodes,
                                         real omega,
                                         real* distributions,
-                                        uint* neighborX, 
-                                        uint* neighborY, 
+                                        uint* neighborX,
+                                        uint* neighborY,
                                         uint* neighborZ,
-                                        uint* neighbors0PP, 
+                                        uint* neighbors0PP,
                                         uint* neighbors0PM,
                                         uint* neighbors0MP,
                                         uint* neighbors0MM,
-                                        real* weights0PP, 
+                                        real* weights0PP,
                                         real* weights0PM,
                                         real* weights0MP,
                                         real* weights0MM,
-                                        real* vLast, 
+                                        real* vLast,
                                         real* vCurrent,
                                         real velocityX,
                                         real velocityY,
@@ -456,8 +456,8 @@ __global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
 
     ////////////////////////////////////////////////////////////////////////////////
     // interpolation of velocity
-    real vxLastInterpd, vyLastInterpd, vzLastInterpd; 
-    real vxNextInterpd, vyNextInterpd, vzNextInterpd; 
+    real vxLastInterpd, vyLastInterpd, vzLastInterpd;
+    real vxNextInterpd, vyNextInterpd, vzNextInterpd;
 
     uint kNeighbor0PP = neighbors0PP[k];
     real d0PP = weights0PP[k];
@@ -503,7 +503,7 @@ __global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
 
     // if(k==16300) printf("%f %f %f\n", vxLastInterpd, vyLastInterpd, vzLastInterpd);
     real VeloX = (velocityX + (1.f-timeRatio)*vxLastInterpd + timeRatio*vxNextInterpd)/velocityRatio;
-    real VeloY = (velocityY + (1.f-timeRatio)*vyLastInterpd + timeRatio*vyNextInterpd)/velocityRatio; 
+    real VeloY = (velocityY + (1.f-timeRatio)*vyLastInterpd + timeRatio*vyNextInterpd)/velocityRatio;
     real VeloZ = (velocityZ + (1.f-timeRatio)*vzLastInterpd + timeRatio*vzNextInterpd)/velocityRatio;
     // From here on just a copy of QVelDeviceCompZeroPress
     ////////////////////////////////////////////////////////////////////////////////
@@ -511,26 +511,26 @@ __global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
     Distributions27 dist;
     getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
 
-    unsigned int KQK  = subgridDistanceIndices[k]; //QK 
+    unsigned int KQK  = subgridDistanceIndices[k]; //QK
     unsigned int k000 = KQK; //000
     unsigned int kP00 = KQK; //P00
     unsigned int kM00 = neighborX[KQK]; //M00
-    unsigned int k0P0   = KQK; //n  
-    unsigned int k0M0   = neighborY[KQK]; //s  
-    unsigned int k00P   = KQK; //t  
-    unsigned int k00M   = neighborZ[KQK]; //b  
-    unsigned int kMM0  = neighborY[kM00]; //sw 
-    unsigned int kPP0  = KQK; //ne 
-    unsigned int kPM0  = k0M0; //se 
-    unsigned int kMP0  = kM00; //nw 
-    unsigned int kM0M  = neighborZ[kM00]; //bw 
-    unsigned int kP0P  = KQK; //te 
-    unsigned int kP0M  = k00M; //be 
-    unsigned int k0PP  = KQK; //tn 
-    unsigned int k0MM  = neighborZ[k0M0]; //bs 
-    unsigned int kM0P  = kM00; //tw 
-    unsigned int k0PM  = k00M; //bn 
-    unsigned int k0MP  = k0M0; //ts 
+    unsigned int k0P0   = KQK; //n
+    unsigned int k0M0   = neighborY[KQK]; //s
+    unsigned int k00P   = KQK; //t
+    unsigned int k00M   = neighborZ[KQK]; //b
+    unsigned int kMM0  = neighborY[kM00]; //sw
+    unsigned int kPP0  = KQK; //ne
+    unsigned int kPM0  = k0M0; //se
+    unsigned int kMP0  = kM00; //nw
+    unsigned int kM0M  = neighborZ[kM00]; //bw
+    unsigned int kP0P  = KQK; //te
+    unsigned int kP0M  = k00M; //be
+    unsigned int k0PP  = KQK; //tn
+    unsigned int k0MM  = neighborZ[k0M0]; //bs
+    unsigned int kM0P  = kM00; //tw
+    unsigned int k0PM  = k00M; //bn
+    unsigned int k0MP  = k0M0; //ts
     unsigned int kPMP = k0M0; //tse
     unsigned int kMPM = kM0M; //bnw
     unsigned int kMPP = kM00; //tnw
@@ -576,11 +576,11 @@ __global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
       //!
       real drho = c0o1;
 
-      real vx1  = VeloX;          
+      real vx1  = VeloX;
 
-      real vx2  = VeloY; 
+      real vx2  = VeloY;
 
-      real vx3  = VeloZ; 
+      real vx3  = VeloZ;
 
       real cusq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
 
@@ -625,7 +625,7 @@ __global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
       (dist.f[DIR_PPM])[kPPM] = f_MMP;
       (dist.f[DIR_PPP])[kPPP] = f_MMM;
       (dist.f[DIR_PMM])[kPMM] = f_MPP;
-      
+
       (dist.f[DIR_M00])[kM00] = f_P00;
       (dist.f[DIR_MM0])[kMM0] = f_PP0;
       (dist.f[DIR_M0M])[kM0M] = f_P0P;
@@ -650,25 +650,25 @@ __global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
 
 
 __global__ void PrecursorDeviceDistributions( 	int* subgridDistanceIndices,
-												int numberOfBCnodes,
+                                                int numberOfBCnodes,
                                                 int numberOfPrecursorNodes,
-												real* distributions,
-												uint* neighborX, 
-												uint* neighborY, 
-												uint* neighborZ,
-												uint* neighbors0PP, 
-												uint* neighbors0PM,
-												uint* neighbors0MP,
-												uint* neighbors0MM,
-												real* weights0PP, 
-												real* weights0PM,
-												real* weights0MP,
-												real* weights0MM,
-												real* fsLast, 
-												real* fsNext,
-												real timeRatio,
-												unsigned long long numberOfLBnodes,
-												bool isEvenTimestep)
+                                                real* distributions,
+                                                uint* neighborX,
+                                                uint* neighborY,
+                                                uint* neighborZ,
+                                                uint* neighbors0PP,
+                                                uint* neighbors0PM,
+                                                uint* neighbors0MP,
+                                                uint* neighbors0MM,
+                                                real* weights0PP,
+                                                real* weights0PM,
+                                                real* weights0MP,
+                                                real* weights0MM,
+                                                real* fsLast,
+                                                real* fsNext,
+                                                real timeRatio,
+                                                unsigned long long numberOfLBnodes,
+                                                bool isEvenTimestep)
 {
     const unsigned k = vf::gpu::getNodeIndex();
 
@@ -715,31 +715,31 @@ __global__ void PrecursorDeviceDistributions( 	int* subgridDistanceIndices,
 
         f0LastInterp = (f0Last[kNeighbor0PP]*d0PP + f0Last[kNeighbor0PM]*d0PM + f0Last[kNeighbor0MP]*d0MP + f0Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f0NextInterp = (f0Next[kNeighbor0PP]*d0PP + f0Next[kNeighbor0PM]*d0PM + f0Next[kNeighbor0MP]*d0MP + f0Next[kNeighbor0MM]*d0MM)*invWeightSum;
-        
+
         f1LastInterp = (f1Last[kNeighbor0PP]*d0PP + f1Last[kNeighbor0PM]*d0PM + f1Last[kNeighbor0MP]*d0MP + f1Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f1NextInterp = (f1Next[kNeighbor0PP]*d0PP + f1Next[kNeighbor0PM]*d0PM + f1Next[kNeighbor0MP]*d0MP + f1Next[kNeighbor0MM]*d0MM)*invWeightSum;
-        
+
         f2LastInterp = (f2Last[kNeighbor0PP]*d0PP + f2Last[kNeighbor0PM]*d0PM + f2Last[kNeighbor0MP]*d0MP + f2Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f2NextInterp = (f2Next[kNeighbor0PP]*d0PP + f2Next[kNeighbor0PM]*d0PM + f2Next[kNeighbor0MP]*d0MP + f2Next[kNeighbor0MM]*d0MM)*invWeightSum;
-        
+
         f3LastInterp = (f3Last[kNeighbor0PP]*d0PP + f3Last[kNeighbor0PM]*d0PM + f3Last[kNeighbor0MP]*d0MP + f3Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f3NextInterp = (f3Next[kNeighbor0PP]*d0PP + f3Next[kNeighbor0PM]*d0PM + f3Next[kNeighbor0MP]*d0MP + f3Next[kNeighbor0MM]*d0MM)*invWeightSum;
-        
+
         f4LastInterp = (f4Last[kNeighbor0PP]*d0PP + f4Last[kNeighbor0PM]*d0PM + f4Last[kNeighbor0MP]*d0MP + f4Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f4NextInterp = (f4Next[kNeighbor0PP]*d0PP + f4Next[kNeighbor0PM]*d0PM + f4Next[kNeighbor0MP]*d0MP + f4Next[kNeighbor0MM]*d0MM)*invWeightSum;
-        
+
         f5LastInterp = (f5Last[kNeighbor0PP]*d0PP + f5Last[kNeighbor0PM]*d0PM + f5Last[kNeighbor0MP]*d0MP + f5Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f5NextInterp = (f5Next[kNeighbor0PP]*d0PP + f5Next[kNeighbor0PM]*d0PM + f5Next[kNeighbor0MP]*d0MP + f5Next[kNeighbor0MM]*d0MM)*invWeightSum;
-        
+
         f6LastInterp = (f6Last[kNeighbor0PP]*d0PP + f6Last[kNeighbor0PM]*d0PM + f6Last[kNeighbor0MP]*d0MP + f6Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f6NextInterp = (f6Next[kNeighbor0PP]*d0PP + f6Next[kNeighbor0PM]*d0PM + f6Next[kNeighbor0MP]*d0MP + f6Next[kNeighbor0MM]*d0MM)*invWeightSum;
-        
+
         f7LastInterp = (f7Last[kNeighbor0PP]*d0PP + f7Last[kNeighbor0PM]*d0PM + f7Last[kNeighbor0MP]*d0MP + f7Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f7NextInterp = (f7Next[kNeighbor0PP]*d0PP + f7Next[kNeighbor0PM]*d0PM + f7Next[kNeighbor0MP]*d0MP + f7Next[kNeighbor0MM]*d0MM)*invWeightSum;
-        
+
         f8LastInterp = (f8Last[kNeighbor0PP]*d0PP + f8Last[kNeighbor0PM]*d0PM + f8Last[kNeighbor0MP]*d0MP + f8Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f8NextInterp = (f8Next[kNeighbor0PP]*d0PP + f8Next[kNeighbor0PM]*d0PM + f8Next[kNeighbor0MP]*d0MP + f8Next[kNeighbor0MM]*d0MM)*invWeightSum;
-    
+
     } else {
         f0LastInterp = f0Last[kNeighbor0PP];
         f1LastInterp = f1Last[kNeighbor0PP];
@@ -808,25 +808,25 @@ __global__ void PrecursorDeviceDistributions( 	int* subgridDistanceIndices,
 __global__ void QPrecursorDeviceDistributions( 	int* subgridDistanceIndices,
                                                 real* subgridDistances,
                                                 int sizeQ,
-												int numberOfBCnodes,
+                                                int numberOfBCnodes,
                                                 int numberOfPrecursorNodes,
-												real* distributions,
-												uint* neighborX, 
-												uint* neighborY, 
-												uint* neighborZ,
-												uint* neighbors0PP, 
-												uint* neighbors0PM,
-												uint* neighbors0MP,
-												uint* neighbors0MM,
-												real* weights0PP, 
-												real* weights0PM,
-												real* weights0MP,
-												real* weights0MM,
-												real* fsLast, 
-												real* fsNext,
-												real timeRatio,
-												unsigned long long numberOfLBnodes,
-												bool isEvenTimestep)
+                                                real* distributions,
+                                                uint* neighborX,
+                                                uint* neighborY,
+                                                uint* neighborZ,
+                                                uint* neighbors0PP,
+                                                uint* neighbors0PM,
+                                                uint* neighbors0MP,
+                                                uint* neighbors0MM,
+                                                real* weights0PP,
+                                                real* weights0PM,
+                                                real* weights0MP,
+                                                real* weights0MM,
+                                                real* fsLast,
+                                                real* fsNext,
+                                                real timeRatio,
+                                                unsigned long long numberOfLBnodes,
+                                                bool isEvenTimestep)
 {
     const unsigned k = vf::gpu::getNodeIndex();
 
@@ -873,31 +873,31 @@ __global__ void QPrecursorDeviceDistributions( 	int* subgridDistanceIndices,
 
         f0LastInterp = (f0Last[kNeighbor0PP]*d0PP + f0Last[kNeighbor0PM]*d0PM + f0Last[kNeighbor0MP]*d0MP + f0Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f0NextInterp = (f0Next[kNeighbor0PP]*d0PP + f0Next[kNeighbor0PM]*d0PM + f0Next[kNeighbor0MP]*d0MP + f0Next[kNeighbor0MM]*d0MM)*invWeightSum;
-        
+
         f1LastInterp = (f1Last[kNeighbor0PP]*d0PP + f1Last[kNeighbor0PM]*d0PM + f1Last[kNeighbor0MP]*d0MP + f1Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f1NextInterp = (f1Next[kNeighbor0PP]*d0PP + f1Next[kNeighbor0PM]*d0PM + f1Next[kNeighbor0MP]*d0MP + f1Next[kNeighbor0MM]*d0MM)*invWeightSum;
-        
+
         f2LastInterp = (f2Last[kNeighbor0PP]*d0PP + f2Last[kNeighbor0PM]*d0PM + f2Last[kNeighbor0MP]*d0MP + f2Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f2NextInterp = (f2Next[kNeighbor0PP]*d0PP + f2Next[kNeighbor0PM]*d0PM + f2Next[kNeighbor0MP]*d0MP + f2Next[kNeighbor0MM]*d0MM)*invWeightSum;
-        
+
         f3LastInterp = (f3Last[kNeighbor0PP]*d0PP + f3Last[kNeighbor0PM]*d0PM + f3Last[kNeighbor0MP]*d0MP + f3Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f3NextInterp = (f3Next[kNeighbor0PP]*d0PP + f3Next[kNeighbor0PM]*d0PM + f3Next[kNeighbor0MP]*d0MP + f3Next[kNeighbor0MM]*d0MM)*invWeightSum;
-        
+
         f4LastInterp = (f4Last[kNeighbor0PP]*d0PP + f4Last[kNeighbor0PM]*d0PM + f4Last[kNeighbor0MP]*d0MP + f4Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f4NextInterp = (f4Next[kNeighbor0PP]*d0PP + f4Next[kNeighbor0PM]*d0PM + f4Next[kNeighbor0MP]*d0MP + f4Next[kNeighbor0MM]*d0MM)*invWeightSum;
-        
+
         f5LastInterp = (f5Last[kNeighbor0PP]*d0PP + f5Last[kNeighbor0PM]*d0PM + f5Last[kNeighbor0MP]*d0MP + f5Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f5NextInterp = (f5Next[kNeighbor0PP]*d0PP + f5Next[kNeighbor0PM]*d0PM + f5Next[kNeighbor0MP]*d0MP + f5Next[kNeighbor0MM]*d0MM)*invWeightSum;
-        
+
         f6LastInterp = (f6Last[kNeighbor0PP]*d0PP + f6Last[kNeighbor0PM]*d0PM + f6Last[kNeighbor0MP]*d0MP + f6Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f6NextInterp = (f6Next[kNeighbor0PP]*d0PP + f6Next[kNeighbor0PM]*d0PM + f6Next[kNeighbor0MP]*d0MP + f6Next[kNeighbor0MM]*d0MM)*invWeightSum;
-        
+
         f7LastInterp = (f7Last[kNeighbor0PP]*d0PP + f7Last[kNeighbor0PM]*d0PM + f7Last[kNeighbor0MP]*d0MP + f7Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f7NextInterp = (f7Next[kNeighbor0PP]*d0PP + f7Next[kNeighbor0PM]*d0PM + f7Next[kNeighbor0MP]*d0MP + f7Next[kNeighbor0MM]*d0MM)*invWeightSum;
-        
+
         f8LastInterp = (f8Last[kNeighbor0PP]*d0PP + f8Last[kNeighbor0PM]*d0PM + f8Last[kNeighbor0MP]*d0MP + f8Last[kNeighbor0MM]*d0MM)*invWeightSum;
         f8NextInterp = (f8Next[kNeighbor0PP]*d0PP + f8Next[kNeighbor0PM]*d0PM + f8Next[kNeighbor0MP]*d0MP + f8Next[kNeighbor0MM]*d0MM)*invWeightSum;
-    
+
     } else {
         f0LastInterp = f0Last[kNeighbor0PP];
         f1LastInterp = f1Last[kNeighbor0PP];
diff --git a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
index 6a14cebd465a1e79acf14ea019abb34e66d9c85f..a72ebb1a385d16850554dad916aab7708235980b 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
@@ -1,5 +1,5 @@
 /* Device code */
-#include "LBM/LB.h" 
+#include "LBM/LB.h"
 #include "lbm/constants/D3Q27.h"
 #include "lbm/constants/NumericConstants.h"
 #include "lbm/MacroscopicQuantities.h"
@@ -12,21 +12,21 @@ using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
-														 real* DD, 
-														 int* k_Q, 
-														 int* k_N, 
-														 int numberOfBCnodes, 
-														 real om1, 
-														 unsigned int* neighborX,
-														 unsigned int* neighborY,
-														 unsigned int* neighborZ,
-														 unsigned int size_Mat, 
-														 bool isEvenTimestep)
+                                           real* DD,
+                                           int* k_Q,
+                                           int* k_N,
+                                           int numberOfBCnodes,
+                                           real om1,
+                                           unsigned int* neighborX,
+                                           unsigned int* neighborY,
+                                           unsigned int* neighborZ,
+                                           unsigned int size_Mat,
+                                           bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // Globaler x-Index
+   const unsigned  y = blockIdx.x;   // Globaler y-Index
+   const unsigned  z = blockIdx.y;   // Globaler z-Index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -127,7 +127,7 @@ __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
          D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
          D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
          D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+      }
       else
       {
          D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
@@ -218,23 +218,23 @@ __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
       // real vx1, vx2, vx3;
       real drho, drho1;
       //////////////////////////////////////////////////////////////////////////
-	  //Dichte
+     //Dichte
       drho1  =  f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW +
-                f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + 
-                f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[DIR_000])[k1zero]); 
+                f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW +
+                f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[DIR_000])[k1zero]);
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
-                f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
+                f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW +
+                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]);
       //////////////////////////////////////////////////////////////////////////
-	  //Schallgeschwindigkeit
-	  real cs = c1o1 / sqrtf(c3o1);
+     //Schallgeschwindigkeit
+     real cs = c1o1 / sqrtf(c3o1);
       //////////////////////////////////////////////////////////////////////////
-	  real rhoInterpol = drho1 * cs + (c1o1 - cs) * drho; 
-	  //real diffRho = (rhoBC[k] + one) / (rhoInterpol + one);
-	  real diffRhoToAdd = rhoBC[k] - rhoInterpol;
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  //no velocity
-	  //////////////////////////////////////////
+     real rhoInterpol = drho1 * cs + (c1o1 - cs) * drho;
+     //real diffRho = (rhoBC[k] + one) / (rhoInterpol + one);
+     real diffRhoToAdd = rhoBC[k] - rhoInterpol;
+     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+     //no velocity
+     //////////////////////////////////////////
       f_E    = f1_E   * cs + (c1o1 - cs) * f_E   ;
       f_W    = f1_W   * cs + (c1o1 - cs) * f_W   ;
       f_N    = f1_N   * cs + (c1o1 - cs) * f_N   ;
@@ -261,16 +261,16 @@ __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
       f_BSW  = f1_BSW * cs + (c1o1 - cs) * f_BSW ;
       f_BSE  = f1_BSE * cs + (c1o1 - cs) * f_BSE ;
       f_BNW  = f1_BNW * cs + (c1o1 - cs) * f_BNW ;
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  //scale by press
-	  //////////////////////////////////////////
-	  //f_E    = (f_E   + c2over27 ) * diffRho - c2over27 ;
+     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+     //scale by press
+     //////////////////////////////////////////
+     //f_E    = (f_E   + c2over27 ) * diffRho - c2over27 ;
    //   f_W    = (f_W   + c2over27 ) * diffRho - c2over27 ;
    //   f_N    = (f_N   + c2over27 ) * diffRho - c2over27 ;
    //   f_S    = (f_S   + c2over27 ) * diffRho - c2over27 ;
    //   f_T    = (f_T   + c2over27 ) * diffRho - c2over27 ;
    //   f_B    = (f_B   + c2over27 ) * diffRho - c2over27 ;
-	  //f_NE   = (f_NE  + c1over54 ) * diffRho - c1over54 ;
+     //f_NE   = (f_NE  + c1over54 ) * diffRho - c1over54 ;
    //   f_SW   = (f_SW  + c1over54 ) * diffRho - c1over54 ;
    //   f_SE   = (f_SE  + c1over54 ) * diffRho - c1over54 ;
    //   f_NW   = (f_NW  + c1over54 ) * diffRho - c1over54 ;
@@ -290,16 +290,16 @@ __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
    //   f_BSW  = (f_BSW + c1over216) * diffRho - c1over216;
    //   f_BSE  = (f_BSE + c1over216) * diffRho - c1over216;
    //   f_BNW  = (f_BNW + c1over216) * diffRho - c1over216;
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  // add press
-	  //////////////////////////////////////////
-	  f_E    = (f_E   + c2o27  * diffRhoToAdd);
+     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+     // add press
+     //////////////////////////////////////////
+     f_E    = (f_E   + c2o27  * diffRhoToAdd);
       f_W    = (f_W   + c2o27  * diffRhoToAdd);
       f_N    = (f_N   + c2o27  * diffRhoToAdd);
       f_S    = (f_S   + c2o27  * diffRhoToAdd);
       f_T    = (f_T   + c2o27  * diffRhoToAdd);
       f_B    = (f_B   + c2o27  * diffRhoToAdd);
-	  f_NE   = (f_NE  + c1o54  * diffRhoToAdd);
+     f_NE   = (f_NE  + c1o54  * diffRhoToAdd);
       f_SW   = (f_SW  + c1o54  * diffRhoToAdd);
       f_SE   = (f_SE  + c1o54  * diffRhoToAdd);
       f_NW   = (f_NW  + c1o54  * diffRhoToAdd);
@@ -319,9 +319,9 @@ __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
       f_BSW  = (f_BSW + c1o216 * diffRhoToAdd);
       f_BSE  = (f_BSE + c1o216 * diffRhoToAdd);
       f_BNW  = (f_BNW + c1o216 * diffRhoToAdd);
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-	  //////////////////////////////////////////////////////////////////////////
+     //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
          D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
@@ -351,7 +351,7 @@ __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
          D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
          D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
          D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+      }
       else
       {
          D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
@@ -384,46 +384,46 @@ __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
       }
       //////////////////////////////////////////////////////////////////////////
       //__syncthreads();
-	  // -X
-	  //(D.f[DIR_P00   ])[ke   ] = f_E   ;
-	  //(D.f[DIR_PM0  ])[kse  ] = f_SE  ;
-	  //(D.f[DIR_PP0  ])[kne  ] = f_NE  ;
-	  //(D.f[DIR_P0M  ])[kbe  ] = f_BE  ;
-	  //(D.f[DIR_P0P  ])[kte  ] = f_TE  ;
-	  //(D.f[DIR_PMP ])[ktse ] = f_TSE ;
-	  //(D.f[DIR_PPP ])[ktne ] = f_TNE ;
-	  //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
-	  //(D.f[DIR_PPM ])[kbne ] = f_BNE ;     
-	  // X
-	  (D.f[DIR_M00   ])[kw   ] = f_W   ;
-	  (D.f[DIR_MM0  ])[ksw  ] = f_SW  ;
-	  (D.f[DIR_MP0  ])[knw  ] = f_NW  ;
-	  (D.f[DIR_M0M  ])[kbw  ] = f_BW  ;
-	  (D.f[DIR_M0P  ])[ktw  ] = f_TW  ;
-	  (D.f[DIR_MMP ])[ktsw ] = f_TSW ;
-	  (D.f[DIR_MPP ])[ktnw ] = f_TNW ;
-	  (D.f[DIR_MMM ])[kbsw ] = f_BSW ;
-	  (D.f[DIR_MPM ])[kbnw ] = f_BNW ;     
-	  // Y
-	  //(D.f[DIR_0M0   ])[ks   ] = f_S   ;
-	  //(D.f[DIR_PM0  ])[kse  ] = f_SE  ;
-	  //(D.f[DIR_MM0  ])[ksw  ] = f_SW  ;
-	  //(D.f[DIR_0MP  ])[kts  ] = f_TS  ;
-	  //(D.f[DIR_0MM  ])[kbs  ] = f_BS  ;
-	  //(D.f[DIR_PMP ])[ktse ] = f_TSE ;
-	  //(D.f[DIR_MMP ])[ktsw ] = f_TSW ;
-	  //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
-	  //(D.f[DIR_MMM ])[kbsw ] = f_BSW ;     
-	  // Z
-	  //(D.f[DIR_00M   ])[kb   ] = f_B   ;
-	  //(D.f[DIR_P0M  ])[kbe  ] = f_BE  ;
-	  //(D.f[DIR_M0M  ])[kbw  ] = f_BW  ;
-	  //(D.f[DIR_0PM  ])[kbn  ] = f_BN  ;
-	  //(D.f[DIR_0MM  ])[kbs  ] = f_BS  ;
-	  //(D.f[DIR_PPM ])[kbne ] = f_BNE ;
-	  //(D.f[DIR_MPM ])[kbnw ] = f_BNW ;
-	  //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
-	  //(D.f[DIR_MMM ])[kbsw ] = f_BSW ;     
+     // -X
+     //(D.f[DIR_P00   ])[ke   ] = f_E   ;
+     //(D.f[DIR_PM0  ])[kse  ] = f_SE  ;
+     //(D.f[DIR_PP0  ])[kne  ] = f_NE  ;
+     //(D.f[DIR_P0M  ])[kbe  ] = f_BE  ;
+     //(D.f[DIR_P0P  ])[kte  ] = f_TE  ;
+     //(D.f[DIR_PMP ])[ktse ] = f_TSE ;
+     //(D.f[DIR_PPP ])[ktne ] = f_TNE ;
+     //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
+     //(D.f[DIR_PPM ])[kbne ] = f_BNE ;
+     // X
+     (D.f[DIR_M00   ])[kw   ] = f_W   ;
+     (D.f[DIR_MM0  ])[ksw  ] = f_SW  ;
+     (D.f[DIR_MP0  ])[knw  ] = f_NW  ;
+     (D.f[DIR_M0M  ])[kbw  ] = f_BW  ;
+     (D.f[DIR_M0P  ])[ktw  ] = f_TW  ;
+     (D.f[DIR_MMP ])[ktsw ] = f_TSW ;
+     (D.f[DIR_MPP ])[ktnw ] = f_TNW ;
+     (D.f[DIR_MMM ])[kbsw ] = f_BSW ;
+     (D.f[DIR_MPM ])[kbnw ] = f_BNW ;
+     // Y
+     //(D.f[DIR_0M0   ])[ks   ] = f_S   ;
+     //(D.f[DIR_PM0  ])[kse  ] = f_SE  ;
+     //(D.f[DIR_MM0  ])[ksw  ] = f_SW  ;
+     //(D.f[DIR_0MP  ])[kts  ] = f_TS  ;
+     //(D.f[DIR_0MM  ])[kbs  ] = f_BS  ;
+     //(D.f[DIR_PMP ])[ktse ] = f_TSE ;
+     //(D.f[DIR_MMP ])[ktsw ] = f_TSW ;
+     //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
+     //(D.f[DIR_MMM ])[kbsw ] = f_BSW ;
+     // Z
+     //(D.f[DIR_00M   ])[kb   ] = f_B   ;
+     //(D.f[DIR_P0M  ])[kbe  ] = f_BE  ;
+     //(D.f[DIR_M0M  ])[kbw  ] = f_BW  ;
+     //(D.f[DIR_0PM  ])[kbn  ] = f_BN  ;
+     //(D.f[DIR_0MM  ])[kbs  ] = f_BS  ;
+     //(D.f[DIR_PPM ])[kbne ] = f_BNE ;
+     //(D.f[DIR_MPM ])[kbnw ] = f_BNW ;
+     //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
+     //(D.f[DIR_MMM ])[kbsw ] = f_BSW ;
       //////////////////////////////////////////////////////////////////////////
    }
 }
@@ -469,21 +469,21 @@ __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
-													real* DD, 
-													int* k_Q, 
-													int* k_N, 
-													int numberOfBCnodes, 
-													real om1, 
-													unsigned int* neighborX,
-													unsigned int* neighborY,
-													unsigned int* neighborZ,
-													unsigned int size_Mat, 
-													bool isEvenTimestep)
+                                       real* DD,
+                                       int* k_Q,
+                                       int* k_N,
+                                       int numberOfBCnodes,
+                                       real om1,
+                                       unsigned int* neighborX,
+                                       unsigned int* neighborY,
+                                       unsigned int* neighborZ,
+                                       unsigned int size_Mat,
+                                       bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // Globaler x-Index
+   const unsigned  y = blockIdx.x;   // Globaler y-Index
+   const unsigned  z = blockIdx.y;   // Globaler z-Index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -584,7 +584,7 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
          D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
          D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
          D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+      }
       else
       {
          D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
@@ -652,17 +652,17 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
                           f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW;
 
       real vx1      =  ((f1_TSE - f1_BNW) - (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) +
-						  ((f1_BE - f1_TW)   + (f1_TE - f1_BW))   + ((f1_SE - f1_NW)   + (f1_NE - f1_SW)) +
-						  (f1_E - f1_W); 
+                    ((f1_BE - f1_TW)   + (f1_TE - f1_BW))   + ((f1_SE - f1_NW)   + (f1_NE - f1_SW)) +
+                    (f1_E - f1_W);
 
 
       real vx2    =   (-(f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) +
-						 ((f1_BN - f1_TS)   + (f1_TN - f1_BS))    + (-(f1_SE - f1_NW)  + (f1_NE - f1_SW)) +
-						 (f1_N - f1_S); 
+                   ((f1_BN - f1_TS)   + (f1_TN - f1_BS))    + (-(f1_SE - f1_NW)  + (f1_NE - f1_SW)) +
+                   (f1_N - f1_S);
 
       real vx3    =   ((f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) + (f1_TSW - f1_BNE)) +
-						 (-(f1_BN - f1_TS)  + (f1_TN - f1_BS))   + ((f1_TE - f1_BW)   - (f1_BE - f1_TW)) +
-						 (f1_T - f1_B); 
+                   (-(f1_BN - f1_TS)  + (f1_TN - f1_BS))   + ((f1_TE - f1_BW)   - (f1_BE - f1_TW)) +
+                   (f1_T - f1_B);
 
       real cusq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
@@ -693,15 +693,15 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
       f1_BNW   -=  c1o216*(drho1+(drho1+c1o1)*(c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cusq));
       f1_BSE   -=  c1o216*(drho1+(drho1+c1o1)*(c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq));
       f1_TNW   -=  c1o216*(drho1+(drho1+c1o1)*(c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq));
-	   
-	  drho1 = rhoBC[k];
 
-	  //if(vx1 < zero){
-		 // vx1 *= 0.9;
-	  //}
-	  //if(vx2 < zero){
-		 // vx2 *= c1o10;//0.9;
-	  //}
+     drho1 = rhoBC[k];
+
+     //if(vx1 < zero){
+       // vx1 *= 0.9;
+     //}
+     //if(vx2 < zero){
+       // vx2 *= c1o10;//0.9;
+     //}
 
       f1_ZERO  += c8o27*  (drho1-(drho1+c1o1)*cusq);
       f1_E     += c2o27*  (drho1+(drho1+c1o1)*(c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cusq));
@@ -731,39 +731,39 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
       f1_BSE   +=  c1o216*(drho1+(drho1+c1o1)*(c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq));
       f1_TNW   +=  c1o216*(drho1+(drho1+c1o1)*(c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq));
 
-	  //drho1 = (drho1 + rhoBC[k])/2.f;
-	  //drho1 = drho1 - rhoBC[k];
+     //drho1 = (drho1 + rhoBC[k])/2.f;
+     //drho1 = drho1 - rhoBC[k];
       //////////////////////////////////////////////////////////////////////////
 
       __syncthreads();
 
-      (D.f[DIR_P00   ])[ke   ] = f1_W   ;  
-      (D.f[DIR_M00   ])[kw   ] = f1_E   ;	
-      (D.f[DIR_0P0   ])[kn   ] = f1_S   ;	
-      (D.f[DIR_0M0   ])[ks   ] = f1_N   ;	
-      (D.f[DIR_00P   ])[kt   ] = f1_B   ;	
-      (D.f[DIR_00M   ])[kb   ] = f1_T   ;	
-      (D.f[DIR_PP0  ])[kne  ] = f1_SW  ;	
-      (D.f[DIR_MM0  ])[ksw  ] = f1_NE  ;	
-      (D.f[DIR_PM0  ])[kse  ] = f1_NW  ;	
-      (D.f[DIR_MP0  ])[knw  ] = f1_SE  ;	
-      (D.f[DIR_P0P  ])[kte  ] = f1_BW  ;	
-      (D.f[DIR_M0M  ])[kbw  ] = f1_TE  ;	
-      (D.f[DIR_P0M  ])[kbe  ] = f1_TW  ;	
-      (D.f[DIR_M0P  ])[ktw  ] = f1_BE  ;	
-      (D.f[DIR_0PP  ])[ktn  ] = f1_BS  ;	
-      (D.f[DIR_0MM  ])[kbs  ] = f1_TN  ;	
-      (D.f[DIR_0PM  ])[kbn  ] = f1_TS  ;	
-      (D.f[DIR_0MP  ])[kts  ] = f1_BN  ;	
-      (D.f[DIR_000])[kzero] = f1_ZERO;	
-      (D.f[DIR_PPP ])[ktne ] = f1_BSW ;	
-      (D.f[DIR_MMP ])[ktsw ] = f1_BNE ;	
-      (D.f[DIR_PMP ])[ktse ] = f1_BNW ;	
-      (D.f[DIR_MPP ])[ktnw ] = f1_BSE ;	
-      (D.f[DIR_PPM ])[kbne ] = f1_TSW ;	
-      (D.f[DIR_MMM ])[kbsw ] = f1_TNE ;	
-      (D.f[DIR_PMM ])[kbse ] = f1_TNW ;	
-      (D.f[DIR_MPM ])[kbnw ] = f1_TSE ;       
+      (D.f[DIR_P00   ])[ke   ] = f1_W   ;
+      (D.f[DIR_M00   ])[kw   ] = f1_E   ;
+      (D.f[DIR_0P0   ])[kn   ] = f1_S   ;
+      (D.f[DIR_0M0   ])[ks   ] = f1_N   ;
+      (D.f[DIR_00P   ])[kt   ] = f1_B   ;
+      (D.f[DIR_00M   ])[kb   ] = f1_T   ;
+      (D.f[DIR_PP0  ])[kne  ] = f1_SW  ;
+      (D.f[DIR_MM0  ])[ksw  ] = f1_NE  ;
+      (D.f[DIR_PM0  ])[kse  ] = f1_NW  ;
+      (D.f[DIR_MP0  ])[knw  ] = f1_SE  ;
+      (D.f[DIR_P0P  ])[kte  ] = f1_BW  ;
+      (D.f[DIR_M0M  ])[kbw  ] = f1_TE  ;
+      (D.f[DIR_P0M  ])[kbe  ] = f1_TW  ;
+      (D.f[DIR_M0P  ])[ktw  ] = f1_BE  ;
+      (D.f[DIR_0PP  ])[ktn  ] = f1_BS  ;
+      (D.f[DIR_0MM  ])[kbs  ] = f1_TN  ;
+      (D.f[DIR_0PM  ])[kbn  ] = f1_TS  ;
+      (D.f[DIR_0MP  ])[kts  ] = f1_BN  ;
+      (D.f[DIR_000])[kzero] = f1_ZERO;
+      (D.f[DIR_PPP ])[ktne ] = f1_BSW ;
+      (D.f[DIR_MMP ])[ktsw ] = f1_BNE ;
+      (D.f[DIR_PMP ])[ktse ] = f1_BNW ;
+      (D.f[DIR_MPP ])[ktnw ] = f1_BSE ;
+      (D.f[DIR_PPM ])[kbne ] = f1_TSW ;
+      (D.f[DIR_MMM ])[kbsw ] = f1_TNE ;
+      (D.f[DIR_PMM ])[kbse ] = f1_TNW ;
+      (D.f[DIR_MPM ])[kbnw ] = f1_TSE ;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -808,26 +808,26 @@ __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 __global__ void QPressDeviceNEQ27(real* rhoBC,
-                                             real* distribution, 
+                                             real* distribution,
                                              int* bcNodeIndices,
                                              int* bcNeighborIndices,
                                              int numberOfBCnodes,
-                                             real omega1, 
+                                             real omega1,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int numberOfLBnodes, 
+                                             unsigned int numberOfLBnodes,
                                              bool isEvenTimestep)
 {
    //////////////////////////////////////////////////////////////////////////
-	//! The pressure boundary condition is executed in the following steps
-	//!
-	////////////////////////////////////////////////////////////////////////////////
-	//! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
-	//!
-   const unsigned x = threadIdx.x;    // global x-index 
-   const unsigned y = blockIdx.x;     // global y-index 
-   const unsigned z = blockIdx.y;     // global z-index 
+   //! The pressure boundary condition is executed in the following steps
+   //!
+   ////////////////////////////////////////////////////////////////////////////////
+   //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+   //!
+   const unsigned x = threadIdx.x;    // global x-index
+   const unsigned y = blockIdx.x;     // global y-index
+   const unsigned z = blockIdx.y;     // global z-index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -949,20 +949,20 @@ __global__ void QPressDeviceNEQ27(real* rhoBC,
       //! - Calculate macroscopic quantities (for neighboring node)
       //!
       real drho1 = f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW +
-                   f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + 
-                   f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((dist.f[DIR_000])[kzero]); 
+                   f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW +
+                   f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((dist.f[DIR_000])[kzero]);
 
       real vx1  = (((f1_TSE - f1_BNW) - (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) +
                    ((f1_BE - f1_TW)   + (f1_TE - f1_BW))   + ((f1_SE - f1_NW)   + (f1_NE - f1_SW)) +
-                   (f1_E - f1_W)) / (c1o1 + drho1);          
+                   (f1_E - f1_W)) / (c1o1 + drho1);
 
       real vx2  = ((-(f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) +
                    ((f1_BN - f1_TS)   + (f1_TN - f1_BS))    + (-(f1_SE - f1_NW)  + (f1_NE - f1_SW)) +
-                   (f1_N - f1_S)) / (c1o1 + drho1); 
+                   (f1_N - f1_S)) / (c1o1 + drho1);
 
       real vx3  = (((f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) + (f1_TSW - f1_BNE)) +
                    (-(f1_BN - f1_TS)  + (f1_TN - f1_BS))   + ((f1_TE - f1_BW)   - (f1_BE - f1_TW)) +
-                   (f1_T - f1_B)) / (c1o1 + drho1); 
+                   (f1_T - f1_B)) / (c1o1 + drho1);
 
       real cusq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3);
 
@@ -1110,16 +1110,16 @@ __global__ void QPressDeviceNEQ27(real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-__global__ void LB_BC_Press_East27( int nx, 
-                                               int ny, 
-                                               int tz, 
-                                               unsigned int* bcMatD, 
+__global__ void LB_BC_Press_East27( int nx,
+                                               int ny,
+                                               int tz,
+                                               unsigned int* bcMatD,
                                                unsigned int* neighborX,
                                                unsigned int* neighborY,
                                                unsigned int* neighborZ,
-                                               real* DD, 
-                                               unsigned int size_Mat, 
-                                               bool isEvenTimestep) 
+                                               real* DD,
+                                               unsigned int size_Mat,
+                                               bool isEvenTimestep)
 {
    //thread-index
    int ty = blockIdx.x;
@@ -1127,9 +1127,9 @@ __global__ void LB_BC_Press_East27( int nx,
 
    int  k, k1, nxny;                   // Zugriff auf arrays im device
 
-   int  x = tx + STARTOFFX;  // Globaler x-Index 
-   int  y = ty + STARTOFFY;  // Globaler y-Index 
-   int  z = tz + STARTOFFZ;  // Globaler z-Index 
+   int  x = tx + STARTOFFX;  // Globaler x-Index
+   int  y = ty + STARTOFFY;  // Globaler y-Index
+   int  z = tz + STARTOFFZ;  // Globaler z-Index
 
    k = nx*(ny*z + y) + x;
    nxny = nx*ny;
@@ -1167,7 +1167,7 @@ __global__ void LB_BC_Press_East27( int nx,
          D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
          D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
          D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+      }
       else
       {
          D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
@@ -1374,10 +1374,10 @@ __global__ void LB_BC_Press_East27( int nx,
       (D.f[DIR_PPM ])[kbne ] = f1_TSW -c1o216*drho1;
       (D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1o216*drho1;
       (D.f[DIR_PMM ])[kbse ] = f1_TNW -c1o216*drho1;
-      (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1;       
+      (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1;
    }
    __syncthreads();
-}          
+}
 //////////////////////////////////////////////////////////////////////////////
 
 
@@ -1420,15 +1420,15 @@ __global__ void LB_BC_Press_East27( int nx,
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QPressDevice27(real* rhoBC,
-                                           real* DD, 
-                                           int* k_Q, 
+                                           real* DD,
+                                           int* k_Q,
                                            real* QQ,
-                                           unsigned int numberOfBCnodes, 
-                                           real om1, 
+                                           unsigned int numberOfBCnodes,
+                                           real om1,
                                            unsigned int* neighborX,
                                            unsigned int* neighborY,
                                            unsigned int* neighborZ,
-                                           unsigned int size_Mat, 
+                                           unsigned int size_Mat,
                                            bool isEvenTimestep)
 {
    Distributions27 D;
@@ -1461,7 +1461,7 @@ __global__ void QPressDevice27(real* rhoBC,
       D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
       D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
       D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-   } 
+   }
    else
    {
       D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
@@ -1493,9 +1493,9 @@ __global__ void QPressDevice27(real* rhoBC,
       D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // Globaler x-Index
+   const unsigned  y = blockIdx.x;   // Globaler y-Index
+   const unsigned  z = blockIdx.y;   // Globaler z-Index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -1505,11 +1505,11 @@ __global__ void QPressDevice27(real* rhoBC,
 
    if(k<numberOfBCnodes)
    {
-      real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
+      real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB,
          *q_dirNE,  *q_dirSW,  *q_dirSE,  *q_dirNW,  *q_dirTE,  *q_dirBW,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
-         *q_dirBSE, *q_dirBNW; 
+         *q_dirBSE, *q_dirBNW;
       q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
       q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
       q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
@@ -1600,16 +1600,16 @@ __global__ void QPressDevice27(real* rhoBC,
       real q, vx1, vx2, vx3, drho;
       vx1    =  ((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                   ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
-                  (f_E - f_W); 
+                  (f_E - f_W);
 
 
       vx2    =   (-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                   ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
-                  (f_N - f_S); 
+                  (f_N - f_S);
 
       vx3    =   ((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
                   (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
-                  (f_T - f_B); 
+                  (f_T - f_B);
 
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       //////////////////////////////////////////////////////////////////////////
@@ -1646,7 +1646,7 @@ __global__ void QPressDevice27(real* rhoBC,
          D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
          D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
          D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+      }
       else
       {
          D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
@@ -1681,183 +1681,183 @@ __global__ void QPressDevice27(real* rhoBC,
       q = q_dirE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_M00])[kw]=c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq); 
-         //(D.f[DIR_P00])[ke]=c2over27* (drho+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cu_sq); 
+         (D.f[DIR_M00])[kw]=c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+         //(D.f[DIR_P00])[ke]=c2over27* (drho+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cu_sq);
       }
 
       q = q_dirW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_P00])[ke]=c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq); 
-         //(D.f[DIR_M00])[kw]=c2over27* (drho+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cu_sq); 
+         (D.f[DIR_P00])[ke]=c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+         //(D.f[DIR_M00])[kw]=c2over27* (drho+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cu_sq);
       }
 
       q = q_dirN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_0M0])[ks]=c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq); 
-         //(D.f[DIR_0P0])[kn]=c2over27* (drho+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cu_sq); 
+         (D.f[DIR_0M0])[ks]=c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+         //(D.f[DIR_0P0])[kn]=c2over27* (drho+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cu_sq);
       }
 
       q = q_dirS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_0P0])[kn]=c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq); 
-         //(D.f[DIR_0M0])[ks]=c2over27* (drho+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cu_sq); 
+         (D.f[DIR_0P0])[kn]=c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+         //(D.f[DIR_0M0])[ks]=c2over27* (drho+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cu_sq);
       }
 
       q = q_dirT[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_00M])[kb]=c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq); 
-         //(D.f[DIR_00P])[kt]=c2over27* (drho+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cu_sq); 
+         (D.f[DIR_00M])[kb]=c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+         //(D.f[DIR_00P])[kt]=c2over27* (drho+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cu_sq);
       }
 
       q = q_dirB[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_00P])[kt]=c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq); 
-         //(D.f[DIR_00M])[kb]=c2over27* (drho+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cu_sq); 
+         (D.f[DIR_00P])[kt]=c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+         //(D.f[DIR_00M])[kb]=c2over27* (drho+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cu_sq);
       }
 
       q = q_dirNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_MM0])[ksw]=c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq); 
-         //(D.f[DIR_PP0])[kne]=c1over54* (drho+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cu_sq); 
+         (D.f[DIR_MM0])[ksw]=c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+         //(D.f[DIR_PP0])[kne]=c1over54* (drho+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
       }
 
       q = q_dirSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_PP0])[kne]=c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq); 
-         //(D.f[DIR_MM0])[ksw]=c1over54* (drho+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq); 
+         (D.f[DIR_PP0])[kne]=c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+         //(D.f[DIR_MM0])[ksw]=c1over54* (drho+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
       }
 
       q = q_dirSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_MP0])[knw]=c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq); 
-         //(D.f[DIR_PM0])[kse]=c1over54* (drho+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cu_sq); 
+         (D.f[DIR_MP0])[knw]=c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+         //(D.f[DIR_PM0])[kse]=c1over54* (drho+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
       }
 
       q = q_dirNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_PM0])[kse]=c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq); 
-         //(D.f[DIR_MP0])[knw]=c1over54* (drho+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq); 
+         (D.f[DIR_PM0])[kse]=c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+         //(D.f[DIR_MP0])[knw]=c1over54* (drho+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
       }
 
       q = q_dirTE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_M0M])[kbw]=c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq); 
-         //(D.f[DIR_P0P])[kte]=c1over54* (drho+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cu_sq); 
+         (D.f[DIR_M0M])[kbw]=c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+         //(D.f[DIR_P0P])[kte]=c1over54* (drho+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
       }
 
       q = q_dirBW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_P0P])[kte]=c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq); 
-         //(D.f[DIR_M0M])[kbw]=c1over54* (drho+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq); 
+         (D.f[DIR_P0P])[kte]=c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+         //(D.f[DIR_M0M])[kbw]=c1over54* (drho+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
       }
 
       q = q_dirBE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_M0P])[ktw]=c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq); 
-         //(D.f[DIR_P0M])[kbe]=c1over54* (drho+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cu_sq); 
+         (D.f[DIR_M0P])[ktw]=c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+         //(D.f[DIR_P0M])[kbe]=c1over54* (drho+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
       }
 
       q = q_dirTW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_P0M])[kbe]=c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq); 
-         //(D.f[DIR_M0P])[ktw]=c1over54* (drho+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq); 
+         (D.f[DIR_P0M])[kbe]=c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+         //(D.f[DIR_M0P])[ktw]=c1over54* (drho+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
       }
 
       q = q_dirTN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_0MM])[kbs]=c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq); 
-         //(D.f[DIR_0PP])[ktn]=c1over54* (drho+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cu_sq); 
+         (D.f[DIR_0MM])[kbs]=c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+         //(D.f[DIR_0PP])[ktn]=c1over54* (drho+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
       }
 
       q = q_dirBS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_0PP])[ktn]=c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq); 
-         //(D.f[DIR_0MM])[kbs]=c1over54* (drho+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq); 
+         (D.f[DIR_0PP])[ktn]=c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+         //(D.f[DIR_0MM])[kbs]=c1over54* (drho+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
       }
 
       q = q_dirBN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_0MP])[kts]=c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq); 
-         //(D.f[DIR_0PM])[kbn]=c1over54* (drho+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cu_sq); 
+         (D.f[DIR_0MP])[kts]=c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+         //(D.f[DIR_0PM])[kbn]=c1over54* (drho+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
       }
 
       q = q_dirTS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_0PM])[kbn]=c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq); 
-         //(D.f[DIR_0MP])[kts]=c1over54* (drho+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq); 
+         (D.f[DIR_0PM])[kbn]=c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+         //(D.f[DIR_0MP])[kts]=c1over54* (drho+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
       }
 
       q = q_dirTNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_MMM])[kbsw]=c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); 
-         //(D.f[DIR_PPP])[ktne]=c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); 
+         (D.f[DIR_MMM])[kbsw]=c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+         //(D.f[DIR_PPP])[ktne]=c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
       }
 
       q = q_dirBSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_PPP])[ktne]=c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); 
-         //(D.f[DIR_MMM])[kbsw]=c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); 
+         (D.f[DIR_PPP])[ktne]=c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+         //(D.f[DIR_MMM])[kbsw]=c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
       }
 
       q = q_dirBNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_MMP])[ktsw]=c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); 
-         //(D.f[DIR_PPM])[kbne]=c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); 
+         (D.f[DIR_MMP])[ktsw]=c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+         //(D.f[DIR_PPM])[kbne]=c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
       }
 
       q = q_dirTSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_PPM])[kbne]=c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); 
-         //(D.f[DIR_MMP])[ktsw]=c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); 
+         (D.f[DIR_PPM])[kbne]=c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+         //(D.f[DIR_MMP])[ktsw]=c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
       }
 
       q = q_dirTSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_MPM])[kbnw]=c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); 
-         //(D.f[DIR_PMP])[ktse]=c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); 
+         (D.f[DIR_MPM])[kbnw]=c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+         //(D.f[DIR_PMP])[ktse]=c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
       }
 
       q = q_dirBNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_PMP])[ktse]=c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); 
-         //(D.f[DIR_MPM])[kbnw]=c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); 
+         (D.f[DIR_PMP])[ktse]=c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+         //(D.f[DIR_MPM])[kbnw]=c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
       }
 
       q = q_dirBSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_MPP])[ktnw]=c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); 
-         //(D.f[DIR_PMM])[kbse]=c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); 
+         (D.f[DIR_MPP])[ktnw]=c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+         //(D.f[DIR_PMM])[kbse]=c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
       }
 
       q = q_dirTNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_PMM])[kbse]=c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); 
-         //(D.f[DIR_MPP])[ktnw]=c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); 
+         (D.f[DIR_PMM])[kbse]=c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+         //(D.f[DIR_MPP])[ktnw]=c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
       }
    }
 }
@@ -1903,19 +1903,19 @@ __global__ void QPressDevice27(real* rhoBC,
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QPressDeviceAntiBB27(   real* rhoBC,
-												   real* vx,
-												   real* vy,
-												   real* vz,
-												   real* DD, 
-												   int* k_Q, 
-												   real* QQ,
-												   int numberOfBCnodes, 
-												   real om1, 
-												   unsigned int* neighborX,
-												   unsigned int* neighborY,
-												   unsigned int* neighborZ,
-												   unsigned int size_Mat, 
-												   bool isEvenTimestep)
+                                       real* vx,
+                                       real* vy,
+                                       real* vz,
+                                       real* DD,
+                                       int* k_Q,
+                                       real* QQ,
+                                       int numberOfBCnodes,
+                                       real om1,
+                                       unsigned int* neighborX,
+                                       unsigned int* neighborY,
+                                       unsigned int* neighborZ,
+                                       unsigned int size_Mat,
+                                       bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
@@ -1947,7 +1947,7 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
       D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
       D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
       D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-   } 
+   }
    else
    {
       D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
@@ -1979,9 +1979,9 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
       D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // Globaler x-Index
+   const unsigned  y = blockIdx.x;   // Globaler y-Index
+   const unsigned  z = blockIdx.y;   // Globaler z-Index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -1991,11 +1991,11 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
 
    if(k<numberOfBCnodes)
    {
-      real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
+      real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB,
          *q_dirNE,  *q_dirSW,  *q_dirSE,  *q_dirNW,  *q_dirTE,  *q_dirBW,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
-         *q_dirBSE, *q_dirBNW; 
+         *q_dirBSE, *q_dirBNW;
       q_dirE   = &QQ[DIR_P00   *numberOfBCnodes];
       q_dirW   = &QQ[DIR_M00   *numberOfBCnodes];
       q_dirN   = &QQ[DIR_0P0   *numberOfBCnodes];
@@ -2087,25 +2087,25 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
       //real vx1, vx2, vx3, drho;
       //vx1    =  ((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
       //            ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
-      //            (f_E - f_W); 
+      //            (f_E - f_W);
 
 
       //vx2    =   (-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
       //            ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
-      //            (f_N - f_S); 
+      //            (f_N - f_S);
 
       //vx3    =   ((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
       //            (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
-      //            (f_T - f_B); 
+      //            (f_T - f_B);
 
       //real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       //////////////////////////////////////////////////////////////////////////
       real drho    = f_ZERO+f_E+f_W+f_N+f_S+f_T+f_B+f_NE+f_SW+f_SE+f_NW+f_TE+f_BW+f_BE+f_TW+f_TN+f_BS+f_BN+f_TS+
-						f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
+                  f_TNE+f_TSW+f_TSE+f_TNW+f_BNE+f_BSW+f_BSE+f_BNW;
       drho = drho - rhoBC[k];
-	  drho *= 0.01f;
+     drho *= 0.01f;
       ////////////////////////////////////////////////////////////////////////////////
-	  real q;
+     real q;
       //deltaRho = (rhoBC[k] + one) / (deltaRho + one);
       ////////////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
@@ -2137,7 +2137,7 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
          D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
          D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
          D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+      }
       else
       {
          D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
@@ -2172,7 +2172,7 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
       q = q_dirE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_M00])[kw]=f_W-c2o27*drho; 
+         (D.f[DIR_M00])[kw]=f_W-c2o27*drho;
       }
 
       q = q_dirW[k];
@@ -2184,19 +2184,19 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
       q = q_dirN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_0M0])[ks]=f_S-c2o27*drho; 
+         (D.f[DIR_0M0])[ks]=f_S-c2o27*drho;
       }
 
       q = q_dirS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_0P0])[kn]=f_N-c2o27*drho; 
+         (D.f[DIR_0P0])[kn]=f_N-c2o27*drho;
       }
 
       q = q_dirT[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_00M])[kb]=f_B-c2o27*drho; 
+         (D.f[DIR_00M])[kb]=f_B-c2o27*drho;
       }
 
       q = q_dirB[k];
@@ -2232,13 +2232,13 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
       q = q_dirTE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_M0M])[kbw]=f_BW-c1o54*drho; 
+         (D.f[DIR_M0M])[kbw]=f_BW-c1o54*drho;
       }
 
       q = q_dirBW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[DIR_P0P])[kte]=f_TE-c1o54*drho; 
+         (D.f[DIR_P0P])[kte]=f_TE-c1o54*drho;
       }
 
       q = q_dirBE[k];
@@ -2368,20 +2368,20 @@ __global__ void QPressDeviceAntiBB27(   real* rhoBC,
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QPressDeviceFixBackflow27( real* rhoBC,
-                                                      real* DD, 
-                                                      int* k_Q, 
-                                                      int numberOfBCnodes, 
-                                                      real om1, 
+                                                      real* DD,
+                                                      int* k_Q,
+                                                      int numberOfBCnodes,
+                                                      real om1,
                                                       unsigned int* neighborX,
                                                       unsigned int* neighborY,
                                                       unsigned int* neighborZ,
-                                                      unsigned int size_Mat, 
+                                                      unsigned int size_Mat,
                                                       bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // Globaler x-Index
+   const unsigned  y = blockIdx.x;   // Globaler y-Index
+   const unsigned  z = blockIdx.y;   // Globaler z-Index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -2456,7 +2456,7 @@ __global__ void QPressDeviceFixBackflow27( real* rhoBC,
          D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
          D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
          D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+      }
       else
       {
          D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
@@ -2559,20 +2559,20 @@ __global__ void QPressDeviceFixBackflow27( real* rhoBC,
 
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
-                                                     real* DD, 
-                                                     int* k_Q, 
-                                                     int numberOfBCnodes, 
-                                                     real om1, 
+                                                     real* DD,
+                                                     int* k_Q,
+                                                     int numberOfBCnodes,
+                                                     real om1,
                                                      unsigned int* neighborX,
                                                      unsigned int* neighborY,
                                                      unsigned int* neighborZ,
-                                                     unsigned int size_Mat, 
+                                                     unsigned int size_Mat,
                                                      bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // Globaler x-Index
+   const unsigned  y = blockIdx.x;   // Globaler y-Index
+   const unsigned  z = blockIdx.y;   // Globaler z-Index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -2647,7 +2647,7 @@ __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
          D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
          D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
          D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+      }
       else
       {
          D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
@@ -2803,16 +2803,16 @@ __host__ __device__ real computeOutflowDistribution(const real* const &f, const
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 __global__ void QPressNoRhoDevice27( real* rhoBC,
-												 real* distributions, 
-												 int* k_Q, 
-												 int* k_N, 
-												 int numberOfBCnodes, 
-												 real om1, 
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int numberOfLBnodes, 
-												 bool isEvenTimestep,
+                                     real* distributions,
+                                     int* k_Q,
+                                     int* k_N,
+                                     int numberOfBCnodes,
+                                     real om1,
+                                     unsigned int* neighborX,
+                                     unsigned int* neighborY,
+                                     unsigned int* neighborZ,
+                                     unsigned int numberOfLBnodes,
+                                     bool isEvenTimestep,
                                      int direction)
 {
    ////////////////////////////////////////////////////////////////////////////////
@@ -2885,8 +2885,8 @@ __global__ void QPressNoRhoDevice27( real* rhoBC,
    unsigned int k1bsw = neighborZ[k1sw];
    ////////////////////////////////////////////////////////////////////////////////
    Distributions27 dist;
-   getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);      
-   real f[27], f1[27]; 
+   getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+   real f[27], f1[27];
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    f1[DIR_P00] = (dist.f[DIR_P00])[k1e   ];
    f1[DIR_M00] = (dist.f[DIR_M00])[k1w   ];
@@ -2986,9 +2986,9 @@ __global__ void QPressNoRhoDevice27( real* rhoBC,
          (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs);
          (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs);
          (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs);
-         break;  
+         break;
 
-      case ZPZ:   
+      case ZPZ:
          (dist.f[DIR_0M0])[ks   ] = computeOutflowDistribution(f, f1, DIR_0M0, cs);
          (dist.f[DIR_PM0])[kse  ] = computeOutflowDistribution(f, f1, DIR_PM0, cs);
          (dist.f[DIR_MM0])[ksw  ] = computeOutflowDistribution(f, f1, DIR_MM0, cs);
@@ -3009,7 +3009,7 @@ __global__ void QPressNoRhoDevice27( real* rhoBC,
          (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs);
          (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs);
          (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs);
-         (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs); 
+         (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs);
          break;
 
       case ZZP:
@@ -3021,7 +3021,7 @@ __global__ void QPressNoRhoDevice27( real* rhoBC,
          (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs);
          (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs);
          (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs);
-         (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs);     
+         (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs);
          break;
       default:
          break;
@@ -3036,22 +3036,22 @@ __host__ __device__ real computeOutflowDistribution(const real* const &f, const
 }
 
 __global__ void QPressZeroRhoOutflowDevice27(  real* rhoBC,
-												 real* distributions, 
-												 int* k_Q, 
-												 int* k_N, 
-												 int numberOfBCnodes, 
-												 real om1, 
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int numberOfLBnodes, 
-												 bool isEvenTimestep,
+                                     real* distributions,
+                                     int* k_Q,
+                                     int* k_N,
+                                     int numberOfBCnodes,
+                                     real om1,
+                                     unsigned int* neighborX,
+                                     unsigned int* neighborY,
+                                     unsigned int* neighborZ,
+                                     unsigned int numberOfLBnodes,
+                                     bool isEvenTimestep,
                                      int direction,
                                      real densityCorrectionFactor)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned k = vf::gpu::getNodeIndex();
-   
+
    //////////////////////////////////////////////////////////////////////////
 
    if(k>=numberOfBCnodes) return;
@@ -3079,8 +3079,8 @@ __global__ void QPressZeroRhoOutflowDevice27(  real* rhoBC,
    uint kN_MMM = neighborZ[k_MM0];
    ////////////////////////////////////////////////////////////////////////////////
    Distributions27 dist;
-   getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);   
-   real f[27], fN[27];   
+   getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+   real f[27], fN[27];
    //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    f[DIR_000] = (dist.f[DIR_000])[k_000];
    f[DIR_P00] = (dist.f[DIR_P00])[k_000];
@@ -3139,9 +3139,9 @@ __global__ void QPressZeroRhoOutflowDevice27(  real* rhoBC,
    fN[DIR_MMM] = (dist.f[DIR_MMM])[kN_MMM];
    //////////////////////////////////////////////////////////////////////////
    real drho = vf::lbm::getDensity(f);
-   
+
    real rhoCorrection = densityCorrectionFactor*drho;
-   
+
    real cs = c1o1 / sqrtf(c3o1);
 
    getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
@@ -3182,9 +3182,9 @@ __global__ void QPressZeroRhoOutflowDevice27(  real* rhoBC,
          (dist.f[DIR_MPP])[k_M00] = computeOutflowDistribution(f, fN, DIR_MPP, rhoCorrection, cs, c1o216);
          (dist.f[DIR_PPM])[k_00M] = computeOutflowDistribution(f, fN, DIR_PPM, rhoCorrection, cs, c1o216);
          (dist.f[DIR_MPM])[k_M0M] = computeOutflowDistribution(f, fN, DIR_MPM, rhoCorrection, cs, c1o216);
-         break;  
+         break;
 
-      case ZPZ:   
+      case ZPZ:
          (dist.f[DIR_0M0])[k_0M0] =computeOutflowDistribution(f, fN, DIR_0M0, rhoCorrection, cs, c2o27);
          (dist.f[DIR_PM0])[k_0M0] =computeOutflowDistribution(f, fN, DIR_PM0, rhoCorrection, cs, c1o54);
          (dist.f[DIR_MM0])[k_MM0] =computeOutflowDistribution(f, fN, DIR_MM0, rhoCorrection, cs, c1o54);
@@ -3205,7 +3205,7 @@ __global__ void QPressZeroRhoOutflowDevice27(  real* rhoBC,
          (dist.f[DIR_PPP])[k_000] = computeOutflowDistribution(f, fN, DIR_PPP, rhoCorrection, cs, c1o216);
          (dist.f[DIR_MPP])[k_M00] = computeOutflowDistribution(f, fN, DIR_MPP, rhoCorrection, cs, c1o216);
          (dist.f[DIR_PMP])[k_0M0] = computeOutflowDistribution(f, fN, DIR_PMP, rhoCorrection, cs, c1o216);
-         (dist.f[DIR_MMP])[k_MM0] = computeOutflowDistribution(f, fN, DIR_MMP, rhoCorrection, cs, c1o216); 
+         (dist.f[DIR_MMP])[k_MM0] = computeOutflowDistribution(f, fN, DIR_MMP, rhoCorrection, cs, c1o216);
          break;
 
       case ZZP:
@@ -3217,7 +3217,7 @@ __global__ void QPressZeroRhoOutflowDevice27(  real* rhoBC,
          (dist.f[DIR_PPM])[k_00M] = computeOutflowDistribution(f, fN, DIR_PPM, rhoCorrection, cs, c1o216);
          (dist.f[DIR_MPM])[k_M0M] = computeOutflowDistribution(f, fN, DIR_MPM, rhoCorrection, cs, c1o216);
          (dist.f[DIR_PMM])[k_0MM] = computeOutflowDistribution(f, fN, DIR_PMM, rhoCorrection, cs, c1o216);
-         (dist.f[DIR_MMM])[k_MMM] = computeOutflowDistribution(f, fN, DIR_MMM, rhoCorrection, cs, c1o216);     
+         (dist.f[DIR_MMM])[k_MMM] = computeOutflowDistribution(f, fN, DIR_MMM, rhoCorrection, cs, c1o216);
          break;
       default:
          break;
@@ -3256,21 +3256,21 @@ __global__ void QPressZeroRhoOutflowDevice27(  real* rhoBC,
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 __global__ void QPressDeviceOld27(real* rhoBC,
-                                             real* DD, 
-                                             int* k_Q, 
-                                             int* k_N, 
-                                             int numberOfBCnodes, 
-                                             real om1, 
+                                             real* DD,
+                                             int* k_Q,
+                                             int* k_N,
+                                             int numberOfBCnodes,
+                                             real om1,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat, 
+                                             unsigned int size_Mat,
                                              bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // Globaler x-Index
+   const unsigned  y = blockIdx.x;   // Globaler y-Index
+   const unsigned  z = blockIdx.y;   // Globaler z-Index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -3371,7 +3371,7 @@ __global__ void QPressDeviceOld27(real* rhoBC,
          D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
          D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
          D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+      }
       else
       {
          D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
@@ -3438,8 +3438,8 @@ __global__ void QPressDeviceOld27(real* rhoBC,
       real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+
                           f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW;
 
-	  //drho1 = (drho1 + rhoBC[k])/2.f;
-	  drho1 = drho1 - rhoBC[k];
+     //drho1 = (drho1 + rhoBC[k])/2.f;
+     drho1 = drho1 - rhoBC[k];
       //////////////////////////////////////////////////////////////////////////
 
       __syncthreads();
@@ -3470,7 +3470,7 @@ __global__ void QPressDeviceOld27(real* rhoBC,
       (D.f[DIR_PPM ])[kbne ] = f1_TSW -c1o216*drho1;	//  c1o100;  // zero;  //
       (D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1o216*drho1;	//  c1o100;  // zero;  //
       (D.f[DIR_PMM ])[kbse ] = f1_TNW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1;  //  c1o100;  // zero;  //      
+      (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1;  //  c1o100;  // zero;  //
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -3515,22 +3515,22 @@ __global__ void QPressDeviceOld27(real* rhoBC,
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 __global__ void QPressDeviceEQZ27(real* rhoBC,
-                                             real* DD, 
-                                             int* k_Q, 
+                                             real* DD,
+                                             int* k_Q,
                                              int* k_N,
-											 real* kTestRE,
-                                             int numberOfBCnodes, 
-                                             real om1, 
+                                  real* kTestRE,
+                                             int numberOfBCnodes,
+                                             real om1,
                                              unsigned int* neighborX,
                                              unsigned int* neighborY,
                                              unsigned int* neighborZ,
-                                             unsigned int size_Mat, 
+                                             unsigned int size_Mat,
                                              bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // Globaler x-Index
+   const unsigned  y = blockIdx.x;   // Globaler y-Index
+   const unsigned  z = blockIdx.y;   // Globaler z-Index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -3631,7 +3631,7 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
          D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
          D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
          D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+      }
       else
       {
          D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
@@ -3755,17 +3755,17 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
 
    //   //////////////////////////////////////////////////////////////////////////
    //   real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+ f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW;
-	  //real vx1      = (((f1_TNE-f1_BSW)+(f1_BSE-f1_TNW)+(f1_BNE-f1_TSW)+(f1_TSE-f1_BNW)) + (((f1_NE-f1_SW)+(f1_TE-f1_BW))+((f1_SE-f1_NW)+(f1_BE-f1_TW))) + (f1_E-f1_W)) / (one + drho1);
-	  //real vx2      = (((f1_TNE-f1_BSW)+(f1_TNW-f1_BSE)+(f1_BNE-f1_TSW)+(f1_BNW-f1_TSE)) + (((f1_NE-f1_SW)+(f1_TN-f1_BS))+((f1_BN-f1_TS)+(f1_NW-f1_SE))) + (f1_N-f1_S)) / (one + drho1);
-	  //real vx3      = (((f1_TNE-f1_BSW)+(f1_TNW-f1_BSE)+(f1_TSW-f1_BNE)+(f1_TSE-f1_BNW)) + (((f1_TE-f1_BW)+(f1_TN-f1_BS))+((f1_TW-f1_BE)+(f1_TS-f1_BN))) + (f1_T-f1_B)) / (one + drho1);
+     //real vx1      = (((f1_TNE-f1_BSW)+(f1_BSE-f1_TNW)+(f1_BNE-f1_TSW)+(f1_TSE-f1_BNW)) + (((f1_NE-f1_SW)+(f1_TE-f1_BW))+((f1_SE-f1_NW)+(f1_BE-f1_TW))) + (f1_E-f1_W)) / (one + drho1);
+     //real vx2      = (((f1_TNE-f1_BSW)+(f1_TNW-f1_BSE)+(f1_BNE-f1_TSW)+(f1_BNW-f1_TSE)) + (((f1_NE-f1_SW)+(f1_TN-f1_BS))+((f1_BN-f1_TS)+(f1_NW-f1_SE))) + (f1_N-f1_S)) / (one + drho1);
+     //real vx3      = (((f1_TNE-f1_BSW)+(f1_TNW-f1_BSE)+(f1_TSW-f1_BNE)+(f1_TSE-f1_BNW)) + (((f1_TE-f1_BW)+(f1_TN-f1_BS))+((f1_TW-f1_BE)+(f1_TS-f1_BN))) + (f1_T-f1_B)) / (one + drho1);
    //   //////////////////////////////////////////////////////////////////////////
-	  ////real omega = om1;
+     ////real omega = om1;
    //   real cusq  = c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
    //   //////////////////////////////////////////////////////////////////////////
-	  ////Tï¿½st MK
-	  ////if(vx1 < zero) vx1 = zero;
+     ////Test MK
+     ////if(vx1 < zero) vx1 = zero;
    //   //////////////////////////////////////////////////////////////////////////
-	  ////becomes higher with neighbor source and lower with local source
+     ////becomes higher with neighbor source and lower with local source
    //   //real fZERO = c8over27*  (rhoBC[k]-(one + rhoBC[k])*(cusq))                                                           ;
    //   //real fE    = c2over27*  (rhoBC[k]+(one + rhoBC[k])*(three*( vx1        )+c9over2*( vx1        )*( vx1        )-cusq));
    //   //real fW    = c2over27*  (rhoBC[k]+(one + rhoBC[k])*(three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cusq));
@@ -3794,7 +3794,7 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
    //   //real fBSE  = c1over216* (rhoBC[k]+(one + rhoBC[k])*(three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq));
    //   //real fTNW  = c1over216* (rhoBC[k]+(one + rhoBC[k])*(three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq));
    //   //////////////////////////////////////////////////////////////////////////
-	  //// based on VirtualFluids (kucher + fard)
+     //// based on VirtualFluids (kucher + fard)
    //   real fZERO = c8over27  * rhoBC[k] * (one                                                                      - cusq);
    //   real fE    = c2over27  * rhoBC[k] * (one + three * ( vx1        ) + c9over2 * ( vx1        ) * ( vx1        ) - cusq);
    //   real fW    = c2over27  * rhoBC[k] * (one + three * (-vx1        ) + c9over2 * (-vx1        ) * (-vx1        ) - cusq);
@@ -3823,7 +3823,7 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
    //   real fBSE  = c1over216 * rhoBC[k] * (one + three * ( vx1-vx2-vx3) + c9over2 * ( vx1-vx2-vx3) * ( vx1-vx2-vx3) - cusq);
    //   real fTNW  = c1over216 * rhoBC[k] * (one + three * (-vx1+vx2+vx3) + c9over2 * (-vx1+vx2+vx3) * (-vx1+vx2+vx3) - cusq);
    ////   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  //////test
+     //////test
    ////   real fZERO = c8over27  * ((drho1 + rhoBC[k]) / two) * (one                                                                      - cusq);
    ////   real fE    = c2over27  * ((drho1 + rhoBC[k]) / two) * (one + three * ( vx1        ) + c9over2 * ( vx1        ) * ( vx1        ) - cusq);
    ////   real fW    = c2over27  * ((drho1 + rhoBC[k]) / two) * (one + three * (-vx1        ) + c9over2 * (-vx1        ) * (-vx1        ) - cusq);
@@ -3852,190 +3852,190 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
    ////   real fBSE  = c1over216 * ((drho1 + rhoBC[k]) / two) * (one + three * ( vx1-vx2-vx3) + c9over2 * ( vx1-vx2-vx3) * ( vx1-vx2-vx3) - cusq);
    ////   real fTNW  = c1over216 * ((drho1 + rhoBC[k]) / two) * (one + three * (-vx1+vx2+vx3) + c9over2 * (-vx1+vx2+vx3) * (-vx1+vx2+vx3) - cusq);
 
-			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+         //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             // based on BGK Plus Comp
-			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			//double mfabb = (D.f[DIR_P00   ])[k1e   ];
-			//double mfcbb = (D.f[DIR_M00   ])[k1w   ];
-			//double mfbab = (D.f[DIR_0P0   ])[k1n   ];
-			//double mfbcb = (D.f[DIR_0M0   ])[k1s   ];
-			//double mfbba = (D.f[DIR_00P   ])[k1t   ];
-			//double mfbbc = (D.f[DIR_00M   ])[k1b   ];
-			//double mfaab = (D.f[DIR_PP0  ])[k1ne  ];
-			//double mfccb = (D.f[DIR_MM0  ])[k1sw  ];
-			//double mfacb = (D.f[DIR_PM0  ])[k1se  ];
-			//double mfcab = (D.f[DIR_MP0  ])[k1nw  ];
-			//double mfaba = (D.f[DIR_P0P  ])[k1te  ];
-			//double mfcbc = (D.f[DIR_M0M  ])[k1bw  ];
-			//double mfabc = (D.f[DIR_P0M  ])[k1be  ];
-			//double mfcba = (D.f[DIR_M0P  ])[k1tw  ];
-			//double mfbaa = (D.f[DIR_0PP  ])[k1tn  ];
-			//double mfbcc = (D.f[DIR_0MM  ])[k1bs  ];
-			//double mfbac = (D.f[DIR_0PM  ])[k1bn  ];
-			//double mfbca = (D.f[DIR_0MP  ])[k1ts  ];
-			//double mfbbb = (D.f[DIR_000])[k1zero];
-			//double mfaaa = (D.f[DIR_PPP ])[k1tne ];
-			//double mfcca = (D.f[DIR_MMP ])[k1tsw ];
-			//double mfaca = (D.f[DIR_PMP ])[k1tse ];
-			//double mfcaa = (D.f[DIR_MPP ])[k1tnw ];
-			//double mfaac = (D.f[DIR_PPM ])[k1bne ];
-			//double mfccc = (D.f[DIR_MMM ])[k1bsw ];
-			//double mfacc = (D.f[DIR_PMM ])[k1bse ];
-			//double mfcac = (D.f[DIR_MPM ])[k1bnw ];
-			real mfabb = (D.f[DIR_P00   ])[k1e   ];
-			real mfcbb = (D.f[DIR_M00   ])[k1w   ];
-			real mfbab = (D.f[DIR_0P0   ])[k1n   ];
-			real mfbcb = (D.f[DIR_0M0   ])[k1s   ];
-			real mfbba = (D.f[DIR_00P   ])[k1t   ];
-			real mfbbc = (D.f[DIR_00M   ])[k1b   ];
-			real mfaab = (D.f[DIR_PP0  ])[k1ne  ];
-			real mfccb = (D.f[DIR_MM0  ])[k1sw  ];
-			real mfacb = (D.f[DIR_PM0  ])[k1se  ];
-			real mfcab = (D.f[DIR_MP0  ])[k1nw  ];
-			real mfaba = (D.f[DIR_P0P  ])[k1te  ];
-			real mfcbc = (D.f[DIR_M0M  ])[k1bw  ];
-			real mfabc = (D.f[DIR_P0M  ])[k1be  ];
-			real mfcba = (D.f[DIR_M0P  ])[k1tw  ];
-			real mfbaa = (D.f[DIR_0PP  ])[k1tn  ];
-			real mfbcc = (D.f[DIR_0MM  ])[k1bs  ];
-			real mfbac = (D.f[DIR_0PM  ])[k1bn  ];
-			real mfbca = (D.f[DIR_0MP  ])[k1ts  ];
-			real mfbbb = (D.f[DIR_000])[k1zero];
-			real mfaaa = (D.f[DIR_PPP ])[k1tne ];
-			real mfcca = (D.f[DIR_MMP ])[k1tsw ];
-			real mfaca = (D.f[DIR_PMP ])[k1tse ];
-			real mfcaa = (D.f[DIR_MPP ])[k1tnw ];
-			real mfaac = (D.f[DIR_PPM ])[k1bne ];
-			real mfccc = (D.f[DIR_MMM ])[k1bsw ];
-			real mfacc = (D.f[DIR_PMM ])[k1bse ];
-			real mfcac = (D.f[DIR_MPM ])[k1bnw ];
-
-			//real mfcbb = (D.f[DIR_P00   ])[ke   ];
-			//real mfabb = (D.f[DIR_M00   ])[kw   ];
-			//real mfbcb = (D.f[DIR_0P0   ])[kn   ];
-			//real mfbab = (D.f[DIR_0M0   ])[ks   ];
-			//real mfbbc = (D.f[DIR_00P   ])[kt   ];
-			//real mfbba = (D.f[DIR_00M   ])[kb   ];
-			//real mfccb = (D.f[DIR_PP0  ])[kne  ];
-			//real mfaab = (D.f[DIR_MM0  ])[ksw  ];
-			//real mfcab = (D.f[DIR_PM0  ])[kse  ];
-			//real mfacb = (D.f[DIR_MP0  ])[knw  ];
-			//real mfcbc = (D.f[DIR_P0P  ])[kte  ];
-			//real mfaba = (D.f[DIR_M0M  ])[kbw  ];
-			//real mfcba = (D.f[DIR_P0M  ])[kbe  ];
-			//real mfabc = (D.f[DIR_M0P  ])[ktw  ];
-			//real mfbcc = (D.f[DIR_0PP  ])[ktn  ];
-			//real mfbaa = (D.f[DIR_0MM  ])[kbs  ];
-			//real mfbca = (D.f[DIR_0PM  ])[kbn  ];
-			//real mfbac = (D.f[DIR_0MP  ])[kts  ];
-			//real mfbbb = (D.f[DIR_000])[kzero];
-			//real mfccc = (D.f[DIR_PPP ])[ktne ];
-			//real mfaac = (D.f[DIR_MMP ])[ktsw ];
-			//real mfcac = (D.f[DIR_PMP ])[ktse ];
-			//real mfacc = (D.f[DIR_MPP ])[ktnw ];
-			//real mfcca = (D.f[DIR_PPM ])[kbne ];
-			//real mfaaa = (D.f[DIR_MMM ])[kbsw ];
-			//real mfcaa = (D.f[DIR_PMM ])[kbse ];
-			//real mfaca = (D.f[DIR_MPM ])[kbnw ];
-			////////////////////////////////////////////////////////////////////////////////////
-			//real rho   = (((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
-			//				(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
-			//				((mfabb+mfcbb) + (mfbab+mfbcb)) + (mfbba+mfbbc)) + mfbbb) + one;//!!!!Achtung + one
-			////////////////////////////////////////////////////////////////////////////////////
-			real rho = rhoBC[k];
-			////////////////////////////////////////////////////////////////////////////////////
-			real OoRho = c1o1 / (rho * 1.5f);
-			////////////////////////////////////////////////////////////////////////////////////
-			real vvx    = ((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) + 
-						     (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) +
-						       (mfcbb-mfabb)) * OoRho;
-			real vvy    =((((mfccc-mfaaa) + (mfaca-mfcac)) + ((mfacc-mfcaa) + (mfcca-mfaac))) + 
-				             (((mfbca-mfbac) + (mfbcc-mfbaa)) + ((mfacb-mfcab) + (mfccb-mfaab))) +
-				               (mfbcb-mfbab)) * OoRho;
-			real vvz    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfacc-mfcaa) + (mfaac-mfcca))) + 
-				             (((mfbac-mfbca) + (mfbcc-mfbaa)) + ((mfabc-mfcba) + (mfcbc-mfaba))) +
-				               (mfbbc-mfbba)) * OoRho;
-			/////////////////////////
-			//Test Values
-			//double vvx    = 0.016;
-			//double vvy    = zero;
-			//double vvz    = zero;
-			////////////////////////////////////////////////////////////////////////////////////////
-			////round off error test
-			//if(vvx!=zero){
-			//	(kDistTest.f[DIR_P00   ])[k] = mfabb;
-			//	(kDistTest.f[DIR_M00   ])[k] = mfcbb;
-			//	(kDistTest.f[DIR_0P0   ])[k] = mfbab;
-			//	(kDistTest.f[DIR_0M0   ])[k] = mfbcb;
-			//	(kDistTest.f[DIR_00P   ])[k] = mfbba;
-			//	(kDistTest.f[DIR_00M   ])[k] = mfbbc;
-			//	(kDistTest.f[DIR_PP0  ])[k] = mfaab;
-			//	(kDistTest.f[DIR_MM0  ])[k] = mfccb;
-			//	(kDistTest.f[DIR_PM0  ])[k] = mfacb;
-			//	(kDistTest.f[DIR_MP0  ])[k] = mfcab;
-			//	(kDistTest.f[DIR_P0P  ])[k] = mfaba;
-			//	(kDistTest.f[DIR_M0M  ])[k] = mfcbc;
-			//	(kDistTest.f[DIR_P0M  ])[k] = mfabc;
-			//	(kDistTest.f[DIR_M0P  ])[k] = mfcba;
-			//	(kDistTest.f[DIR_0PP  ])[k] = mfbaa;
-			//	(kDistTest.f[DIR_0MM  ])[k] = mfbcc;
-			//	(kDistTest.f[DIR_0PM  ])[k] = mfbac;
-			//	(kDistTest.f[DIR_0MP  ])[k] = mfbca;
-			//	(kDistTest.f[DIR_000])[k] = KQK;
-			//	(kDistTest.f[DIR_PPP ])[k] = mfaaa;
-			//	(kDistTest.f[DIR_MMP ])[k] = mfcca;
-			//	(kDistTest.f[DIR_PMP ])[k] = mfaca;
-			//	(kDistTest.f[DIR_MPP ])[k] = mfcaa;
-			//	(kDistTest.f[DIR_PPM ])[k] = mfaac;
-			//	(kDistTest.f[DIR_MMM ])[k] = mfccc;
-			//	(kDistTest.f[DIR_PMM ])[k] = mfacc;
-			//	(kDistTest.f[DIR_MPM ])[k] = mfcac;
-			//}else{
-			//	(kDistTest.f[DIR_P00   ])[k] = zero;
-			//	(kDistTest.f[DIR_M00   ])[k] = zero;
-			//	(kDistTest.f[DIR_0P0   ])[k] = zero;
-			//	(kDistTest.f[DIR_0M0   ])[k] = zero;
-			//	(kDistTest.f[DIR_00P   ])[k] = zero;
-			//	(kDistTest.f[DIR_00M   ])[k] = zero;
-			//	(kDistTest.f[DIR_PP0  ])[k] = zero;
-			//	(kDistTest.f[DIR_MM0  ])[k] = zero;
-			//	(kDistTest.f[DIR_PM0  ])[k] = zero;
-			//	(kDistTest.f[DIR_MP0  ])[k] = zero;
-			//	(kDistTest.f[DIR_P0P  ])[k] = zero;
-			//	(kDistTest.f[DIR_M0M  ])[k] = zero;
-			//	(kDistTest.f[DIR_P0M  ])[k] = zero;
-			//	(kDistTest.f[DIR_M0P  ])[k] = zero;
-			//	(kDistTest.f[DIR_0PP  ])[k] = zero;
-			//	(kDistTest.f[DIR_0MM  ])[k] = zero;
-			//	(kDistTest.f[DIR_0PM  ])[k] = zero;
-			//	(kDistTest.f[DIR_0MP  ])[k] = zero;
-			//	(kDistTest.f[DIR_000])[k] = zero;
-			//	(kDistTest.f[DIR_PPP ])[k] = zero;
-			//	(kDistTest.f[DIR_MMP ])[k] = zero;
-			//	(kDistTest.f[DIR_PMP ])[k] = zero;
-			//	(kDistTest.f[DIR_MPP ])[k] = zero;
-			//	(kDistTest.f[DIR_PPM ])[k] = zero;
-			//	(kDistTest.f[DIR_MMM ])[k] = zero;
-			//	(kDistTest.f[DIR_PMM ])[k] = zero;
-			//	(kDistTest.f[DIR_MPM ])[k] = zero;
-			//}
-
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// first bad fix for negative x velocity
-			////if(vvx > zero) vvx = zero;
-			//////////////////////////////////////////////////////////////////////////////////////
-			////// second bad fix for negative x velocity
-			////if(vvx > zero){
-			////	vvx = -vvx;
-			////	vvy = -vvy;
-			////	vvz = -vvz;
-			////}
-			////////////////////////////////////////////////////////////////////////////////////
-			double vx2    = vvx * vvx;
-			double vy2    = vvy * vvy;
-			double vz2    = vvz * vvz;
-			//////////////////////////////////////////////////////////////////////////////////
-			//original
+         //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+         //double mfabb = (D.f[DIR_P00   ])[k1e   ];
+         //double mfcbb = (D.f[DIR_M00   ])[k1w   ];
+         //double mfbab = (D.f[DIR_0P0   ])[k1n   ];
+         //double mfbcb = (D.f[DIR_0M0   ])[k1s   ];
+         //double mfbba = (D.f[DIR_00P   ])[k1t   ];
+         //double mfbbc = (D.f[DIR_00M   ])[k1b   ];
+         //double mfaab = (D.f[DIR_PP0  ])[k1ne  ];
+         //double mfccb = (D.f[DIR_MM0  ])[k1sw  ];
+         //double mfacb = (D.f[DIR_PM0  ])[k1se  ];
+         //double mfcab = (D.f[DIR_MP0  ])[k1nw  ];
+         //double mfaba = (D.f[DIR_P0P  ])[k1te  ];
+         //double mfcbc = (D.f[DIR_M0M  ])[k1bw  ];
+         //double mfabc = (D.f[DIR_P0M  ])[k1be  ];
+         //double mfcba = (D.f[DIR_M0P  ])[k1tw  ];
+         //double mfbaa = (D.f[DIR_0PP  ])[k1tn  ];
+         //double mfbcc = (D.f[DIR_0MM  ])[k1bs  ];
+         //double mfbac = (D.f[DIR_0PM  ])[k1bn  ];
+         //double mfbca = (D.f[DIR_0MP  ])[k1ts  ];
+         //double mfbbb = (D.f[DIR_000])[k1zero];
+         //double mfaaa = (D.f[DIR_PPP ])[k1tne ];
+         //double mfcca = (D.f[DIR_MMP ])[k1tsw ];
+         //double mfaca = (D.f[DIR_PMP ])[k1tse ];
+         //double mfcaa = (D.f[DIR_MPP ])[k1tnw ];
+         //double mfaac = (D.f[DIR_PPM ])[k1bne ];
+         //double mfccc = (D.f[DIR_MMM ])[k1bsw ];
+         //double mfacc = (D.f[DIR_PMM ])[k1bse ];
+         //double mfcac = (D.f[DIR_MPM ])[k1bnw ];
+         real mfabb = (D.f[DIR_P00   ])[k1e   ];
+         real mfcbb = (D.f[DIR_M00   ])[k1w   ];
+         real mfbab = (D.f[DIR_0P0   ])[k1n   ];
+         real mfbcb = (D.f[DIR_0M0   ])[k1s   ];
+         real mfbba = (D.f[DIR_00P   ])[k1t   ];
+         real mfbbc = (D.f[DIR_00M   ])[k1b   ];
+         real mfaab = (D.f[DIR_PP0  ])[k1ne  ];
+         real mfccb = (D.f[DIR_MM0  ])[k1sw  ];
+         real mfacb = (D.f[DIR_PM0  ])[k1se  ];
+         real mfcab = (D.f[DIR_MP0  ])[k1nw  ];
+         real mfaba = (D.f[DIR_P0P  ])[k1te  ];
+         real mfcbc = (D.f[DIR_M0M  ])[k1bw  ];
+         real mfabc = (D.f[DIR_P0M  ])[k1be  ];
+         real mfcba = (D.f[DIR_M0P  ])[k1tw  ];
+         real mfbaa = (D.f[DIR_0PP  ])[k1tn  ];
+         real mfbcc = (D.f[DIR_0MM  ])[k1bs  ];
+         real mfbac = (D.f[DIR_0PM  ])[k1bn  ];
+         real mfbca = (D.f[DIR_0MP  ])[k1ts  ];
+         real mfbbb = (D.f[DIR_000])[k1zero];
+         real mfaaa = (D.f[DIR_PPP ])[k1tne ];
+         real mfcca = (D.f[DIR_MMP ])[k1tsw ];
+         real mfaca = (D.f[DIR_PMP ])[k1tse ];
+         real mfcaa = (D.f[DIR_MPP ])[k1tnw ];
+         real mfaac = (D.f[DIR_PPM ])[k1bne ];
+         real mfccc = (D.f[DIR_MMM ])[k1bsw ];
+         real mfacc = (D.f[DIR_PMM ])[k1bse ];
+         real mfcac = (D.f[DIR_MPM ])[k1bnw ];
+
+         //real mfcbb = (D.f[DIR_P00   ])[ke   ];
+         //real mfabb = (D.f[DIR_M00   ])[kw   ];
+         //real mfbcb = (D.f[DIR_0P0   ])[kn   ];
+         //real mfbab = (D.f[DIR_0M0   ])[ks   ];
+         //real mfbbc = (D.f[DIR_00P   ])[kt   ];
+         //real mfbba = (D.f[DIR_00M   ])[kb   ];
+         //real mfccb = (D.f[DIR_PP0  ])[kne  ];
+         //real mfaab = (D.f[DIR_MM0  ])[ksw  ];
+         //real mfcab = (D.f[DIR_PM0  ])[kse  ];
+         //real mfacb = (D.f[DIR_MP0  ])[knw  ];
+         //real mfcbc = (D.f[DIR_P0P  ])[kte  ];
+         //real mfaba = (D.f[DIR_M0M  ])[kbw  ];
+         //real mfcba = (D.f[DIR_P0M  ])[kbe  ];
+         //real mfabc = (D.f[DIR_M0P  ])[ktw  ];
+         //real mfbcc = (D.f[DIR_0PP  ])[ktn  ];
+         //real mfbaa = (D.f[DIR_0MM  ])[kbs  ];
+         //real mfbca = (D.f[DIR_0PM  ])[kbn  ];
+         //real mfbac = (D.f[DIR_0MP  ])[kts  ];
+         //real mfbbb = (D.f[DIR_000])[kzero];
+         //real mfccc = (D.f[DIR_PPP ])[ktne ];
+         //real mfaac = (D.f[DIR_MMP ])[ktsw ];
+         //real mfcac = (D.f[DIR_PMP ])[ktse ];
+         //real mfacc = (D.f[DIR_MPP ])[ktnw ];
+         //real mfcca = (D.f[DIR_PPM ])[kbne ];
+         //real mfaaa = (D.f[DIR_MMM ])[kbsw ];
+         //real mfcaa = (D.f[DIR_PMM ])[kbse ];
+         //real mfaca = (D.f[DIR_MPM ])[kbnw ];
+         ////////////////////////////////////////////////////////////////////////////////////
+         //real rho   = (((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) +
+         //				(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
+         //				((mfabb+mfcbb) + (mfbab+mfbcb)) + (mfbba+mfbbc)) + mfbbb) + one;//!!!!Achtung + one
+         ////////////////////////////////////////////////////////////////////////////////////
+         real rho = rhoBC[k];
+         ////////////////////////////////////////////////////////////////////////////////////
+         real OoRho = c1o1 / (rho * 1.5f);
+         ////////////////////////////////////////////////////////////////////////////////////
+         real vvx    = ((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) +
+                       (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) +
+                         (mfcbb-mfabb)) * OoRho;
+         real vvy    =((((mfccc-mfaaa) + (mfaca-mfcac)) + ((mfacc-mfcaa) + (mfcca-mfaac))) +
+                         (((mfbca-mfbac) + (mfbcc-mfbaa)) + ((mfacb-mfcab) + (mfccb-mfaab))) +
+                           (mfbcb-mfbab)) * OoRho;
+         real vvz    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfacc-mfcaa) + (mfaac-mfcca))) +
+                         (((mfbac-mfbca) + (mfbcc-mfbaa)) + ((mfabc-mfcba) + (mfcbc-mfaba))) +
+                           (mfbbc-mfbba)) * OoRho;
+         /////////////////////////
+         //Test Values
+         //double vvx    = 0.016;
+         //double vvy    = zero;
+         //double vvz    = zero;
+         ////////////////////////////////////////////////////////////////////////////////////////
+         ////round off error test
+         //if(vvx!=zero){
+         //	(kDistTest.f[DIR_P00   ])[k] = mfabb;
+         //	(kDistTest.f[DIR_M00   ])[k] = mfcbb;
+         //	(kDistTest.f[DIR_0P0   ])[k] = mfbab;
+         //	(kDistTest.f[DIR_0M0   ])[k] = mfbcb;
+         //	(kDistTest.f[DIR_00P   ])[k] = mfbba;
+         //	(kDistTest.f[DIR_00M   ])[k] = mfbbc;
+         //	(kDistTest.f[DIR_PP0  ])[k] = mfaab;
+         //	(kDistTest.f[DIR_MM0  ])[k] = mfccb;
+         //	(kDistTest.f[DIR_PM0  ])[k] = mfacb;
+         //	(kDistTest.f[DIR_MP0  ])[k] = mfcab;
+         //	(kDistTest.f[DIR_P0P  ])[k] = mfaba;
+         //	(kDistTest.f[DIR_M0M  ])[k] = mfcbc;
+         //	(kDistTest.f[DIR_P0M  ])[k] = mfabc;
+         //	(kDistTest.f[DIR_M0P  ])[k] = mfcba;
+         //	(kDistTest.f[DIR_0PP  ])[k] = mfbaa;
+         //	(kDistTest.f[DIR_0MM  ])[k] = mfbcc;
+         //	(kDistTest.f[DIR_0PM  ])[k] = mfbac;
+         //	(kDistTest.f[DIR_0MP  ])[k] = mfbca;
+         //	(kDistTest.f[DIR_000])[k] = KQK;
+         //	(kDistTest.f[DIR_PPP ])[k] = mfaaa;
+         //	(kDistTest.f[DIR_MMP ])[k] = mfcca;
+         //	(kDistTest.f[DIR_PMP ])[k] = mfaca;
+         //	(kDistTest.f[DIR_MPP ])[k] = mfcaa;
+         //	(kDistTest.f[DIR_PPM ])[k] = mfaac;
+         //	(kDistTest.f[DIR_MMM ])[k] = mfccc;
+         //	(kDistTest.f[DIR_PMM ])[k] = mfacc;
+         //	(kDistTest.f[DIR_MPM ])[k] = mfcac;
+         //}else{
+         //	(kDistTest.f[DIR_P00   ])[k] = zero;
+         //	(kDistTest.f[DIR_M00   ])[k] = zero;
+         //	(kDistTest.f[DIR_0P0   ])[k] = zero;
+         //	(kDistTest.f[DIR_0M0   ])[k] = zero;
+         //	(kDistTest.f[DIR_00P   ])[k] = zero;
+         //	(kDistTest.f[DIR_00M   ])[k] = zero;
+         //	(kDistTest.f[DIR_PP0  ])[k] = zero;
+         //	(kDistTest.f[DIR_MM0  ])[k] = zero;
+         //	(kDistTest.f[DIR_PM0  ])[k] = zero;
+         //	(kDistTest.f[DIR_MP0  ])[k] = zero;
+         //	(kDistTest.f[DIR_P0P  ])[k] = zero;
+         //	(kDistTest.f[DIR_M0M  ])[k] = zero;
+         //	(kDistTest.f[DIR_P0M  ])[k] = zero;
+         //	(kDistTest.f[DIR_M0P  ])[k] = zero;
+         //	(kDistTest.f[DIR_0PP  ])[k] = zero;
+         //	(kDistTest.f[DIR_0MM  ])[k] = zero;
+         //	(kDistTest.f[DIR_0PM  ])[k] = zero;
+         //	(kDistTest.f[DIR_0MP  ])[k] = zero;
+         //	(kDistTest.f[DIR_000])[k] = zero;
+         //	(kDistTest.f[DIR_PPP ])[k] = zero;
+         //	(kDistTest.f[DIR_MMP ])[k] = zero;
+         //	(kDistTest.f[DIR_PMP ])[k] = zero;
+         //	(kDistTest.f[DIR_MPP ])[k] = zero;
+         //	(kDistTest.f[DIR_PPM ])[k] = zero;
+         //	(kDistTest.f[DIR_MMM ])[k] = zero;
+         //	(kDistTest.f[DIR_PMM ])[k] = zero;
+         //	(kDistTest.f[DIR_MPM ])[k] = zero;
+         //}
+
+         //////////////////////////////////////////////////////////////////////////////////////
+         //// first bad fix for negative x velocity
+         ////if(vvx > zero) vvx = zero;
+         //////////////////////////////////////////////////////////////////////////////////////
+         ////// second bad fix for negative x velocity
+         ////if(vvx > zero){
+         ////	vvx = -vvx;
+         ////	vvy = -vvy;
+         ////	vvz = -vvz;
+         ////}
+         ////////////////////////////////////////////////////////////////////////////////////
+         double vx2    = vvx * vvx;
+         double vy2    = vvy * vvy;
+         double vz2    = vvz * vvz;
+         //////////////////////////////////////////////////////////////////////////////////
+         //original
             real XXb    = -c2o3 + vx2;
             real XXc    = -c1o2 * (XXb + c1o1 + vvx);
             real XXa    = XXc + vvx;
@@ -4045,66 +4045,66 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
             real ZZb    = -c2o3 + vz2;
             real ZZc    = -c1o2 * (ZZb + c1o1 + vvz);
             real ZZa    = ZZc + vvz;
-			//////////////////////////////////////////////////////////////////////////////////
-			//unkonditioniert
-            mfcbb = -(rhoBC[k] + c1o1) * XXc * YYb * ZZb - c2o27; 
-			mfabb = -(rhoBC[k] + c1o1) * XXa * YYb * ZZb - c2o27;
-			mfbcb = -(rhoBC[k] + c1o1) * XXb * YYc * ZZb - c2o27;
-			mfbab = -(rhoBC[k] + c1o1) * XXb * YYa * ZZb - c2o27;
-			mfbbc = -(rhoBC[k] + c1o1) * XXb * YYb * ZZc - c2o27;
-			mfbba = -(rhoBC[k] + c1o1) * XXb * YYb * ZZa - c2o27;
-			mfccb = -(rhoBC[k] + c1o1) * XXc * YYc * ZZb - c1o54;
-			mfaab = -(rhoBC[k] + c1o1) * XXa * YYa * ZZb - c1o54;
-			mfcab = -(rhoBC[k] + c1o1) * XXc * YYa * ZZb - c1o54;
-			mfacb = -(rhoBC[k] + c1o1) * XXa * YYc * ZZb - c1o54;
-			mfcbc = -(rhoBC[k] + c1o1) * XXc * YYb * ZZc - c1o54;
-			mfaba = -(rhoBC[k] + c1o1) * XXa * YYb * ZZa - c1o54;
-			mfcba = -(rhoBC[k] + c1o1) * XXc * YYb * ZZa - c1o54;
-			mfabc = -(rhoBC[k] + c1o1) * XXa * YYb * ZZc - c1o54;
-			mfbcc = -(rhoBC[k] + c1o1) * XXb * YYc * ZZc - c1o54;
-			mfbaa = -(rhoBC[k] + c1o1) * XXb * YYa * ZZa - c1o54;
-			mfbca = -(rhoBC[k] + c1o1) * XXb * YYc * ZZa - c1o54;
-			mfbac = -(rhoBC[k] + c1o1) * XXb * YYa * ZZc - c1o54;
-			mfbbb = -(rhoBC[k] + c1o1) * XXb * YYb * ZZb - c8o27;
-			mfccc = -(rhoBC[k] + c1o1) * XXc * YYc * ZZc - c1o216;
-			mfaac = -(rhoBC[k] + c1o1) * XXa * YYa * ZZc - c1o216;
-			mfcac = -(rhoBC[k] + c1o1) * XXc * YYa * ZZc - c1o216;
-			mfacc = -(rhoBC[k] + c1o1) * XXa * YYc * ZZc - c1o216;
-			mfcca = -(rhoBC[k] + c1o1) * XXc * YYc * ZZa - c1o216;
-			mfaaa = -(rhoBC[k] + c1o1) * XXa * YYa * ZZa - c1o216;
-			mfcaa = -(rhoBC[k] + c1o1) * XXc * YYa * ZZa - c1o216;
-			mfaca = -(rhoBC[k] + c1o1) * XXa * YYc * ZZa - c1o216;
-			//////////////////////////////////////////////////////////
-			////konditioniert
-			//double OneOver216RhoPlusOne = c1over216*(rhoBC[k]+one);
-			//double OnoOver216Rho        = c1over216*rhoBC[k];
-			//mfcbb = OnoOver216Rho*sixteen + OneOver216RhoPlusOne*twelve*(-(two*vy2) - two*vz2 + three*vy2*vz2 + vvx*(-two + three*vy2)*(-two + three*vz2) + vx2*(-two + three*vy2)*(-two + three*vz2));
-			//mfabb = OnoOver216Rho*sixteen - OneOver216RhoPlusOne*twelve*(two*vy2 + two*vz2 - three*vy2*vz2 + vvx*(-two + three*vy2)*(-two + three*vz2) + vx2*(-four + six*vy2 + six*vz2 - nine*vy2*vz2));
-			//mfbcb = four*(-(four*OneOver216RhoPlusOne) + four*OnoOver216Rho + OneOver216RhoPlusOne*(-two + three*vx2)*(one + three*vvy + three*vy2)*(-two + three*vz2));
-			//mfbab = four*(four*OnoOver216Rho - OneOver216RhoPlusOne*three*(vvy*(-two + three*vx2)*(-two + three*vz2) - one*vx2*(one + three*vy2)*(-two + three*vz2) + two*(-(two*vy2) + vz2 + three*vy2*vz2)));
-			//mfbbc = four*(-(four*OneOver216RhoPlusOne) + four*OnoOver216Rho + OneOver216RhoPlusOne*(-two + three*vx2)*(-two + three*vy2)*(one + three*vvz + three*vz2));
-			//mfbba = four*(four*OnoOver216Rho - OneOver216RhoPlusOne*three*(vvz*(-two + three*vx2)*(-two + three*vy2) - one*vx2*(-two + three*vy2)*(one + three*vz2) + two*(vy2 - two*vz2 + three*vy2*vz2)));
-			//mfccb = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) - two*vy2 - six*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(one + three*vvy + three*vy2)*(-two + three*vz2))));
-			//mfaab = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) - two*vy2 - six*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-two + three*vz2))));
-			//mfcab = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 + two*vy2 + six*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-two + three*vz2)));
-			//mfacb = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 + two*vy2 + six*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(one + three*vvy + three*vy2)*(-two + three*vz2)));
-			//mfcbc = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) + vy2 + three*vx2*vy2 + vvz*(one + three*vx2)*(-two + three*vy2) - two*vz2 - six*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(one + three*vvz + three*vz2))));
-			//mfaba = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) + vy2 + three*vx2*vy2 - one*vvz*(one + three*vx2)*(-two + three*vy2) - two*vz2 - six*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(-one + three*vvz - three*vz2))));
-			//mfcba = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 - one*vy2 - three*vx2*vy2 + vvz*(one + three*vx2)*(-two + three*vy2) + two*vz2 + six*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(-one + three*vvz - three*vz2)));
-			//mfabc = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 - one*vy2 - three*vx2*vy2 - one*vvz*(one + three*vx2)*(-two + three*vy2) + two*vz2 + six*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(one + three*vvz + three*vz2)));
-			//mfbcc = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(vx2 - two*vy2 + three*vx2*vy2 + vvz*(-two + three*vx2)*(one + three*vy2) - two*vz2 + three*vx2*vz2 - six*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(one + three*vvz + three*vz2))));
-			//mfbaa = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(vx2 - two*vy2 + three*vx2*vy2 - one*vvz*(-two + three*vx2)*(one + three*vy2) - two*vz2 + three*vx2*vz2 - six*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(-one + three*vvz - three*vz2))));
-			//mfbca = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(-(one*vx2) + two*vy2 - three*vx2*vy2 + vvz*(-two + three*vx2)*(one + three*vy2) + two*vz2 - three*vx2*vz2 + six*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(-one + three*vvz - three*vz2)));
-			//mfbac = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(-(one*vx2) + two*vy2 - three*vx2*vy2 - one*vvz*(-two + three*vx2)*(one + three*vy2) + two*vz2 - three*vx2*vz2 + six*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(one + three*vvz + three*vz2)));
-			//mfbbb = eight*(eight*OnoOver216Rho + OneOver216RhoPlusOne*three*(four*vy2 + four*vz2 - six*vy2*vz2 + vx2*(-two + three*vy2)*(-two + three*vz2)));
-			//mfccc = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(one + three*vvz + three*vz2) + vvx*(one + three*vvy + three*vy2)*(one + three*vvz + three*vz2));
-			//mfaac = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(one + three*vvz + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(one + three*vvz + three*vz2));
-			//mfcac = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(one + three*vvz + three*vz2) - one*vvx*(-one + three*vvy - three*vy2)*(one + three*vvz + three*vz2));
-			//mfacc = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(one + three*vvz + three*vz2) - one*vvx*(one + three*vvy + three*vy2)*(one + three*vvz + three*vz2));
-			//mfcca = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) - one*vvx*(one + three*vvy + three*vy2)*(-one + three*vvz - three*vz2));
-			//mfaaa = OnoOver216Rho - OneOver216RhoPlusOne*three*(vvz - one*vx2 + three*vvz*vx2 - one*vy2 + three*vvz*vy2 - three*vx2*vy2 + nine*vvz*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-one + three*vvz - three*vz2));
-			//mfcaa = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-one + three*vvz - three*vz2));
-			//mfaca = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(one + three*vvy + three*vy2)*(-one + three*vvz - three*vz2));
+         //////////////////////////////////////////////////////////////////////////////////
+         //unconditioned
+            mfcbb = -(rhoBC[k] + c1o1) * XXc * YYb * ZZb - c2o27;
+         mfabb = -(rhoBC[k] + c1o1) * XXa * YYb * ZZb - c2o27;
+         mfbcb = -(rhoBC[k] + c1o1) * XXb * YYc * ZZb - c2o27;
+         mfbab = -(rhoBC[k] + c1o1) * XXb * YYa * ZZb - c2o27;
+         mfbbc = -(rhoBC[k] + c1o1) * XXb * YYb * ZZc - c2o27;
+         mfbba = -(rhoBC[k] + c1o1) * XXb * YYb * ZZa - c2o27;
+         mfccb = -(rhoBC[k] + c1o1) * XXc * YYc * ZZb - c1o54;
+         mfaab = -(rhoBC[k] + c1o1) * XXa * YYa * ZZb - c1o54;
+         mfcab = -(rhoBC[k] + c1o1) * XXc * YYa * ZZb - c1o54;
+         mfacb = -(rhoBC[k] + c1o1) * XXa * YYc * ZZb - c1o54;
+         mfcbc = -(rhoBC[k] + c1o1) * XXc * YYb * ZZc - c1o54;
+         mfaba = -(rhoBC[k] + c1o1) * XXa * YYb * ZZa - c1o54;
+         mfcba = -(rhoBC[k] + c1o1) * XXc * YYb * ZZa - c1o54;
+         mfabc = -(rhoBC[k] + c1o1) * XXa * YYb * ZZc - c1o54;
+         mfbcc = -(rhoBC[k] + c1o1) * XXb * YYc * ZZc - c1o54;
+         mfbaa = -(rhoBC[k] + c1o1) * XXb * YYa * ZZa - c1o54;
+         mfbca = -(rhoBC[k] + c1o1) * XXb * YYc * ZZa - c1o54;
+         mfbac = -(rhoBC[k] + c1o1) * XXb * YYa * ZZc - c1o54;
+         mfbbb = -(rhoBC[k] + c1o1) * XXb * YYb * ZZb - c8o27;
+         mfccc = -(rhoBC[k] + c1o1) * XXc * YYc * ZZc - c1o216;
+         mfaac = -(rhoBC[k] + c1o1) * XXa * YYa * ZZc - c1o216;
+         mfcac = -(rhoBC[k] + c1o1) * XXc * YYa * ZZc - c1o216;
+         mfacc = -(rhoBC[k] + c1o1) * XXa * YYc * ZZc - c1o216;
+         mfcca = -(rhoBC[k] + c1o1) * XXc * YYc * ZZa - c1o216;
+         mfaaa = -(rhoBC[k] + c1o1) * XXa * YYa * ZZa - c1o216;
+         mfcaa = -(rhoBC[k] + c1o1) * XXc * YYa * ZZa - c1o216;
+         mfaca = -(rhoBC[k] + c1o1) * XXa * YYc * ZZa - c1o216;
+         //////////////////////////////////////////////////////////
+         ////conditioned
+         //double OneOver216RhoPlusOne = c1over216*(rhoBC[k]+one);
+         //double OnoOver216Rho        = c1over216*rhoBC[k];
+         //mfcbb = OnoOver216Rho*sixteen + OneOver216RhoPlusOne*twelve*(-(two*vy2) - two*vz2 + three*vy2*vz2 + vvx*(-two + three*vy2)*(-two + three*vz2) + vx2*(-two + three*vy2)*(-two + three*vz2));
+         //mfabb = OnoOver216Rho*sixteen - OneOver216RhoPlusOne*twelve*(two*vy2 + two*vz2 - three*vy2*vz2 + vvx*(-two + three*vy2)*(-two + three*vz2) + vx2*(-four + six*vy2 + six*vz2 - nine*vy2*vz2));
+         //mfbcb = four*(-(four*OneOver216RhoPlusOne) + four*OnoOver216Rho + OneOver216RhoPlusOne*(-two + three*vx2)*(one + three*vvy + three*vy2)*(-two + three*vz2));
+         //mfbab = four*(four*OnoOver216Rho - OneOver216RhoPlusOne*three*(vvy*(-two + three*vx2)*(-two + three*vz2) - one*vx2*(one + three*vy2)*(-two + three*vz2) + two*(-(two*vy2) + vz2 + three*vy2*vz2)));
+         //mfbbc = four*(-(four*OneOver216RhoPlusOne) + four*OnoOver216Rho + OneOver216RhoPlusOne*(-two + three*vx2)*(-two + three*vy2)*(one + three*vvz + three*vz2));
+         //mfbba = four*(four*OnoOver216Rho - OneOver216RhoPlusOne*three*(vvz*(-two + three*vx2)*(-two + three*vy2) - one*vx2*(-two + three*vy2)*(one + three*vz2) + two*(vy2 - two*vz2 + three*vy2*vz2)));
+         //mfccb = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) - two*vy2 - six*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(one + three*vvy + three*vy2)*(-two + three*vz2))));
+         //mfaab = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) - two*vy2 - six*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-two + three*vz2))));
+         //mfcab = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 + two*vy2 + six*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-two + three*vz2)));
+         //mfacb = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 + two*vy2 + six*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-two + three*vz2) + vvx*(one + three*vvy + three*vy2)*(-two + three*vz2)));
+         //mfcbc = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) + vy2 + three*vx2*vy2 + vvz*(one + three*vx2)*(-two + three*vy2) - two*vz2 - six*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(one + three*vvz + three*vz2))));
+         //mfaba = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(-(two*vx2) + vy2 + three*vx2*vy2 - one*vvz*(one + three*vx2)*(-two + three*vy2) - two*vz2 - six*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(-one + three*vvz - three*vz2))));
+         //mfcba = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 - one*vy2 - three*vx2*vy2 + vvz*(one + three*vx2)*(-two + three*vy2) + two*vz2 + six*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(-one + three*vvz - three*vz2)));
+         //mfabc = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(two*vx2 - one*vy2 - three*vx2*vy2 - one*vvz*(one + three*vx2)*(-two + three*vy2) + two*vz2 + six*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 + vvx*(-two + three*vy2)*(one + three*vvz + three*vz2)));
+         //mfbcc = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(vx2 - two*vy2 + three*vx2*vy2 + vvz*(-two + three*vx2)*(one + three*vy2) - two*vz2 + three*vx2*vz2 - six*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(one + three*vvz + three*vz2))));
+         //mfbaa = -(two*(-(OnoOver216Rho*two) + OneOver216RhoPlusOne*three*(vx2 - two*vy2 + three*vx2*vy2 - one*vvz*(-two + three*vx2)*(one + three*vy2) - two*vz2 + three*vx2*vz2 - six*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(-one + three*vvz - three*vz2))));
+         //mfbca = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(-(one*vx2) + two*vy2 - three*vx2*vy2 + vvz*(-two + three*vx2)*(one + three*vy2) + two*vz2 - three*vx2*vz2 + six*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(-one + three*vvz - three*vz2)));
+         //mfbac = two*(OnoOver216Rho*two + OneOver216RhoPlusOne*three*(-(one*vx2) + two*vy2 - three*vx2*vy2 - one*vvz*(-two + three*vx2)*(one + three*vy2) + two*vz2 - three*vx2*vz2 + six*vy2*vz2 - nine*vx2*vy2*vz2 + vvy*(-two + three*vx2)*(one + three*vvz + three*vz2)));
+         //mfbbb = eight*(eight*OnoOver216Rho + OneOver216RhoPlusOne*three*(four*vy2 + four*vz2 - six*vy2*vz2 + vx2*(-two + three*vy2)*(-two + three*vz2)));
+         //mfccc = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(one + three*vvz + three*vz2) + vvx*(one + three*vvy + three*vy2)*(one + three*vvz + three*vz2));
+         //mfaac = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(one + three*vvz + three*vz2) + vvx*(-one + three*vvy - three*vy2)*(one + three*vvz + three*vz2));
+         //mfcac = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(one + three*vvz + three*vz2) - one*vvx*(-one + three*vvy - three*vy2)*(one + three*vvz + three*vz2));
+         //mfacc = OnoOver216Rho + OneOver216RhoPlusOne*three*(vvz + vx2 + three*vvz*vx2 + vy2 + three*vvz*vy2 + three*vx2*vy2 + nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(one + three*vvz + three*vz2) - one*vvx*(one + three*vvy + three*vy2)*(one + three*vvz + three*vz2));
+         //mfcca = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) - one*vvx*(one + three*vvy + three*vy2)*(-one + three*vvz - three*vz2));
+         //mfaaa = OnoOver216Rho - OneOver216RhoPlusOne*three*(vvz - one*vx2 + three*vvz*vx2 - one*vy2 + three*vvz*vy2 - three*vx2*vy2 + nine*vvz*vx2*vy2 - one*vz2 - three*vx2*vz2 - three*vy2*vz2 - nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-one + three*vvz - three*vz2));
+         //mfcaa = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 + vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(-one + three*vvy - three*vy2)*(-one + three*vvz - three*vz2));
+         //mfaca = OnoOver216Rho + OneOver216RhoPlusOne*three*(-(one*vvz) + vx2 - three*vvz*vx2 + vy2 - three*vvz*vy2 + three*vx2*vy2 - nine*vvz*vx2*vy2 + vz2 + three*vx2*vz2 + three*vy2*vz2 + nine*vx2*vy2*vz2 - one*vvy*(one + three*vx2)*(-one + three*vvz - three*vz2) + vvx*(one + three*vvy + three*vy2)*(-one + three*vvz - three*vz2));
 
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //if (isEvenTimestep==true)
@@ -4136,7 +4136,7 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
       //   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
       //   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
       //   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      //} 
+      //}
       //else
       //{
       //   D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
@@ -4170,88 +4170,88 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //__syncthreads();
 
-			(D.f[DIR_P00   ])[ke   ] = mfabb;//mfcbb;
-			(D.f[DIR_M00   ])[kw   ] = mfcbb;//mfabb;
-			(D.f[DIR_0P0   ])[kn   ] = mfbab;//mfbcb;
-			(D.f[DIR_0M0   ])[ks   ] = mfbcb;//mfbab;
-			(D.f[DIR_00P   ])[kt   ] = mfbba;//mfbbc;
-			(D.f[DIR_00M   ])[kb   ] = mfbbc;//mfbba;
-			(D.f[DIR_PP0  ])[kne  ] = mfaab;//mfccb;
-			(D.f[DIR_MM0  ])[ksw  ] = mfccb;//mfaab;
-			(D.f[DIR_PM0  ])[kse  ] = mfacb;//mfcab;
-			(D.f[DIR_MP0  ])[knw  ] = mfcab;//mfacb;
-			(D.f[DIR_P0P  ])[kte  ] = mfaba;//mfcbc;
-			(D.f[DIR_M0M  ])[kbw  ] = mfcbc;//mfaba;
-			(D.f[DIR_P0M  ])[kbe  ] = mfabc;//mfcba;
-			(D.f[DIR_M0P  ])[ktw  ] = mfcba;//mfabc;
-			(D.f[DIR_0PP  ])[ktn  ] = mfbaa;//mfbcc;
-			(D.f[DIR_0MM  ])[kbs  ] = mfbcc;//mfbaa;
-			(D.f[DIR_0PM  ])[kbn  ] = mfbac;//mfbca;
-			(D.f[DIR_0MP  ])[kts  ] = mfbca;//mfbac;
-			(D.f[DIR_000])[kzero] = mfbbb;//mfbbb;
-			(D.f[DIR_PPP ])[ktne ] = mfaaa;//mfccc;
-			(D.f[DIR_MMP ])[ktsw ] = mfcca;//mfaac;
-			(D.f[DIR_PMP ])[ktse ] = mfaca;//mfcac;
-			(D.f[DIR_MPP ])[ktnw ] = mfcaa;//mfacc;
-			(D.f[DIR_PPM ])[kbne ] = mfaac;//mfcca;
-			(D.f[DIR_MMM ])[kbsw ] = mfccc;//mfaaa;
-			(D.f[DIR_PMM ])[kbse ] = mfacc;//mfcaa;
-			(D.f[DIR_MPM ])[kbnw ] = mfcac;//mfaca;
-			//(D.f[DIR_P00   ])[ke   ] = mfcbb;
-			//(D.f[DIR_M00   ])[kw   ] = mfabb;
-			//(D.f[DIR_0P0   ])[kn   ] = mfbcb;
-			//(D.f[DIR_0M0   ])[ks   ] = mfbab;
-			//(D.f[DIR_00P   ])[kt   ] = mfbbc;
-			//(D.f[DIR_00M   ])[kb   ] = mfbba;
-			//(D.f[DIR_PP0  ])[kne  ] = mfccb;
-			//(D.f[DIR_MM0  ])[ksw  ] = mfaab;
-			//(D.f[DIR_PM0  ])[kse  ] = mfcab;
-			//(D.f[DIR_MP0  ])[knw  ] = mfacb;
-			//(D.f[DIR_P0P  ])[kte  ] = mfcbc;
-			//(D.f[DIR_M0M  ])[kbw  ] = mfaba;
-			//(D.f[DIR_P0M  ])[kbe  ] = mfcba;
-			//(D.f[DIR_M0P  ])[ktw  ] = mfabc;
-			//(D.f[DIR_0PP  ])[ktn  ] = mfbcc;
-			//(D.f[DIR_0MM  ])[kbs  ] = mfbaa;
-			//(D.f[DIR_0PM  ])[kbn  ] = mfbca;
-			//(D.f[DIR_0MP  ])[kts  ] = mfbac;
-			//(D.f[DIR_000])[kzero] = mfbbb;
-			//(D.f[DIR_PPP ])[ktne ] = mfccc;
-			//(D.f[DIR_MMP ])[ktsw ] = mfaac;
-			//(D.f[DIR_PMP ])[ktse ] = mfcac;
-			//(D.f[DIR_MPP ])[ktnw ] = mfacc;
-			//(D.f[DIR_PPM ])[kbne ] = mfcca;
-			//(D.f[DIR_MMM ])[kbsw ] = mfaaa;
-			//(D.f[DIR_PMM ])[kbse ] = mfcaa;
-			//(D.f[DIR_MPM ])[kbnw ] = mfaca;
-
-      //(D.f[DIR_P00   ])[ke   ] = fE ;  //f1_E ;   //fW;    //fE ;  
-      //(D.f[DIR_M00   ])[kw   ] = fW ;  //f1_W ;   //fE;    //fW ;  
-      //(D.f[DIR_0P0   ])[kn   ] = fN ;  //f1_N ;   //fS;    //fN ;  
-      //(D.f[DIR_0M0   ])[ks   ] = fS ;  //f1_S ;   //fN;    //fS ;  
-      //(D.f[DIR_00P   ])[kt   ] = fT ;  //f1_T ;   //fB;    //fT ;  
-      //(D.f[DIR_00M   ])[kb   ] = fB ;  //f1_B ;   //fT;    //fB ;  
-      //(D.f[DIR_PP0  ])[kne  ] = fNE;  //f1_NE;   //fSW;   //fNE;  
-      //(D.f[DIR_MM0  ])[ksw  ] = fSW;  //f1_SW;   //fNE;   //fSW;  
-      //(D.f[DIR_PM0  ])[kse  ] = fSE;  //f1_SE;   //fNW;   //fSE;  
-      //(D.f[DIR_MP0  ])[knw  ] = fNW;  //f1_NW;   //fSE;   //fNW;  
-      //(D.f[DIR_P0P  ])[kte  ] = fTE;  //f1_TE;   //fBW;   //fTE;  
-      //(D.f[DIR_M0M  ])[kbw  ] = fBW;  //f1_BW;   //fTE;   //fBW;  
-      //(D.f[DIR_P0M  ])[kbe  ] = fBE;  //f1_BE;   //fTW;   //fBE;  
-      //(D.f[DIR_M0P  ])[ktw  ] = fTW;  //f1_TW;   //fBE;   //fTW;  
-      //(D.f[DIR_0PP  ])[ktn  ] = fTN;  //f1_TN;   //fBS;   //fTN;  
-      //(D.f[DIR_0MM  ])[kbs  ] = fBS;  //f1_BS;   //fTN;   //fBS;  
-      //(D.f[DIR_0PM  ])[kbn  ] = fBN;  //f1_BN;   //fTS;   //fBN;  
-      //(D.f[DIR_0MP  ])[kts  ] = fTS;  //f1_TS;   //fBN;   //fTS;  
+         (D.f[DIR_P00   ])[ke   ] = mfabb;//mfcbb;
+         (D.f[DIR_M00   ])[kw   ] = mfcbb;//mfabb;
+         (D.f[DIR_0P0   ])[kn   ] = mfbab;//mfbcb;
+         (D.f[DIR_0M0   ])[ks   ] = mfbcb;//mfbab;
+         (D.f[DIR_00P   ])[kt   ] = mfbba;//mfbbc;
+         (D.f[DIR_00M   ])[kb   ] = mfbbc;//mfbba;
+         (D.f[DIR_PP0  ])[kne  ] = mfaab;//mfccb;
+         (D.f[DIR_MM0  ])[ksw  ] = mfccb;//mfaab;
+         (D.f[DIR_PM0  ])[kse  ] = mfacb;//mfcab;
+         (D.f[DIR_MP0  ])[knw  ] = mfcab;//mfacb;
+         (D.f[DIR_P0P  ])[kte  ] = mfaba;//mfcbc;
+         (D.f[DIR_M0M  ])[kbw  ] = mfcbc;//mfaba;
+         (D.f[DIR_P0M  ])[kbe  ] = mfabc;//mfcba;
+         (D.f[DIR_M0P  ])[ktw  ] = mfcba;//mfabc;
+         (D.f[DIR_0PP  ])[ktn  ] = mfbaa;//mfbcc;
+         (D.f[DIR_0MM  ])[kbs  ] = mfbcc;//mfbaa;
+         (D.f[DIR_0PM  ])[kbn  ] = mfbac;//mfbca;
+         (D.f[DIR_0MP  ])[kts  ] = mfbca;//mfbac;
+         (D.f[DIR_000])[kzero] = mfbbb;//mfbbb;
+         (D.f[DIR_PPP ])[ktne ] = mfaaa;//mfccc;
+         (D.f[DIR_MMP ])[ktsw ] = mfcca;//mfaac;
+         (D.f[DIR_PMP ])[ktse ] = mfaca;//mfcac;
+         (D.f[DIR_MPP ])[ktnw ] = mfcaa;//mfacc;
+         (D.f[DIR_PPM ])[kbne ] = mfaac;//mfcca;
+         (D.f[DIR_MMM ])[kbsw ] = mfccc;//mfaaa;
+         (D.f[DIR_PMM ])[kbse ] = mfacc;//mfcaa;
+         (D.f[DIR_MPM ])[kbnw ] = mfcac;//mfaca;
+         //(D.f[DIR_P00   ])[ke   ] = mfcbb;
+         //(D.f[DIR_M00   ])[kw   ] = mfabb;
+         //(D.f[DIR_0P0   ])[kn   ] = mfbcb;
+         //(D.f[DIR_0M0   ])[ks   ] = mfbab;
+         //(D.f[DIR_00P   ])[kt   ] = mfbbc;
+         //(D.f[DIR_00M   ])[kb   ] = mfbba;
+         //(D.f[DIR_PP0  ])[kne  ] = mfccb;
+         //(D.f[DIR_MM0  ])[ksw  ] = mfaab;
+         //(D.f[DIR_PM0  ])[kse  ] = mfcab;
+         //(D.f[DIR_MP0  ])[knw  ] = mfacb;
+         //(D.f[DIR_P0P  ])[kte  ] = mfcbc;
+         //(D.f[DIR_M0M  ])[kbw  ] = mfaba;
+         //(D.f[DIR_P0M  ])[kbe  ] = mfcba;
+         //(D.f[DIR_M0P  ])[ktw  ] = mfabc;
+         //(D.f[DIR_0PP  ])[ktn  ] = mfbcc;
+         //(D.f[DIR_0MM  ])[kbs  ] = mfbaa;
+         //(D.f[DIR_0PM  ])[kbn  ] = mfbca;
+         //(D.f[DIR_0MP  ])[kts  ] = mfbac;
+         //(D.f[DIR_000])[kzero] = mfbbb;
+         //(D.f[DIR_PPP ])[ktne ] = mfccc;
+         //(D.f[DIR_MMP ])[ktsw ] = mfaac;
+         //(D.f[DIR_PMP ])[ktse ] = mfcac;
+         //(D.f[DIR_MPP ])[ktnw ] = mfacc;
+         //(D.f[DIR_PPM ])[kbne ] = mfcca;
+         //(D.f[DIR_MMM ])[kbsw ] = mfaaa;
+         //(D.f[DIR_PMM ])[kbse ] = mfcaa;
+         //(D.f[DIR_MPM ])[kbnw ] = mfaca;
+
+      //(D.f[DIR_P00   ])[ke   ] = fE ;  //f1_E ;   //fW;    //fE ;
+      //(D.f[DIR_M00   ])[kw   ] = fW ;  //f1_W ;   //fE;    //fW ;
+      //(D.f[DIR_0P0   ])[kn   ] = fN ;  //f1_N ;   //fS;    //fN ;
+      //(D.f[DIR_0M0   ])[ks   ] = fS ;  //f1_S ;   //fN;    //fS ;
+      //(D.f[DIR_00P   ])[kt   ] = fT ;  //f1_T ;   //fB;    //fT ;
+      //(D.f[DIR_00M   ])[kb   ] = fB ;  //f1_B ;   //fT;    //fB ;
+      //(D.f[DIR_PP0  ])[kne  ] = fNE;  //f1_NE;   //fSW;   //fNE;
+      //(D.f[DIR_MM0  ])[ksw  ] = fSW;  //f1_SW;   //fNE;   //fSW;
+      //(D.f[DIR_PM0  ])[kse  ] = fSE;  //f1_SE;   //fNW;   //fSE;
+      //(D.f[DIR_MP0  ])[knw  ] = fNW;  //f1_NW;   //fSE;   //fNW;
+      //(D.f[DIR_P0P  ])[kte  ] = fTE;  //f1_TE;   //fBW;   //fTE;
+      //(D.f[DIR_M0M  ])[kbw  ] = fBW;  //f1_BW;   //fTE;   //fBW;
+      //(D.f[DIR_P0M  ])[kbe  ] = fBE;  //f1_BE;   //fTW;   //fBE;
+      //(D.f[DIR_M0P  ])[ktw  ] = fTW;  //f1_TW;   //fBE;   //fTW;
+      //(D.f[DIR_0PP  ])[ktn  ] = fTN;  //f1_TN;   //fBS;   //fTN;
+      //(D.f[DIR_0MM  ])[kbs  ] = fBS;  //f1_BS;   //fTN;   //fBS;
+      //(D.f[DIR_0PM  ])[kbn  ] = fBN;  //f1_BN;   //fTS;   //fBN;
+      //(D.f[DIR_0MP  ])[kts  ] = fTS;  //f1_TS;   //fBN;   //fTS;
       //(D.f[DIR_000])[kzero] = fZERO;//f1_ZERO; //fZERO; //fZERO;
-      //(D.f[DIR_PPP ])[ktne ] = fTNE; //f1_TNE;  //fBSW;  //fTNE; 
-      //(D.f[DIR_MMM ])[kbsw ] = fBSW; //f1_BSW;  //fTNE;  //fBSW; 
-      //(D.f[DIR_PPM ])[kbne ] = fBNE; //f1_BNE;  //fTSW;  //fBNE; 
-      //(D.f[DIR_MMP ])[ktsw ] = fTSW; //f1_TSW;  //fBNE;  //fTSW; 
-      //(D.f[DIR_PMP ])[ktse ] = fTSE; //f1_TSE;  //fBNW;  //fTSE; 
-      //(D.f[DIR_MPM ])[kbnw ] = fBNW; //f1_BNW;  //fTSE;  //fBNW; 
-      //(D.f[DIR_PMM ])[kbse ] = fBSE; //f1_BSE;  //fTNW;  //fBSE; 
-      //(D.f[DIR_MPP ])[ktnw ] = fTNW; //f1_TNW;  //fBSE;  //fTNW; 
+      //(D.f[DIR_PPP ])[ktne ] = fTNE; //f1_TNE;  //fBSW;  //fTNE;
+      //(D.f[DIR_MMM ])[kbsw ] = fBSW; //f1_BSW;  //fTNE;  //fBSW;
+      //(D.f[DIR_PPM ])[kbne ] = fBNE; //f1_BNE;  //fTSW;  //fBNE;
+      //(D.f[DIR_MMP ])[ktsw ] = fTSW; //f1_TSW;  //fBNE;  //fTSW;
+      //(D.f[DIR_PMP ])[ktse ] = fTSE; //f1_TSE;  //fBNW;  //fTSE;
+      //(D.f[DIR_MPM ])[kbnw ] = fBNW; //f1_BNW;  //fTSE;  //fBNW;
+      //(D.f[DIR_PMM ])[kbse ] = fBSE; //f1_BSE;  //fTNW;  //fBSE;
+      //(D.f[DIR_MPP ])[ktnw ] = fTNW; //f1_TNW;  //fBSE;  //fTNW;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -4295,19 +4295,19 @@ __global__ void QPressDeviceEQZ27(real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void QPressDeviceZero27(	 real* DD, 
-												 int* k_Q, 
-												 unsigned int numberOfBCnodes, 
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat, 
-												 bool isEvenTimestep)
+__global__ void QPressDeviceZero27(	 real* DD,
+                                     int* k_Q,
+                                     unsigned int numberOfBCnodes,
+                                     unsigned int* neighborX,
+                                     unsigned int* neighborY,
+                                     unsigned int* neighborZ,
+                                     unsigned int size_Mat,
+                                     bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // Globaler x-Index
+   const unsigned  y = blockIdx.x;   // Globaler y-Index
+   const unsigned  z = blockIdx.y;   // Globaler z-Index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -4378,7 +4378,7 @@ __global__ void QPressDeviceZero27(	 real* DD,
          D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
          D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
          D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+      }
       else
       {
          D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
@@ -4411,7 +4411,7 @@ __global__ void QPressDeviceZero27(	 real* DD,
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //__syncthreads();
-	  //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+     //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       (D.f[DIR_P00   ])[ke   ] =c0o1;
       (D.f[DIR_M00   ])[kw   ] =c0o1;
       (D.f[DIR_0P0   ])[kn   ] =c0o1;
@@ -4483,21 +4483,21 @@ __global__ void QPressDeviceZero27(	 real* DD,
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 __global__ void QPressDeviceFake27(	 real* rhoBC,
-												 real* DD, 
-												 int* k_Q, 
-												 int* k_N, 
-												 int numberOfBCnodes, 
-												 real om1, 
-												 unsigned int* neighborX,
-												 unsigned int* neighborY,
-												 unsigned int* neighborZ,
-												 unsigned int size_Mat, 
-												 bool isEvenTimestep)
+                                     real* DD,
+                                     int* k_Q,
+                                     int* k_N,
+                                     int numberOfBCnodes,
+                                     real om1,
+                                     unsigned int* neighborX,
+                                     unsigned int* neighborY,
+                                     unsigned int* neighborZ,
+                                     unsigned int size_Mat,
+                                     bool isEvenTimestep)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   const unsigned  x = threadIdx.x;  // Globaler x-Index
+   const unsigned  y = blockIdx.x;   // Globaler y-Index
+   const unsigned  z = blockIdx.y;   // Globaler z-Index
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
@@ -4598,7 +4598,7 @@ __global__ void QPressDeviceFake27(	 real* rhoBC,
          D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
          D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
          D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-      } 
+      }
       else
       {
          D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
@@ -4665,24 +4665,24 @@ __global__ void QPressDeviceFake27(	 real* rhoBC,
       real vx1, vx2, vx3;
       vx1    =  ((f1_TSE - f1_BNW) - (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) +
                   ((f1_BE - f1_TW)   + (f1_TE - f1_BW))   + ((f1_SE - f1_NW)   + (f1_NE - f1_SW)) +
-                  (f1_E - f1_W); 
+                  (f1_E - f1_W);
 
 
       vx2    =   (-(f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) +
                   ((f1_BN - f1_TS)   + (f1_TN - f1_BS))    + (-(f1_SE - f1_NW)  + (f1_NE - f1_SW)) +
-                  (f1_N - f1_S); 
+                  (f1_N - f1_S);
 
       vx3    =   ((f1_TSE - f1_BNW) + (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) + (f1_TSW - f1_BNE)) +
                   (-(f1_BN - f1_TS)  + (f1_TN - f1_BS))   + ((f1_TE - f1_BW)   - (f1_BE - f1_TW)) +
-                  (f1_T - f1_B); 
+                  (f1_T - f1_B);
 
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       //////////////////////////////////////////////////////////////////////////
       real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+
          f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW;
 
-	  //drho1 = (drho1 + rhoBC[k])/2.f;
-	  drho1 = drho1 - rhoBC[k];
+     //drho1 = (drho1 + rhoBC[k])/2.f;
+     drho1 = drho1 - rhoBC[k];
 
       __syncthreads();
 
@@ -4712,7 +4712,7 @@ __global__ void QPressDeviceFake27(	 real* rhoBC,
       (D.f[DIR_PPM ])[kbne ] = f1_TSW -c1o216*drho1;	//  c1o100;  // zero;  //
       (D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1o216*drho1;	//  c1o100;  // zero;  //
       (D.f[DIR_PMM ])[kbse ] = f1_TNW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1;  //  c1o100;  // zero;  //      
+      (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1;  //  c1o100;  // zero;  //
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -4757,460 +4757,460 @@ __global__ void QPressDeviceFake27(	 real* rhoBC,
 
 //////////////////////////////////////////////////////////////////////////
 __global__ void QPressDevice27_IntBB(real* rho,
-												real* DD, 
-												int* k_Q, 
-												real* QQ,
-												unsigned int numberOfBCnodes, 
-												real om1, 
-												unsigned int* neighborX,
-												unsigned int* neighborY,
-												unsigned int* neighborZ,
-												unsigned int size_Mat, 
-												bool isEvenTimestep)
+                                    real* DD,
+                                    int* k_Q,
+                                    real* QQ,
+                                    unsigned int numberOfBCnodes,
+                                    real om1,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    unsigned int size_Mat,
+                                    bool isEvenTimestep)
 {
-	Distributions27 D;
-	if (isEvenTimestep==true)
-	{
-		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-	} 
-	else
-	{
-		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-		D.f[DIR_000] = &DD[DIR_000*size_Mat];
-		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-	}
-	////////////////////////////////////////////////////////////////////////////////
-	const unsigned  x = threadIdx.x;  // Globaler x-Index 
-	const unsigned  y = blockIdx.x;   // Globaler y-Index 
-	const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-	const unsigned nx = blockDim.x;
-	const unsigned ny = gridDim.x;
-
-	const unsigned k = nx*(ny*z + y) + x;
-	//////////////////////////////////////////////////////////////////////////
-
-	if(k < numberOfBCnodes)
-	{
-		////////////////////////////////////////////////////////////////////////////////
-		//real VeloX = vx[k];
-		//real VeloY = vy[k];
-		//real VeloZ = vz[k]; //(16.0*(u0*2.0)*bbx*bby*(grid_nx-bbx)*(grid_ny-bby))/(grid_nx*grid_nx*grid_ny*grid_ny)
-		////////////////////////////////////////////////////////////////////////////////
-		real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
-			*q_dirNE,  *q_dirSW,  *q_dirSE,  *q_dirNW,  *q_dirTE,  *q_dirBW,
-			*q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
-			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
-			*q_dirBSE, *q_dirBNW; 
-		q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
-		q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
-		q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
-		q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
-		q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
-		q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
-		q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
-		q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
-		q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
-		q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
-		q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
-		q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
-		q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
-		q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
-		q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
-		q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
-		q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
-		q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
-		q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
-		q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
-		q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
-		q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
-		q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
-		q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
-		q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
-		q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
-		////////////////////////////////////////////////////////////////////////////////
-		//index
-		unsigned int KQK  = k_Q[k];
-		unsigned int kzero= KQK;
-		unsigned int ke   = KQK;
-		unsigned int kw   = neighborX[KQK];
-		unsigned int kn   = KQK;
-		unsigned int ks   = neighborY[KQK];
-		unsigned int kt   = KQK;
-		unsigned int kb   = neighborZ[KQK];
-		unsigned int ksw  = neighborY[kw];
-		unsigned int kne  = KQK;
-		unsigned int kse  = ks;
-		unsigned int knw  = kw;
-		unsigned int kbw  = neighborZ[kw];
-		unsigned int kte  = KQK;
-		unsigned int kbe  = kb;
-		unsigned int ktw  = kw;
-		unsigned int kbs  = neighborZ[ks];
-		unsigned int ktn  = KQK;
-		unsigned int kbn  = kb;
-		unsigned int kts  = ks;
-		unsigned int ktse = ks;
-		unsigned int kbnw = kbw;
-		unsigned int ktnw = kw;
-		unsigned int kbse = kbs;
-		unsigned int ktsw = ksw;
-		unsigned int kbne = kb;
-		unsigned int ktne = KQK;
-		unsigned int kbsw = neighborZ[ksw];
-		////////////////////////////////////////////////////////////////////////////////
-		real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
-			f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
-
-		f_W    = (D.f[DIR_P00   ])[ke   ];
-		f_E    = (D.f[DIR_M00   ])[kw   ];
-		f_S    = (D.f[DIR_0P0   ])[kn   ];
-		f_N    = (D.f[DIR_0M0   ])[ks   ];
-		f_B    = (D.f[DIR_00P   ])[kt   ];
-		f_T    = (D.f[DIR_00M   ])[kb   ];
-		f_SW   = (D.f[DIR_PP0  ])[kne  ];
-		f_NE   = (D.f[DIR_MM0  ])[ksw  ];
-		f_NW   = (D.f[DIR_PM0  ])[kse  ];
-		f_SE   = (D.f[DIR_MP0  ])[knw  ];
-		f_BW   = (D.f[DIR_P0P  ])[kte  ];
-		f_TE   = (D.f[DIR_M0M  ])[kbw  ];
-		f_TW   = (D.f[DIR_P0M  ])[kbe  ];
-		f_BE   = (D.f[DIR_M0P  ])[ktw  ];
-		f_BS   = (D.f[DIR_0PP  ])[ktn  ];
-		f_TN   = (D.f[DIR_0MM  ])[kbs  ];
-		f_TS   = (D.f[DIR_0PM  ])[kbn  ];
-		f_BN   = (D.f[DIR_0MP  ])[kts  ];
-		f_BSW  = (D.f[DIR_PPP ])[ktne ];
-		f_BNE  = (D.f[DIR_MMP ])[ktsw ];
-		f_BNW  = (D.f[DIR_PMP ])[ktse ];
-		f_BSE  = (D.f[DIR_MPP ])[ktnw ];
-		f_TSW  = (D.f[DIR_PPM ])[kbne ];
-		f_TNE  = (D.f[DIR_MMM ])[kbsw ];
-		f_TNW  = (D.f[DIR_PMM ])[kbse ];
-		f_TSE  = (D.f[DIR_MPM ])[kbnw ];
-		////////////////////////////////////////////////////////////////////////////////
-		real vx1, vx2, vx3, drho, feq, q;
-		drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
-			f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-			f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
-
-		vx1    = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
-			((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
-			(f_E - f_W))/(c1o1+drho); 
-
-
-		vx2    =  ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
-			((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
-			(f_N - f_S))/(c1o1+drho); 
-
-		vx3    =  (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
-			(-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
-			(f_T - f_B))/(c1o1+drho); 
-
-		real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
-
-		//////////////////////////////////////////////////////////////////////////
-		if (isEvenTimestep==false)
-		{
-			D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
-			D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
-			D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
-			D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
-			D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
-			D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
-			D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
-			D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
-			D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
-			D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
-			D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
-			D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
-			D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
-			D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
-			D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
-			D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
-			D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
-			D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
-			D.f[DIR_000] = &DD[DIR_000*size_Mat];
-			D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
-			D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
-			D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
-			D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
-			D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
-			D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
-			D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
-			D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
-		} 
-		else
-		{
-			D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
-			D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
-			D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
-			D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
-			D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
-			D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
-			D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
-			D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
-			D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
-			D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
-			D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
-			D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
-			D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
-			D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
-			D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
-			D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
-			D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
-			D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
-			D.f[DIR_000] = &DD[DIR_000*size_Mat];
-			D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
-			D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
-			D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
-			D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
-			D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
-			D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
-			D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
-			D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
-		}
-		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-		//Test
-		//(D.f[DIR_000])[k]=c1o10;
-		real rhoDiff = drho - rho[k];
-		real VeloX = vx1;
-		real VeloY = vx2;
-		real VeloZ = vx3;
-		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-		q = q_dirE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c2o27* (drho+c9o2*( vx1        )*( vx1        )-cu_sq); 
-			(D.f[DIR_M00])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W)-c2o27*(rhoDiff + c6o1*( VeloX     )))/(c1o1+q);
-		}
-
-		q = q_dirW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c2o27* (drho+c9o2*(-vx1        )*(-vx1        )-cu_sq); 
-			(D.f[DIR_P00])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E)-c2o27*(rhoDiff + c6o1*(-VeloX     )))/(c1o1+q);
-		}
-
-		q = q_dirN[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c2o27* (drho+c9o2*(     vx2    )*(     vx2    )-cu_sq); 
-			(D.f[DIR_0M0])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S)-c2o27*(rhoDiff + c6o1*( VeloY     )))/(c1o1+q);
-		}
-
-		q = q_dirS[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c2o27* (drho+c9o2*(    -vx2    )*(    -vx2    )-cu_sq); 
-			(D.f[DIR_0P0])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N)-c2o27*(rhoDiff + c6o1*(-VeloY     )))/(c1o1+q);
-		}
-
-		q = q_dirT[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c2o27* (drho+c9o2*(         vx3)*(         vx3)-cu_sq); 
-			(D.f[DIR_00M])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B)-c2o27*(rhoDiff + c6o1*( VeloZ     )))/(c1o1+q);
-		}
-
-		q = q_dirB[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c2o27* (drho+c9o2*(        -vx3)*(        -vx3)-cu_sq); 
-			(D.f[DIR_00P])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T)-c2o27*(rhoDiff + c6o1*(-VeloZ     )))/(c1o1+q);
-		}
-
-		q = q_dirNE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq); 
-			(D.f[DIR_MM0])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c1o54*(rhoDiff + c6o1*(VeloX+VeloY)))/(c1o1+q);
-		}
-
-		q = q_dirSW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq); 
-			(D.f[DIR_PP0])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c1o54*(rhoDiff + c6o1*(-VeloX-VeloY)))/(c1o1+q);
-		}
-
-		q = q_dirSE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq); 
-			(D.f[DIR_MP0])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c1o54*(rhoDiff + c6o1*( VeloX-VeloY)))/(c1o1+q);
-		}
-
-		q = q_dirNW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq); 
-			(D.f[DIR_PM0])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c1o54*(rhoDiff + c6o1*(-VeloX+VeloY)))/(c1o1+q);
-		}
-
-		q = q_dirTE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq); 
-			(D.f[DIR_M0M])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c1o54*(rhoDiff + c6o1*( VeloX+VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirBW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq); 
-			(D.f[DIR_P0P])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c1o54*(rhoDiff + c6o1*(-VeloX-VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirBE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq); 
-			(D.f[DIR_M0P])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c1o54*(rhoDiff + c6o1*( VeloX-VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirTW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq); 
-			(D.f[DIR_P0M])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c1o54*(rhoDiff + c6o1*(-VeloX+VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirTN[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq); 
-			(D.f[DIR_0MM])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c1o54*(rhoDiff + c6o1*( VeloY+VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirBS[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq); 
-			(D.f[DIR_0PP])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c1o54*(rhoDiff + c6o1*( -VeloY-VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirBN[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq); 
-			(D.f[DIR_0MP])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c1o54*(rhoDiff + c6o1*( VeloY-VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirTS[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o54* (drho+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq); 
-			(D.f[DIR_0PM])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c1o54*(rhoDiff + c6o1*( -VeloY+VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirTNE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o216*(drho+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); 
-			(D.f[DIR_MMM])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c1o216*(rhoDiff + c6o1*( VeloX+VeloY+VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirBSW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o216*(drho+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); 
-			(D.f[DIR_PPP])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c1o216*(rhoDiff + c6o1*(-VeloX-VeloY-VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirBNE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o216*(drho+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); 
-			(D.f[DIR_MMP])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c1o216*(rhoDiff + c6o1*( VeloX+VeloY-VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirTSW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o216*(drho+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); 
-			(D.f[DIR_PPM])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c1o216*(rhoDiff + c6o1*(-VeloX-VeloY+VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirTSE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o216*(drho+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); 
-			(D.f[DIR_MPM])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c1o216*(rhoDiff + c6o1*( VeloX-VeloY+VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirBNW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o216*(drho+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); 
-			(D.f[DIR_PMP])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c1o216*(rhoDiff + c6o1*(-VeloX+VeloY-VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirBSE[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o216*(drho+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); 
-			(D.f[DIR_MPP])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c1o216*(rhoDiff + c6o1*( VeloX-VeloY-VeloZ)))/(c1o1+q);
-		}
-
-		q = q_dirTNW[k];
-		if (q>=c0o1 && q<=c1o1)
-		{
-			feq=c1o216*(drho+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); 
-			(D.f[DIR_PMM])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c1o216*(rhoDiff + c6o1*(-VeloX+VeloY+VeloZ)))/(c1o1+q);
-		}
-	}
+   Distributions27 D;
+   if (isEvenTimestep==true)
+   {
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+   }
+   else
+   {
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+   }
+   ////////////////////////////////////////////////////////////////////////////////
+   const unsigned  x = threadIdx.x;  // global x index
+   const unsigned  y = blockIdx.x;   // global y index
+   const unsigned  z = blockIdx.y;   // global z index
+
+   const unsigned nx = blockDim.x;
+   const unsigned ny = gridDim.x;
+
+   const unsigned k = nx*(ny*z + y) + x;
+   //////////////////////////////////////////////////////////////////////////
+
+   if(k < numberOfBCnodes)
+   {
+      ////////////////////////////////////////////////////////////////////////////////
+      //real VeloX = vx[k];
+      //real VeloY = vy[k];
+      //real VeloZ = vz[k]; //(16.0*(u0*2.0)*bbx*bby*(grid_nx-bbx)*(grid_ny-bby))/(grid_nx*grid_nx*grid_ny*grid_ny)
+      ////////////////////////////////////////////////////////////////////////////////
+      real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB,
+         *q_dirNE,  *q_dirSW,  *q_dirSE,  *q_dirNW,  *q_dirTE,  *q_dirBW,
+         *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
+         *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
+         *q_dirBSE, *q_dirBNW;
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
+      ////////////////////////////////////////////////////////////////////////////////
+      //index
+      unsigned int KQK  = k_Q[k];
+      unsigned int kzero= KQK;
+      unsigned int ke   = KQK;
+      unsigned int kw   = neighborX[KQK];
+      unsigned int kn   = KQK;
+      unsigned int ks   = neighborY[KQK];
+      unsigned int kt   = KQK;
+      unsigned int kb   = neighborZ[KQK];
+      unsigned int ksw  = neighborY[kw];
+      unsigned int kne  = KQK;
+      unsigned int kse  = ks;
+      unsigned int knw  = kw;
+      unsigned int kbw  = neighborZ[kw];
+      unsigned int kte  = KQK;
+      unsigned int kbe  = kb;
+      unsigned int ktw  = kw;
+      unsigned int kbs  = neighborZ[ks];
+      unsigned int ktn  = KQK;
+      unsigned int kbn  = kb;
+      unsigned int kts  = ks;
+      unsigned int ktse = ks;
+      unsigned int kbnw = kbw;
+      unsigned int ktnw = kw;
+      unsigned int kbse = kbs;
+      unsigned int ktsw = ksw;
+      unsigned int kbne = kb;
+      unsigned int ktne = KQK;
+      unsigned int kbsw = neighborZ[ksw];
+      ////////////////////////////////////////////////////////////////////////////////
+      real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
+         f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
+
+      f_W    = (D.f[DIR_P00   ])[ke   ];
+      f_E    = (D.f[DIR_M00   ])[kw   ];
+      f_S    = (D.f[DIR_0P0   ])[kn   ];
+      f_N    = (D.f[DIR_0M0   ])[ks   ];
+      f_B    = (D.f[DIR_00P   ])[kt   ];
+      f_T    = (D.f[DIR_00M   ])[kb   ];
+      f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      ////////////////////////////////////////////////////////////////////////////////
+      real vx1, vx2, vx3, drho, feq, q;
+      drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
+         f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW +
+         f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]);
+
+      vx1    = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+         ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
+         (f_E - f_W))/(c1o1+drho);
+
+
+      vx2    =  ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+         ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
+         (f_N - f_S))/(c1o1+drho);
+
+      vx3    =  (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
+         (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
+         (f_T - f_B))/(c1o1+drho);
+
+      real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
+
+      //////////////////////////////////////////////////////////////////////////
+      if (isEvenTimestep==false)
+      {
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      }
+      else
+      {
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      }
+      ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      //Test
+      //(D.f[DIR_000])[k]=c1o10;
+      real rhoDiff = drho - rho[k];
+      real VeloX = vx1;
+      real VeloY = vx2;
+      real VeloZ = vx3;
+      ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+      q = q_dirE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c2o27* (drho+c9o2*( vx1        )*( vx1        )-cu_sq);
+         (D.f[DIR_M00])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W)-c2o27*(rhoDiff + c6o1*( VeloX     )))/(c1o1+q);
+      }
+
+      q = q_dirW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c2o27* (drho+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+         (D.f[DIR_P00])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E)-c2o27*(rhoDiff + c6o1*(-VeloX     )))/(c1o1+q);
+      }
+
+      q = q_dirN[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c2o27* (drho+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+         (D.f[DIR_0M0])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S)-c2o27*(rhoDiff + c6o1*( VeloY     )))/(c1o1+q);
+      }
+
+      q = q_dirS[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c2o27* (drho+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+         (D.f[DIR_0P0])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N)-c2o27*(rhoDiff + c6o1*(-VeloY     )))/(c1o1+q);
+      }
+
+      q = q_dirT[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c2o27* (drho+c9o2*(         vx3)*(         vx3)-cu_sq);
+         (D.f[DIR_00M])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B)-c2o27*(rhoDiff + c6o1*( VeloZ     )))/(c1o1+q);
+      }
+
+      q = q_dirB[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c2o27* (drho+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+         (D.f[DIR_00P])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T)-c2o27*(rhoDiff + c6o1*(-VeloZ     )))/(c1o1+q);
+      }
+
+      q = q_dirNE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+         (D.f[DIR_MM0])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c1o54*(rhoDiff + c6o1*(VeloX+VeloY)))/(c1o1+q);
+      }
+
+      q = q_dirSW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+         (D.f[DIR_PP0])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c1o54*(rhoDiff + c6o1*(-VeloX-VeloY)))/(c1o1+q);
+      }
+
+      q = q_dirSE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+         (D.f[DIR_MP0])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c1o54*(rhoDiff + c6o1*( VeloX-VeloY)))/(c1o1+q);
+      }
+
+      q = q_dirNW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+         (D.f[DIR_PM0])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c1o54*(rhoDiff + c6o1*(-VeloX+VeloY)))/(c1o1+q);
+      }
+
+      q = q_dirTE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+         (D.f[DIR_M0M])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c1o54*(rhoDiff + c6o1*( VeloX+VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirBW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+         (D.f[DIR_P0P])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c1o54*(rhoDiff + c6o1*(-VeloX-VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirBE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+         (D.f[DIR_M0P])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c1o54*(rhoDiff + c6o1*( VeloX-VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirTW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+         (D.f[DIR_P0M])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c1o54*(rhoDiff + c6o1*(-VeloX+VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirTN[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+         (D.f[DIR_0MM])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c1o54*(rhoDiff + c6o1*( VeloY+VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirBS[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+         (D.f[DIR_0PP])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c1o54*(rhoDiff + c6o1*( -VeloY-VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirBN[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+         (D.f[DIR_0MP])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c1o54*(rhoDiff + c6o1*( VeloY-VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirTS[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o54* (drho+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+         (D.f[DIR_0PM])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c1o54*(rhoDiff + c6o1*( -VeloY+VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirTNE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o216*(drho+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+         (D.f[DIR_MMM])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c1o216*(rhoDiff + c6o1*( VeloX+VeloY+VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirBSW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o216*(drho+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+         (D.f[DIR_PPP])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c1o216*(rhoDiff + c6o1*(-VeloX-VeloY-VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirBNE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o216*(drho+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+         (D.f[DIR_MMP])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c1o216*(rhoDiff + c6o1*( VeloX+VeloY-VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirTSW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o216*(drho+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+         (D.f[DIR_PPM])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c1o216*(rhoDiff + c6o1*(-VeloX-VeloY+VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirTSE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o216*(drho+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+         (D.f[DIR_MPM])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c1o216*(rhoDiff + c6o1*( VeloX-VeloY+VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirBNW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o216*(drho+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+         (D.f[DIR_PMP])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c1o216*(rhoDiff + c6o1*(-VeloX+VeloY-VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirBSE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o216*(drho+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+         (D.f[DIR_MPP])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c1o216*(rhoDiff + c6o1*( VeloX-VeloY-VeloZ)))/(c1o1+q);
+      }
+
+      q = q_dirTNW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         feq=c1o216*(drho+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+         (D.f[DIR_PMM])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c1o216*(rhoDiff + c6o1*(-VeloX+VeloY+VeloZ)))/(c1o1+q);
+      }
+   }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu
index 70b0c4352afee850a4e17243979268bd126b7b4a..477539348706de7410319045fa075a6cdf31d01c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu
@@ -6,132 +6,132 @@
 
 #include <cuda.h>
 
-template<TurbulenceModel turbulenceModel> 
+template<TurbulenceModel turbulenceModel>
 std::shared_ptr< CumulantK17<turbulenceModel> > CumulantK17<turbulenceModel>::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
-	return std::shared_ptr<CumulantK17<turbulenceModel> >(new CumulantK17<turbulenceModel>(para,level));
+    return std::shared_ptr<CumulantK17<turbulenceModel> >(new CumulantK17<turbulenceModel>(para,level)); // factory: constructs the kernel for (para, level) and returns it owned by a shared_ptr
 }
 
 template<TurbulenceModel turbulenceModel>
 void CumulantK17<turbulenceModel>::run()
 {
-	LB_Kernel_CumulantK17 < turbulenceModel, false, false  > <<< cudaGrid.grid, cudaGrid.threads >>>(   para->getParD(level)->omega,										
-																										para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,	
-																										para->getParD(level)->distributions.f[0],	
-																										para->getParD(level)->rho,		
-																										para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,	
-																										para->getParD(level)->turbViscosity,
-																										para->getSGSConstant(),
-																										(unsigned long)para->getParD(level)->numberOfNodes,	
-																										level,			
-																										para->getForcesDev(),				
-																										para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
-																										para->getQuadricLimitersDev(),			
-																										para->getParD(level)->isEvenTimestep,
-																										para->getParD(level)->taggedFluidNodeIndices[CollisionTemplate::Default],
-																										para->getParD(level)->numberOfTaggedFluidNodes[CollisionTemplate::Default]);
+    LB_Kernel_CumulantK17 < turbulenceModel, false, false  > <<< cudaGrid.grid, cudaGrid.threads >>>(   para->getParD(level)->omega, // both bool template flags are off here; runOnIndices sets the first for WriteMacroVars and the second for ApplyBodyForce
+                                                                                                        para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,
+                                                                                                        para->getParD(level)->distributions.f[0],
+                                                                                                        para->getParD(level)->rho,
+                                                                                                        para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,
+                                                                                                        para->getParD(level)->turbViscosity,
+                                                                                                        para->getSGSConstant(),
+                                                                                                        (unsigned long)para->getParD(level)->numberOfNodes,
+                                                                                                        level,
+                                                                                                        para->getForcesDev(),
+                                                                                                        para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
+                                                                                                        para->getQuadricLimitersDev(),
+                                                                                                        para->getParD(level)->isEvenTimestep,
+                                                                                                        para->getParD(level)->taggedFluidNodeIndices[CollisionTemplate::Default], // index list stored under the Default collision template ...
+                                                                                                        para->getParD(level)->numberOfTaggedFluidNodes[CollisionTemplate::Default]); // ... and the matching entry count
 
-	getLastCudaError("LB_Kernel_CumulantK17 execution failed");
+    getLastCudaError("LB_Kernel_CumulantK17 execution failed"); // NOTE(review): presumably reports a failed launch using this message; the helper is declared outside this diff - confirm
 }
 
 template<TurbulenceModel turbulenceModel>
 void CumulantK17<turbulenceModel>::runOnIndices( const unsigned int *indices, unsigned int size_indices, CollisionTemplate collisionTemplate, CudaStreamIndex streamIndex )
 {
-	cudaStream_t stream = para->getStreamManager()->getStream(streamIndex);
-	
-	switch (collisionTemplate)
-	{
-		case CollisionTemplate::Default:
-			LB_Kernel_CumulantK17 < turbulenceModel, false, false  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>(para->getParD(level)->omega,								
-																														para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,	
-																														para->getParD(level)->distributions.f[0],	
-																														para->getParD(level)->rho,		
-																														para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,	
-																														para->getParD(level)->turbViscosity,
-																														para->getSGSConstant(),
-																														(unsigned long)para->getParD(level)->numberOfNodes,	
-																														level,			
-																														para->getForcesDev(),				
-																														para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
-																														para->getQuadricLimitersDev(),			
-																														para->getParD(level)->isEvenTimestep,
-																														indices,
-																														size_indices);
-			break;
-		
-		case CollisionTemplate::WriteMacroVars:
-			LB_Kernel_CumulantK17 < turbulenceModel, true, false  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega,										
-																														para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,	
-																														para->getParD(level)->distributions.f[0],	
-																														para->getParD(level)->rho,		
-																														para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,	
-																														para->getParD(level)->turbViscosity,
-																														para->getSGSConstant(),
-																														(unsigned long)para->getParD(level)->numberOfNodes,	
-																														level,			
-																														para->getForcesDev(),				
-																														para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
-																														para->getQuadricLimitersDev(),			
-																														para->getParD(level)->isEvenTimestep,
-																														indices,
-																														size_indices);
-			break;
-		
-		case CollisionTemplate::SubDomainBorder:
-		case CollisionTemplate::AllFeatures:
-			LB_Kernel_CumulantK17 < turbulenceModel, true, true  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>(  para->getParD(level)->omega,
-																														para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,	
-																														para->getParD(level)->distributions.f[0],	
-																														para->getParD(level)->rho,		
-																														para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,	
-																														para->getParD(level)->turbViscosity,
-																														para->getSGSConstant(),
-																														(unsigned long)para->getParD(level)->numberOfNodes,	
-																														level,			
-																														para->getForcesDev(),				
-																														para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
-																														para->getQuadricLimitersDev(),			
-																														para->getParD(level)->isEvenTimestep,
-																														indices,
-																														size_indices);
-			break;	case CollisionTemplate::ApplyBodyForce:
-			LB_Kernel_CumulantK17 < turbulenceModel, false, true  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega,									
-																														para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,	
-																														para->getParD(level)->distributions.f[0],	
-																														para->getParD(level)->rho,		
-																														para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,	
-																														para->getParD(level)->turbViscosity,
-																														para->getSGSConstant(),
-																														(unsigned long)para->getParD(level)->numberOfNodes,	
-																														level,			
-																														para->getForcesDev(),				
-																														para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
-																														para->getQuadricLimitersDev(),			
-																														para->getParD(level)->isEvenTimestep,
-																														indices,
-																														size_indices);
-			break;	default:
-			throw std::runtime_error("Invalid CollisionTemplate in CumulantK17::runOnIndices()");
-			break;
-	}
+    cudaStream_t stream = para->getStreamManager()->getStream(streamIndex);
+
+    // Launch the kernel variant selected by collisionTemplate on `stream`, passing the caller-supplied index list.
+    switch (collisionTemplate)
+    {
+        case CollisionTemplate::Default:
+            LB_Kernel_CumulantK17 < turbulenceModel, false, false  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>(para->getParD(level)->omega,
+                                                                                                                        para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,
+                                                                                                                        para->getParD(level)->distributions.f[0],
+                                                                                                                        para->getParD(level)->rho,
+                                                                                                                        para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,
+                                                                                                                        para->getParD(level)->turbViscosity,
+                                                                                                                        para->getSGSConstant(),
+                                                                                                                        (unsigned long)para->getParD(level)->numberOfNodes,
+                                                                                                                        level,
+                                                                                                                        para->getForcesDev(),
+                                                                                                                        para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
+                                                                                                                        para->getQuadricLimitersDev(),
+                                                                                                                        para->getParD(level)->isEvenTimestep,
+                                                                                                                        indices, size_indices);
+            break;
+
+        case CollisionTemplate::WriteMacroVars:
+            LB_Kernel_CumulantK17 < turbulenceModel, true, false  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega,
+                                                                                                                        para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,
+                                                                                                                        para->getParD(level)->distributions.f[0],
+                                                                                                                        para->getParD(level)->rho,
+                                                                                                                        para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,
+                                                                                                                        para->getParD(level)->turbViscosity,
+                                                                                                                        para->getSGSConstant(),
+                                                                                                                        (unsigned long)para->getParD(level)->numberOfNodes,
+                                                                                                                        level,
+                                                                                                                        para->getForcesDev(),
+                                                                                                                        para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
+                                                                                                                        para->getQuadricLimitersDev(),
+                                                                                                                        para->getParD(level)->isEvenTimestep,
+                                                                                                                        indices, size_indices);
+            break;
 
-	getLastCudaError("LB_Kernel_CumulantK17 execution failed");
+        case CollisionTemplate::SubDomainBorder: // intentional fall-through: shares the <true, true> launch with AllFeatures
+        case CollisionTemplate::AllFeatures:
+            LB_Kernel_CumulantK17 < turbulenceModel, true, true  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>(  para->getParD(level)->omega,
+                                                                                                                        para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,
+                                                                                                                        para->getParD(level)->distributions.f[0],
+                                                                                                                        para->getParD(level)->rho,
+                                                                                                                        para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,
+                                                                                                                        para->getParD(level)->turbViscosity,
+                                                                                                                        para->getSGSConstant(),
+                                                                                                                        (unsigned long)para->getParD(level)->numberOfNodes,
+                                                                                                                        level,
+                                                                                                                        para->getForcesDev(),
+                                                                                                                        para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
+                                                                                                                        para->getQuadricLimitersDev(),
+                                                                                                                        para->getParD(level)->isEvenTimestep,
+                                                                                                                        indices, size_indices);
+            break;
+
+        case CollisionTemplate::ApplyBodyForce:
+            LB_Kernel_CumulantK17 < turbulenceModel, false, true  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega,
+                                                                                                                        para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,
+                                                                                                                        para->getParD(level)->distributions.f[0],
+                                                                                                                        para->getParD(level)->rho,
+                                                                                                                        para->getParD(level)->velocityX, para->getParD(level)->velocityY, para->getParD(level)->velocityZ,
+                                                                                                                        para->getParD(level)->turbViscosity,
+                                                                                                                        para->getSGSConstant(),
+                                                                                                                        (unsigned long)para->getParD(level)->numberOfNodes,
+                                                                                                                        level,
+                                                                                                                        para->getForcesDev(),
+                                                                                                                        para->getParD(level)->forceX_SP, para->getParD(level)->forceY_SP, para->getParD(level)->forceZ_SP,
+                                                                                                                        para->getQuadricLimitersDev(),
+                                                                                                                        para->getParD(level)->isEvenTimestep,
+                                                                                                                        indices, size_indices);
+            break;
+
+        default:
+            throw std::runtime_error("Invalid CollisionTemplate in CumulantK17::runOnIndices()"); // no break needed: throw leaves the switch
+    }
+
+    getLastCudaError("LB_Kernel_CumulantK17 execution failed");
 }
 
 template<TurbulenceModel turbulenceModel>
 CumulantK17<turbulenceModel>::CumulantK17(std::shared_ptr<Parameter> para, int level)
 {
-	this->para = para;
-	this->level = level;
+    this->para = para;
+    this->level = level;
+
+    myPreProcessorTypes.push_back(InitCompSP27); // records InitCompSP27 in this kernel's preprocessor-type list
 
-	myPreProcessorTypes.push_back(InitCompSP27);
+    myKernelGroup = BasicKernel;
 
-	myKernelGroup = BasicKernel;
+    this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // built from the level's thread count and node count; run()/runOnIndices() use its grid/threads as the launch configuration
+    this->kernelUsesFluidNodeIndices = true; // run() and runOnIndices() both pass a node-index list to the kernel
 
-	this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
-	this->kernelUsesFluidNodeIndices = true;
-	
-	VF_LOG_INFO("Using turbulence model: {}", turbulenceModel);
+    VF_LOG_INFO("Using turbulence model: {}", turbulenceModel);
 }
 
 template class CumulantK17<TurbulenceModel::AMD>;
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu
index 34a444230019a1c6cfacb18e12fd73607fb2e09a..77e5172dae7b0ff6b51ed79a0a4356c7461801c5 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu
@@ -1,28 +1,28 @@
 
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
 //           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
 //  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
+//  License as published by the Free Software Foundation, either version 3 of
 //  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 //  for more details.
-//  
+//
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
@@ -32,17 +32,17 @@
 //! \brief Kernel for CumulantK17 including different turbulence models and options for local body forces and writing macroscopic variables
 //!
 //! CumulantK17 kernel using chimera transformations and quartic limiters as present in Geier et al. (2017). Additional options are three different
-//! eddy-viscosity turbulence models (Smagorinsky, AMD, QR) that can be set via the template parameter turbulenceModel (with default 
-//! TurbulenceModel::None). 
+//! eddy-viscosity turbulence models (Smagorinsky, AMD, QR) that can be set via the template parameter turbulenceModel (with default
+//! TurbulenceModel::None).
 //! The kernel is executed separately for each subset of fluid node indices with a different tag CollisionTemplate. For each subset, only the locally
-//! required options are switched on ( \param writeMacroscopicVariables and/or \param applyBodyForce) in order to minimize memory accesses. The default 
-//! refers to the plain cumlant kernel (CollisionTemplate::Default). 
-//! Nodes are added to subsets (taggedFluidNodes) in Simulation::init using a corresponding tag with different values of CollisionTemplate. These subsets 
+//! required options are switched on ( \param writeMacroscopicVariables and/or \param applyBodyForce) in order to minimize memory accesses. The default
+//! refers to the plain cumulant kernel (CollisionTemplate::Default).
+//! Nodes are added to subsets (taggedFluidNodes) in Simulation::init using a corresponding tag with different values of CollisionTemplate. These subsets
 //! are provided by the utilized PostCollisionInteractiors depending on they specifc requirements (e.g. writeMacroscopicVariables for probes).
 
 //=======================================================================================
 /* Device code */
-#include "LBM/LB.h" 
+#include "LBM/LB.h"
 #include "lbm/constants/D3Q27.h"
 #include <lbm/constants/NumericConstants.h>
 #include "Kernel/Utilities/DistributionHelper.cuh"
@@ -57,25 +57,25 @@ using namespace vf::lbm::dir;
 ////////////////////////////////////////////////////////////////////////////////
 template<TurbulenceModel turbulenceModel, bool writeMacroscopicVariables, bool applyBodyForce>
 __global__ void LB_Kernel_CumulantK17(
-	real omega_in,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	real* distributions,
+    real omega_in,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    real* distributions,
     real* rho,
     real* vx,
     real* vy,
     real* vz,
     real* turbulentViscosity,
     real SGSconstant,
-	unsigned long numberOfLBnodes,
-	int level,
-	real* forces,
+    unsigned long numberOfLBnodes,
+    int level,
+    real* forces,
     real* bodyForceX,
     real* bodyForceY,
     real* bodyForceZ,
-	real* quadricLimiters,
-	bool isEvenTimestep,
+    real* quadricLimiters,
+    bool isEvenTimestep,
     const uint *fluidNodeIndices,
     uint numberOfFluidNodes)
 {
@@ -91,10 +91,10 @@ __global__ void LB_Kernel_CumulantK17(
     //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
     //!
     const unsigned kThread = vf::gpu::getNodeIndex();
-    
+
     //////////////////////////////////////////////////////////////////////////
     // run for all indices in size_Mat and fluid nodes
-    if (kThread >= numberOfFluidNodes) 
+    if (kThread >= numberOfFluidNodes)
         return;
     ////////////////////////////////////////////////////////////////////////////////
     //! - Get the node index from the array containing all indices of fluid nodes
@@ -202,7 +202,7 @@ __global__ void LB_Kernel_CumulantK17(
     real vvz = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) +
                 (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) *
             oneOverRho;
-    
+
     ////////////////////////////////////////////////////////////////////////////////////
     //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) \ref
     //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
@@ -212,13 +212,13 @@ __global__ void LB_Kernel_CumulantK17(
     for (size_t i = 1; i <= level; i++) {
         factor *= c2o1;
     }
-    
+
     real fx = forces[0];
     real fy = forces[1];
     real fz = forces[2];
 
     if( applyBodyForce ){
-        fx += bodyForceX[k_000]; 
+        fx += bodyForceX[k_000];
         fy += bodyForceY[k_000];
         fz += bodyForceZ[k_000];
 
@@ -232,7 +232,7 @@ __global__ void LB_Kernel_CumulantK17(
         vvx += acc_x;
         vvy += acc_y;
         vvz += acc_z;
-        
+
         // Reset body force. To be used when not using round-off correction.
         bodyForceX[k_000] = 0.0f;
         bodyForceY[k_000] = 0.0f;
@@ -243,9 +243,9 @@ __global__ void LB_Kernel_CumulantK17(
         //!
         //!> Similar to Kahan summation algorithm (https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
         //!> Essentially computes the round-off error of the applied force and adds it in the next time step as a compensation.
-        //!> Seems to be necesseary at very high Re boundary layers, where the forcing and velocity can  
+        //!> Seems to be necessary at very high Re boundary layers, where the forcing and velocity can
         //!> differ by several orders of magnitude.
-        //!> \note 16/05/2022: Testing, still ongoing! 
+        //!> \note 16/05/2022: Testing, still ongoing!
         //!
         // bodyForceX[k_000] = (acc_x-(vvx-vx))*factor*c2o1;
         // bodyForceY[k_000] = (acc_y-(vvy-vy))*factor*c2o1;
@@ -256,7 +256,7 @@ __global__ void LB_Kernel_CumulantK17(
         vvy += fy * c1o2 / factor;
         vvz += fz * c1o2 / factor;
     }
-    
+
 
     ////////////////////////////////////////////////////////////////////////////////////
     // calculate the square of velocities for this lattice node
@@ -723,4 +723,4 @@ template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::Smagorinsky, f
 
 template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::QR, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
 
-template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
\ No newline at end of file
+template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::None, false, false > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh
index b8cc9543e9b531c5aa90cb2961416a6cbc52377d..55c22def9c43ab2678fc808043859f43021270a5 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cuh
@@ -5,25 +5,25 @@
 #include <curand.h>
 
 template< TurbulenceModel turbulenceModel, bool writeMacroscopicVariables, bool applyBodyForce > __global__ void LB_Kernel_CumulantK17(
-	real omega_in,
-	uint* neighborX,
-	uint* neighborY,
-	uint* neighborZ,
-	real* distributions,
-	real* rho,
-	real* vx,
+    real omega_in,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    real* distributions,
+    real* rho,
+    real* vx,
     real* vy,
     real* vz,
-	real* turbulentViscosity,
-	real SGSconstant,
-	unsigned long numberOfLBnodes,
-	int level,
-	real* forces,
-	real* bodyForceX,
-	real* bodyForceY,
-	real* bodyForceZ,
-	real* quadricLimiters,
-	bool isEvenTimestep,
-	const uint *fluidNodeIndices,
+    real* turbulentViscosity,
+    real SGSconstant,
+    unsigned long numberOfLBnodes,
+    int level,
+    real* forces,
+    real* bodyForceX,
+    real* bodyForceY,
+    real* bodyForceZ,
+    real* quadricLimiters,
+    bool isEvenTimestep,
+    const uint *fluidNodeIndices,
     uint numberOfFluidNodes);
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
index 1310cf09e5ce05270e88cd3a53e4910816bd7628..e8fc3f318c920be36be7861a28659124a7b1e977 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
@@ -428,4 +428,4 @@ void BCKernelManager::runPrecursorBCKernelPost(int level, uint t, CudaMemoryMana
     
     real tRatio = real(t_level-lastTime)/para->getParD(level)->precursorBC.timeStepsBetweenReads;
     precursorBoundaryConditionPost(para->getParD(level).get(), &para->getParD(level)->precursorBC, tRatio, para->getVelocityRatio());
-}
\ No newline at end of file
+}
diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h
index a7517ca0f07b9c1379501aa3ff750637d230a7bd..cfdbbbae040a13f94e97d40d702b93d5a1e19c86 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/LB.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h
@@ -15,9 +15,9 @@
 
 //////////////////////////
 //porous media
-#define GEO_PM_0		 5
-#define GEO_PM_1		 6
-#define GEO_PM_2		 7
+#define GEO_PM_0         5
+#define GEO_PM_1         6
+#define GEO_PM_2         7
 //////////////////////////
 
 #define GEO_SOLID       15
@@ -191,7 +191,7 @@ typedef struct OffFC{
 
 // Distribution functions g 6
 typedef struct  Distri6 {
-	real* g[6];
+   real* g[6];
 } Distributions6;
 
 // Distribution functions f 7
@@ -284,55 +284,53 @@ typedef struct WMparas{
 
 //measurePoints
 typedef struct MeasP{
-	std::string name;
-	uint k;
-	std::vector<real> Vx;
-	std::vector<real> Vy;
-	std::vector<real> Vz;
-	std::vector<real> Rho;
-	//real* Vx;
-	//real* Vy;
-	//real* Vz;
-	//real* Rho;
+   std::string name;
+   uint k;
+   std::vector<real> Vx;
+   std::vector<real> Vy;
+   std::vector<real> Vz;
+   std::vector<real> Rho;
+   //real* Vx;
+   //real* Vy;
+   //real* Vz;
+   //real* Rho;
 }MeasurePoints;
 
 //Process Neighbors
 typedef struct PN27{
-	real* f[27];
-	uint memsizeFs;
-	int* index;
-	uint memsizeIndex;
-	uint rankNeighbor;
-	int numberOfNodes;
-	int numberOfFs;
+   real* f[27];
+   uint memsizeFs;
+   int* index;
+   uint memsizeIndex;
+   uint rankNeighbor;
+   int numberOfNodes;
+   int numberOfFs;
 }ProcessNeighbor27;
 
 typedef struct PN_F3 {
-	real* g[6];
-	uint memsizeGs;
-	int* index;
-	uint memsizeIndex;
-	uint rankNeighbor;
-	int numberOfNodes;
-	int numberOfGs;
+   real* g[6];
+   uint memsizeGs;
+   int* index;
+   uint memsizeIndex;
+   uint rankNeighbor;
+   int numberOfNodes;
+   int numberOfGs;
 }ProcessNeighborF3;
 
 //path line particles
 typedef struct PLP{
-	bool *stuck, *hot;
-	real *coordXabsolut, *coordYabsolut, *coordZabsolut;
-	real *coordXlocal,   *coordYlocal,   *coordZlocal;
-	real *veloX,         *veloY,         *veloZ;
-	real *randomLocationInit;
-	uint *timestep;
-	uint *ID;
-	uint *cellBaseID;
-	uint numberOfParticles, numberOfTimestepsParticles;
-	uint memSizeID, memSizeTimestep, memSizerealAll, memSizereal, memSizeBool, memSizeBoolBC;
+   bool *stuck, *hot;
+   real *coordXabsolut, *coordYabsolut, *coordZabsolut;
+   real *coordXlocal,   *coordYlocal,   *coordZlocal;
+   real *veloX,         *veloY,         *veloZ;
+   real *randomLocationInit;
+   uint *timestep;
+   uint *ID;
+   uint *cellBaseID;
+   uint numberOfParticles, numberOfTimestepsParticles;
+   uint memSizeID, memSizeTimestep, memSizerealAll, memSizereal, memSizeBool, memSizeBoolBC;
 }PathLineParticles;
 
-
-
 //////////////////////////////////////////////////////////////////////////
 inline int vectorPosition(int i, int j, int k, int Lx, int Ly )
 {
@@ -341,7 +339,4 @@ inline int vectorPosition(int i, int j, int k, int Lx, int Ly )
 }
 //////////////////////////////////////////////////////////////////////////
 
-
 #endif
-
-
diff --git a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h
index 53e030c0d16116a4edef0135f9ab435c853fd66b..95fea46d4eba0c2f2ff0846d22ee5da4f6c357ea 100644
--- a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h
+++ b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h
@@ -1,28 +1,28 @@
 //=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
 //           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
 //  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
+//  License as published by the Free Software Foundation, either version 3 of
 //  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 //  for more details.
-//  
+//
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
@@ -45,7 +45,7 @@
 class DistributionDebugInspector
 {
 public:
-	DistributionDebugInspector(uint _inspectionLevel, real _minX, real _maxX, real _minY, real _maxY, real _minZ, real _maxZ, std::string _tag):
+    DistributionDebugInspector(uint _inspectionLevel, real _minX, real _maxX, real _minY, real _maxY, real _minZ, real _maxZ, std::string _tag):
     inspectionLevel(_inspectionLevel),
     minX(_minX),
     maxX(_maxX),
@@ -55,8 +55,8 @@ public:
     maxZ(_maxZ),
     tag(_tag)
     {};
-	
-    ~DistributionDebugInspector(){}
+
+    ~DistributionDebugInspector() = default;
 
     void inspect(std::shared_ptr<Parameter> para, uint level, uint t);
 
@@ -73,4 +73,4 @@ std::string tag;
 
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h
index f771ce10cdfe81237ae1caa8a26f887488839aa4..8e21cdb6b21efd323f6723e21d6b28614109f1ec 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.h
@@ -44,10 +44,7 @@ public:
             throw std::runtime_error("ActuatorFarm::ActuatorFarm: epsilon needs to be larger than dx!");
     }
 
-    virtual  ~ActuatorFarm()
-    {
-        
-    }
+    ~ActuatorFarm() override = default;
     void addTurbine(real turbinePosX, real turbinePosY, real turbinePosZ, real diameter, real omega, real azimuth, real yaw, std::vector<real> bladeRadii);
     void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager) override;
     void interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t) override;
@@ -197,4 +194,4 @@ private:
     int streamIndex;
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h
index 3959fb2d8f5e719084d2d55e6f6ef84076383b45..3bae63a339255f3f72196e20096f6019cdd7748d 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h
@@ -158,4 +158,4 @@ private:
     uint writePrecision = 8;
 };
 
-#endif //PRECURSORPROBE_H_
\ No newline at end of file
+#endif //PRECURSORPROBE_H_