diff --git a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
index 2752f367343b52a0247525bc2ffd7c933537013d..1d852b6d5e6c5124348fafc5472685946852f455 100644
--- a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
+++ b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
@@ -67,8 +67,16 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-std::string path("E:/temp/MusselOysterResults");
-std::string gridPathParent = "E:/temp/GridMussel/";
+//  Tesla 03
+// std::string outPath("E:/temp/MusselOysterResults/");
+// std::string gridPathParent = "E:/temp/GridMussel/";
+// std::string stlPath("C:/Users/Master/Documents/MasterAnna/STL/");
+// std::string simulationName("MusselOyster");
+
+// Phoenix
+std::string outPath("/work/y0078217/Results/MusselOysterResults/");
+std::string gridPathParent = "/work/y0078217/Grids/GridMusselOyster/";
+std::string stlPath("/home/y0078217/STL/MusselOyster/");
 std::string simulationName("MusselOyster");
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -109,20 +117,37 @@ void multipleLevel(const std::string& configPath)
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
     bool useGridGenerator = true;
-    bool useMultiGPU      = true;
     bool useStreams       = true;
     bool useLevels        = true;
     para->useReducedCommunicationAfterFtoC = true;
+    para->setCalcTurbulenceIntensity(true);
+
+    if (para->getNumprocs() == 1) {
+       useStreams       = false;
+       para->useReducedCommunicationAfterFtoC = false;
+    }
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     std::string bivalveType = "MUSSEL"; // "MUSSEL" "OYSTER"
     std::string gridPath(gridPathParent + bivalveType); // only for GridGenerator, for GridReader the gridPath needs to be set in the config file
 
-    real dxGrid = (real)1.0;
+    // real dxGrid = (real)2.0; // 2.0
+    real dxGrid = (real)1.0; // 1.0
+    if (para->getNumprocs() == 8)  
+        dxGrid = 0.5;  
     real vxLB = (real)0.051; // LB units
     real Re = (real)300.0;
-    real viscosityLB = (vxLB * dxGrid) / Re;
+
+    real heightBivalve;
+    if (bivalveType == "MUSSEL")
+        heightBivalve = (real)35.0; 
+    else if (bivalveType == "OYSTER")
+        heightBivalve = (real)72.0;
+    else
+        std::cerr << "Error: unknown bivalveType" << std::endl;
+    real length = 1.0 / dxGrid; // heightBivalve / dxGrid
+    real viscosityLB = (vxLB * length) / Re;
 
     para->setVelocity(vxLB);
     para->setViscosity(viscosityLB);
@@ -130,26 +155,31 @@ void multipleLevel(const std::string& configPath)
     para->setViscosityRatio((real) 0.058823529);
     para->setDensityRatio((real) 998.0);
 
+    *logging::out << logging::Logger::INFO_HIGH << "bivalveType = " << bivalveType << " \n";
     *logging::out << logging::Logger::INFO_HIGH << "velocity LB [dx/dt] = " << vxLB << " \n";
     *logging::out << logging::Logger::INFO_HIGH << "viscosity LB [dx^2/dt] = " << viscosityLB << "\n";
     *logging::out << logging::Logger::INFO_HIGH << "velocity real [m/s] = " << vxLB * para->getVelocityRatio()<< " \n";
     *logging::out << logging::Logger::INFO_HIGH << "viscosity real [m^2/s] = " << viscosityLB * para->getViscosityRatio() << "\n";
     *logging::out << logging::Logger::INFO_HIGH << "dxGrid = " << dxGrid << "\n";
     *logging::out << logging::Logger::INFO_HIGH << "useGridGenerator = " << useGridGenerator << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "useMultiGPU = " << useMultiGPU << "\n";
     *logging::out << logging::Logger::INFO_HIGH << "useStreams = " << useStreams << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "number of processes = " << para->getNumprocs() << "\n";
 
     
-    para->setTOut(1000);
-    para->setTEnd(10000);
+    // para->setTOut(1000);
+    // para->setTEnd(10000);
 
     para->setCalcDragLift(false);
     para->setUseWale(false);
 
-    para->setOutputPath(path);
+    if (para->getOutputPath().size() == 0) {
+        para->setOutputPath(outPath);
+    }
     para->setOutputPrefix(simulationName);
-    para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix());
+    para->setFName(para->getOutputPath() + para->getOutputPrefix());
     para->setPrintFiles(true);
+    std::cout << "Write result files to " << para->getFName() << std::endl;
+
 
     if (useLevels)
         para->setMaxLevel(2);
@@ -159,15 +189,15 @@ void multipleLevel(const std::string& configPath)
 
     if (useStreams)
         para->setUseStreams();
-    //para->setMainKernel("CumulantK17CompChim");
+    // para->setMainKernel("CumulantK17CompChim");
     para->setMainKernel("CumulantK17CompChimStream");
     *logging::out << logging::Logger::INFO_HIGH << "Kernel: " << para->getMainKernel() << "\n";
 
-    if (useMultiGPU) {
-        para->setDevices(std::vector<uint>{ (uint)0, (uint)1 });
-        para->setMaxDev(2);
-    } else 
-        para->setDevices(std::vector<uint>{ (uint)0 });
+    // if (para->getNumprocs() > 1) {
+    //     para->setDevices(std::vector<uint>{ (uint)0, (uint)1 });
+    //     para->setMaxDev(2);
+    // } else 
+    //     para->setDevices(std::vector<uint>{ (uint)0 });
 
 
 
@@ -175,125 +205,410 @@ void multipleLevel(const std::string& configPath)
 
 
     if (useGridGenerator) {
+        // Upper z bound of the bivalve bounding box; initialised so an unrecognised bivalveType cannot leave it indeterminate.
+        real bbzp = (real)0.0;
+        if (bivalveType == "MUSSEL")
+            bbzp = 9.0;
+        if (bivalveType == "OYSTER")
+            bbzp = 13.0;
+        real bbzm = -bbzp; // lower z bound is the negated upper bound
         // bounding box mussel:
-        const real bbxm = 0.0;
-        const real bbxp = 76.0;
-        const real bbym = 0.0;
-        const real bbyp = 35.0;
-        const real bbzm = 0.0;
-        const real bbzp = 18.3;
+        // const real bbxm = 0.0;
+        // const real bbxp = 76.0;
+        // const real bbym = 0.0;
+        // const real bbyp = 35.0;
+        // const real bbzm = -9.15;
+        // const real bbzp = 9.15;
         // bounding box oyster:
         // const real bbxm = 0.0;
-        // const real bbxp = 115.0;
+        // const real bbxp = 102.0;
         // const real bbym = 0.0;
-        // const real bbyp = 27.0;
-        // const real bbzm = 0.0;
-        // const real bbzp = 63.0;
-
-        const real xGridMin  = bbxm - 40.0;
-        const real xGridMax  = bbxp + 250.0;
-        const real yGridMin  = bbym + 1.0;
-        const real yGridMax  = bbyp + 60.0;
-        const real zGridMin  = bbzm - 30.0;
-        const real zGridMax  = bbzp + 30.0;
-
-        TriangularMesh *bivalveSTL =
-            TriangularMesh::make("C:/Users/Master/Documents/MasterAnna/STL/" + bivalveType + ".stl");
+        // const real bbyp = 72.0;
+        // const real bbzm = -13.0;
+        // const real bbzp = 13.0;
+
+        const real xGridMin  = -100.0;     // -100.0;
+        const real xGridMax  = 470.0;      // old: 540.0; new: 440; with larger level 1: 470
+        const real yGridMin  = 1.0;        // 1.0;
+        const real yGridMax  = 350.0;      // old: 440.0; new: 350
+        const real zGridMin  = -85;        // -85;
+        const real zGridMax  = 85.0;       // 85;
+
+        TriangularMesh *bivalveSTL       = TriangularMesh::make(stlPath + bivalveType + ".stl");
         TriangularMesh *bivalveRef_1_STL = nullptr;
         if (useLevels)
-            bivalveRef_1_STL = TriangularMesh::make("C:/Users/Master/Documents/MasterAnna/STL/" + bivalveType + "_Level1.stl");
-
-        if (useMultiGPU) {
-            const uint generatePart = vf::gpu::Communicator::getInstanz()->getPID();
-            
-            real overlap      = (real)8.0 * dxGrid;            
-            const real ySplit = bbyp - 10.0;
+            bivalveRef_1_STL = TriangularMesh::make(stlPath + bivalveType + "_Level1.stl");
 
-            if (generatePart == 0) {
-                gridBuilder->addCoarseGrid( xGridMin,   yGridMin,           zGridMin, 
-                                            xGridMax,   ySplit+overlap,     zGridMax,   dxGrid);
-            }
-            if (generatePart == 1) {
-                gridBuilder->addCoarseGrid(xGridMin,    ySplit-overlap,     zGridMin, 
-                                           xGridMax,    yGridMax,           zGridMax,   dxGrid);
-            }
-
-            if (useLevels) {
-            gridBuilder->setNumberOfLayers(6, 8);
-            gridBuilder->addGrid(bivalveRef_1_STL, 1);
-            }
-
-            gridBuilder->addGeometry(bivalveSTL);
-
-            if (generatePart == 0)
-                gridBuilder->setSubDomainBox(std::make_shared<BoundingBox>(xGridMin,    xGridMax,
-                                                                           yGridMin,    ySplit, 
-                                                                           zGridMin,    zGridMax));
-            if (generatePart == 1)
-                gridBuilder->setSubDomainBox(std::make_shared<BoundingBox>(xGridMin,    xGridMax, 
-                                                                           ySplit,      yGridMax, 
-                                                                           zGridMin,    zGridMax));
-            
-            gridBuilder->setPeriodicBoundaryCondition(false, false, true);
-
-            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
 
-            if (generatePart == 0) {
-                gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
-                gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 1);
-            }
+        if (para->getNumprocs() > 1) {
+            const uint generatePart = vf::gpu::Communicator::getInstanz()->getPID();
 
-            if (generatePart == 1) {
-                gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
-                gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 0);
+            real overlap = (real)8.0 * dxGrid;
+            gridBuilder->setNumberOfLayers(10, 8);
+
+            if (comm->getNummberOfProcess() == 2) {
+                const real zSplit = 0.0; //round(((double)bbzp + bbzm) * 0.5);          
+
+                if (generatePart == 0) {
+                    gridBuilder->addCoarseGrid( xGridMin,   yGridMin,     zGridMin, 
+                                                xGridMax,   yGridMax,     zSplit+overlap,   dxGrid);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->addCoarseGrid( xGridMin,    yGridMin,     zSplit-overlap, 
+                                                xGridMax,    yGridMax,     zGridMax,        dxGrid);
+                }
+
+
+                if (useLevels) {
+                    gridBuilder->addGrid(bivalveRef_1_STL, 1);
+                }
+
+                gridBuilder->addGeometry(bivalveSTL);
+
+                if (generatePart == 0){
+                    gridBuilder->setSubDomainBox(std::make_shared<BoundingBox>(xGridMin,    xGridMax,
+                                                                               yGridMin,    yGridMax, 
+                                                                               zGridMin,    zSplit));
+                }
+                if (generatePart == 1){
+                    gridBuilder->setSubDomainBox(std::make_shared<BoundingBox>(xGridMin,    xGridMax, 
+                                                                               yGridMin,    yGridMax, 
+                                                                               zSplit,      zGridMax));            
+                }
+
+                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+
+                if (generatePart == 0) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 1);
+                }
+
+                if (generatePart == 1) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
+                }
+                
+                gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+                ////////////////////////////////////////////////////////////////////////// 
+                if (generatePart == 0)
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                if (generatePart == 1)
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+                gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                //////////////////////////////////////////////////////////////////////////           
+            } else if (comm->getNummberOfProcess() == 4) {
+
+                const real xSplit = 100.0;
+                const real zSplit = 0.0;
+
+                if (generatePart == 0) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xSplit + overlap, yGridMax,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, yGridMin, zGridMin, xGridMax, yGridMax,
+                                               zSplit+overlap, dxGrid);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zSplit-overlap, xSplit + overlap, yGridMax,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, yGridMin, zSplit-overlap, xGridMax, yGridMax,
+                                               zGridMax, dxGrid);
+                }
+
+                if (useLevels) {
+                    gridBuilder->addGrid(bivalveRef_1_STL, 1);
+                }
+
+                gridBuilder->addGeometry(bivalveSTL);
+
+                if (generatePart == 0)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, yGridMin, yGridMax, zGridMin, zSplit));
+                if (generatePart == 1)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, yGridMin, yGridMax, zGridMin, zSplit));
+                if (generatePart == 2)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, yGridMin, yGridMax, zSplit, zGridMax));
+                if (generatePart == 3)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, yGridMin, yGridMax, zSplit, zGridMax));
+
+                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+
+                if (generatePart == 0) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 1);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 2);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 0);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 3);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 3);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 2);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 1);
+                }
+
+                gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+                //////////////////////////////////////////////////////////////////////////
+                if (generatePart == 0) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 2) {                    
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                }
+                gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);                
+                if (generatePart == 3) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                }
+                if (generatePart == 1) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);                    
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0); // set pressure BC after velocity BCs
+                }
+                //////////////////////////////////////////////////////////////////////////
+            } else if (comm->getNummberOfProcess() == 8) {
+                real xSplit = 140.0; // 100.0; with larger level 1: 140.0
+                real ySplit = 32.0;  // 32.0
+                real zSplit = 0.0;
+
+                if (generatePart == 0) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xSplit + overlap, ySplit + overlap,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->addCoarseGrid(xGridMin, ySplit - overlap, zGridMin, xSplit + overlap, yGridMax,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, yGridMin, zGridMin, xGridMax, ySplit + overlap,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, ySplit - overlap, zGridMin, xGridMax, yGridMax,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 4) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zSplit - overlap, xSplit + overlap, ySplit + overlap,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 5) {
+                    gridBuilder->addCoarseGrid(xGridMin, ySplit - overlap, zSplit - overlap, xSplit + overlap, yGridMax,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 6) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, yGridMin, zSplit - overlap, xGridMax, ySplit + overlap,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 7) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, ySplit - overlap, zSplit - overlap, xGridMax, yGridMax,
+                                               zGridMax, dxGrid);
+                }
+
+                if (useLevels) {
+                    gridBuilder->addGrid(bivalveRef_1_STL, 1);
+                }
+
+                gridBuilder->addGeometry(bivalveSTL);
+                
+                if (generatePart == 0)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, yGridMin, ySplit, zGridMin, zSplit));
+                if (generatePart == 1)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, ySplit, yGridMax, zGridMin, zSplit));
+                if (generatePart == 2)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, yGridMin, ySplit, zGridMin, zSplit));
+                if (generatePart == 3)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, ySplit, yGridMax, zGridMin, zSplit));
+                if (generatePart == 4)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, yGridMin, ySplit, zSplit, zGridMax));
+                if (generatePart == 5)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, ySplit, yGridMax, zSplit, zGridMax));
+                if (generatePart == 6)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, yGridMin, ySplit, zSplit, zGridMax));
+                if (generatePart == 7)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, ySplit, yGridMax, zSplit, zGridMax));
+
+                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+
+                if (generatePart == 0) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 1);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 2);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 4);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 0);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 3);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 5);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 3);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 0);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 6);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 2);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 1);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 7);
+                }
+                if (generatePart == 4) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 5);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 6);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
+                }
+                if (generatePart == 5) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 4);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 7);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 1);
+                }
+                if (generatePart == 6) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 7);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 4);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 2);
+                }
+                if (generatePart == 7) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 6);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 5);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 3);
+                }
+
+                //////////////////////////////////////////////////////////////////////////
+                if (generatePart == 0) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY,  0.0, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY,  0.0, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
+                }
+                if (generatePart == 4) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY,  0.0, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 5) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 6) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY,  0.0, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                }
+                if (generatePart == 7) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                }
+                // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+                //////////////////////////////////////////////////////////////////////////                
             }
-
-            //////////////////////////////////////////////////////////////////////////                       
-            gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
-            gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0); 
-            if (generatePart == 0)
-                gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0); 
-            if (generatePart == 1)
-                gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);  
-            gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
-            //////////////////////////////////////////////////////////////////////////
             if (para->getKernelNeedsFluidNodeIndicesToRun())
                 gridBuilder->findFluidNodes(useStreams);
 
-            //gridBuilder->writeGridsToVtk(path + "/" + bivalveType + "/grid/part" + std::to_string(generatePart) + "_");
-            //gridBuilder->writeGridsToVtk(path + "/" + bivalveType + "/" + std::to_string(generatePart) + "/grid/");
-            //gridBuilder->writeArrows(path + "/" + bivalveType + "/" + std::to_string(generatePart) + " /arrow");
+            // gridBuilder->writeGridsToVtk(outPath +  bivalveType + "/grid/part" + std::to_string(generatePart) + "_"); 
+            // gridBuilder->writeGridsToVtk(outPath + bivalveType + "/" + std::to_string(generatePart) + "/grid/"); 
+            // gridBuilder->writeArrows(outPath + bivalveType + "/" + std::to_string(generatePart) + " /arrow");
 
-            SimulationFileWriter::write(gridPath + "/" + std::to_string(generatePart) + "/", gridBuilder, FILEFORMAT::BINARY);
-           
+            SimulationFileWriter::write(gridPath + std::to_string(generatePart) + "/", gridBuilder,
+                                        FILEFORMAT::BINARY);
         } else {
 
             gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xGridMax, yGridMax, zGridMax, dxGrid);
 
             if (useLevels) {
-                gridBuilder->setNumberOfLayers(6, 8);
+                gridBuilder->setNumberOfLayers(10, 8);
                 gridBuilder->addGrid(bivalveRef_1_STL, 1);
             }
 
             gridBuilder->addGeometry(bivalveSTL);
 
-            gridBuilder->setPeriodicBoundaryCondition(false, false, true);
-
             gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
 
+            gridBuilder->setPeriodicBoundaryCondition(false, false, false);
             //////////////////////////////////////////////////////////////////////////
+            gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
             gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
             gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
-            gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
-
+            gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
             gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+            gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+
             //////////////////////////////////////////////////////////////////////////
             if (para->getKernelNeedsFluidNodeIndicesToRun())
                 gridBuilder->findFluidNodes(useStreams);
 
-            // gridBuilder->writeGridsToVtk("E:/temp/MusselOyster/" + bivalveType + "/grid/");
-            // gridBuilder->writeArrows ("E:/temp/MusselOyster/" + bivalveType + "/arrow");
+            // gridBuilder->writeGridsToVtk(outPath +  bivalveType + "/grid/");
+            // gridBuilder->writeArrows ((outPath + bivalveType + "/arrow");
 
             SimulationFileWriter::write(gridPath, gridBuilder, FILEFORMAT::BINARY);
         }
@@ -360,18 +675,23 @@ void multipleLevel(const std::string& configPath)
 int main( int argc, char* argv[])
 {
     MPI_Init(&argc, &argv);
-    std::string str, str2; 
+    std::string str, str2, configFile;
+
     if ( argv != NULL )
     {
         //str = static_cast<std::string>(argv[0]);
         
-        try
-        {
+        try {
             //////////////////////////////////////////////////////////////////////////
 
-			std::string targetPath;
+            std::string targetPath;
 
-			targetPath = __FILE__;
+            targetPath = __FILE__;
+
+            if (argc == 2) {
+                configFile = argv[1];
+                std::cout << "Using configFile command line argument: " << configFile << std::endl;
+            }
 
 #ifdef _WIN32
             targetPath = targetPath.substr(0, targetPath.find_last_of('\\') + 1);
@@ -379,12 +699,16 @@ int main( int argc, char* argv[])
             targetPath = targetPath.substr(0, targetPath.find_last_of('/') + 1);
 #endif
 
-			std::cout << targetPath << std::endl;
+            std::cout << targetPath << std::endl;
+
+            if (configFile.size() == 0) {
+                configFile = targetPath + "configMusselOyster.txt";
+            }
 
-			multipleLevel(targetPath + "configMusselOyster.txt");
+            multipleLevel(configFile);
 
             //////////////////////////////////////////////////////////////////////////
-		}
+        }
         catch (const std::bad_alloc& e)
         { 
             *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
diff --git a/apps/gpu/LBM/MusselOyster/configMusselOyster.txt b/apps/gpu/LBM/MusselOyster/configMusselOyster.txt
index 1cc65164170b72566efd7c09a86b4d712edc9a86..3c6ac8b5fdff8b5c3befbf2975938416066a61f1 100644
--- a/apps/gpu/LBM/MusselOyster/configMusselOyster.txt
+++ b/apps/gpu/LBM/MusselOyster/configMusselOyster.txt
@@ -1,20 +1,32 @@
+# Tesla 03
+# mpiexec -n 2 "C:/Users/Master/Documents/MasterAnna/VirtualFluids_dev/build/bin/Release/MusselOyster.exe" "C:/Users/Master/Documents/MasterAnna/VirtualFluids_dev/apps/gpu/LBM/MusselOyster/configMusselOyster.txt"
+# Phoenix
+# mpirun -np 2 "./VirtualFluids_dev/build/bin/MusselOyster" "./VirtualFluids_dev/apps/gpu/LBM/MusselOyster/configMusselOyster.txt"
+
+# Phoenix mpich
+# mpirun -np 4 nvprof -f -o MusselOyster.%q{PMI_RANK}.nvprof "./VirtualFluids_dev/build/bin/MusselOyster" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/configPhoenix4GPU.txt"
+# Phoenix openmpi
+# mpirun -np 4 nvprof -f -o MusselOyster.%q{OMPI_COMM_WORLD_RANK}.nvprof "./VirtualFluids_dev/build/bin/MusselOyster" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/configPhoenix4GPU.txt"
+
 ##################################################
 #GPU Mapping
 ##################################################
-#Devices="0 1 2 3"
-#NumberOfDevices=4
+Devices="0 1 2 3"
+NumberOfDevices=2
 
 ##################################################
 #informations for Writing
 ##################################################
-Path=E:/temp/MusselOysterResults
+Path=/work/y0078217/Results/MusselOysterResults/
+#Path=E:/temp/MusselOysterResults/
 #Path="F:/Work/Computations/out/MusselOyster/"
 #Prefix="MusselOyster" 
 #WriteGrid=true
 ##################################################
 #informations for reading
 ##################################################
-GridPath=E:/temp/GridMussel/MUSSEL
+GridPath=/work/y0078217/Grids/GridMusselOyster/
+#GridPath=E:/temp/GridMussel/MUSSEL/
 #GridPath="C:"
 
 ##################################################
@@ -31,6 +43,6 @@ GridPath=E:/temp/GridMussel/MUSSEL
 ##################################################
 #simulation parameter
 ##################################################
-#TimeEnd=100000
-#TimeOut=1000 
+TimeEnd=10000
+TimeOut=1000 
 #TimeStartOut=0
\ No newline at end of file
diff --git a/apps/gpu/LBM/MusselOyster/configPhoenix1GPU.txt b/apps/gpu/LBM/MusselOyster/configPhoenix1GPU.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4e3863eae9a0f2d52413e7c68b8aa4032b193ae3
--- /dev/null
+++ b/apps/gpu/LBM/MusselOyster/configPhoenix1GPU.txt
@@ -0,0 +1,36 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0"
+NumberOfDevices=1
+
+##################################################
+#informations for Writing
+##################################################
+Path=/work/y0078217/Results/MusselOysterResults/1GPUMussel1/
+#Path="F:/Work/Computations/out/MusselOyster/"
+#Prefix="MusselOyster" 
+#WriteGrid=true
+##################################################
+#informations for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridMusselOyster/Mussel1GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=400000 #400000 / 200000
+TimeOut=200000 #200000 / 100000
+#TimeStartOut=0
\ No newline at end of file
diff --git a/apps/gpu/LBM/MusselOyster/configPhoenix8GPU.txt b/apps/gpu/LBM/MusselOyster/configPhoenix8GPU.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a7ef59a69ac43327fff2648dcaae36ec91f70282
--- /dev/null
+++ b/apps/gpu/LBM/MusselOyster/configPhoenix8GPU.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#informations for Writing
+##################################################
+Path=/work/y0078217/Results/MusselOysterResults/8GPUMussel05/
+#Path="F:/Work/Computations/out/MusselOyster/"
+#Prefix="MusselOyster" 
+#WriteGrid=true
+##################################################
+#informations for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridMusselOyster/Mussel8GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=1000 # 800000
+TimeOut=1000 # 400000
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = false
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/MusselOyster2x/CMakeLists.txt b/apps/gpu/LBM/MusselOyster2x/CMakeLists.txt
index 47f36604929442374a26f0cb7d51014e75c60a32..881b64c4434ae32c1fe4b12cff6de48009e38726 100644
--- a/apps/gpu/LBM/MusselOyster2x/CMakeLists.txt
+++ b/apps/gpu/LBM/MusselOyster2x/CMakeLists.txt
@@ -5,6 +5,6 @@ vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenera
 set_source_files_properties(MusselOyster2x.cpp PROPERTIES LANGUAGE CUDA)
 
 set_target_properties(MusselOyster2x PROPERTIES 
-	CUDA_SEPARABLE_COMPILATION ON
-	VS_DEBUGGER_COMMAND "C:/Program Files/Microsoft MPI/Bin/mpiexec.exe"
-    VS_DEBUGGER_COMMAND_ARGUMENTS "-n 2 \"$<TARGET_FILE:MusselOyster2x>\"")
\ No newline at end of file
+	CUDA_SEPARABLE_COMPILATION ON)
+#	 VS_DEBUGGER_COMMAND "C:/Program Files/Microsoft MPI/Bin/mpiexec.exe"
+#    VS_DEBUGGER_COMMAND_ARGUMENTS "-n 2 \"$<TARGET_FILE:MusselOyster2x>\"")
\ No newline at end of file
diff --git a/apps/gpu/LBM/MusselOyster2x/MusselOyster2x.cpp b/apps/gpu/LBM/MusselOyster2x/MusselOyster2x.cpp
index f4c8de81f78b301bb010b76c7e6e473a292ba550..c6e309f5ecdbeba85e96e7b7abe2f93e11faeb6f 100644
--- a/apps/gpu/LBM/MusselOyster2x/MusselOyster2x.cpp
+++ b/apps/gpu/LBM/MusselOyster2x/MusselOyster2x.cpp
@@ -67,8 +67,9 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-std::string path("E:/temp/MusselOysterResults");
+std::string outPath("E:/temp/MusselOysterResults");
 std::string gridPathParent = "E:/temp/GridMussel/";
+std::string stlPath("C:/Users/Master/Documents/MasterAnna/STL/");
 std::string simulationName("MusselOyster");
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -109,10 +110,11 @@ void multipleLevel(const std::string& configPath)
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
     bool useGridGenerator = true;
-    bool useMultiGPU      = true;
-    bool useStreams       = true;
+    bool useMultiGPU      = false;
+    bool useStreams       = false;
     bool useLevels        = true;
     para->useReducedCommunicationAfterFtoC = true;
+    para->setCalcTurbulenceIntensity(true);
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -140,13 +142,15 @@ void multipleLevel(const std::string& configPath)
     *logging::out << logging::Logger::INFO_HIGH << "useStreams = " << useStreams << "\n";
 
     
-    para->setTOut(1000);
-    para->setTEnd(10000);
+    //para->setTOut(1000);
+    //para->setTEnd(10000);
 
     para->setCalcDragLift(false);
     para->setUseWale(false);
 
-    para->setOutputPath(path);
+    if (para->getOutputPath().size() == 0) {
+        para->setOutputPath(outPath);
+    }
     para->setOutputPrefix(simulationName);
     para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix());
     para->setPrintFiles(true);
@@ -163,11 +167,11 @@ void multipleLevel(const std::string& configPath)
     para->setMainKernel("CumulantK17CompChimStream");
     *logging::out << logging::Logger::INFO_HIGH << "Kernel: " << para->getMainKernel() << "\n";
 
-    if (useMultiGPU) {
-        para->setDevices(std::vector<uint>{ (uint)0, (uint)1 });
-        para->setMaxDev(2);
-    } else 
-        para->setDevices(std::vector<uint>{ (uint)0 });
+    //if (useMultiGPU) {
+    //    para->setDevices(std::vector<uint>{ (uint)0, (uint)1 });
+    //    para->setMaxDev(2);
+    //} else 
+    //    para->setDevices(std::vector<uint>{ (uint)0 });
 
 
 
@@ -197,11 +201,11 @@ void multipleLevel(const std::string& configPath)
         const real zGridMin  = bbzm - 30.0;
         const real zGridMax  = bbzp + 30.0;
 
-        TriangularMesh *bivalveSTL =
-            TriangularMesh::make("C:/Users/Master/Documents/MasterAnna/STL/" + bivalveType + ".stl");
+        TriangularMesh *bivalveSTL       = TriangularMesh::make(stlPath + bivalveType + ".stl");
         TriangularMesh *bivalveRef_1_STL = nullptr;
         if (useLevels)
-            bivalveRef_1_STL = TriangularMesh::make("C:/Users/Master/Documents/MasterAnna/STL/" + bivalveType + "_Level1.stl");
+            bivalveRef_1_STL = TriangularMesh::make(stlPath + bivalveType + "_Level1.stl");
+
 
         if (useMultiGPU) {
             const uint generatePart = vf::gpu::Communicator::getInstanz()->getPID();
@@ -260,9 +264,9 @@ void multipleLevel(const std::string& configPath)
             if (para->getKernelNeedsFluidNodeIndicesToRun())
                 gridBuilder->findFluidNodes(useStreams);
 
-            //gridBuilder->writeGridsToVtk(path + "/" + bivalveType + "/grid/part" + std::to_string(generatePart) + "_");
-            //gridBuilder->writeGridsToVtk(path + "/" + bivalveType + "/" + std::to_string(generatePart) + "/grid/");
-            //gridBuilder->writeArrows(path + "/" + bivalveType + "/" + std::to_string(generatePart) + " /arrow");
+            //gridBuilder->writeGridsToVtk(outPath + "/" + bivalveType + "/grid/part" + std::to_string(generatePart) + "_");
+            //gridBuilder->writeGridsToVtk(outPath + "/" + bivalveType + "/" + std::to_string(generatePart) + "/grid/");
+            //gridBuilder->writeArrows(outPath + "/" + bivalveType + "/" + std::to_string(generatePart) + " /arrow");
 
             SimulationFileWriter::write(gridPath + "/" + std::to_string(generatePart) + "/", gridBuilder, FILEFORMAT::BINARY);
            
@@ -360,18 +364,22 @@ void multipleLevel(const std::string& configPath)
 int main( int argc, char* argv[])
 {
     MPI_Init(&argc, &argv);
-    std::string str, str2; 
+    std::string str, str2, configFile; 
     if ( argv != NULL )
     {
         //str = static_cast<std::string>(argv[0]);
         
-        try
-        {
+        try {
             //////////////////////////////////////////////////////////////////////////
 
-			std::string targetPath;
+            std::string targetPath;
+
+            targetPath = __FILE__;
 
-			targetPath = __FILE__;
+            if (argc == 2) {
+                configFile = argv[1];
+                std::cout << "Using configFile command line argument: " << configFile << std::endl;
+            }
 
 #ifdef _WIN32
             targetPath = targetPath.substr(0, targetPath.find_last_of('\\') + 1);
@@ -379,12 +387,16 @@ int main( int argc, char* argv[])
             targetPath = targetPath.substr(0, targetPath.find_last_of('/') + 1);
 #endif
 
-			std::cout << targetPath << std::endl;
+            std::cout << targetPath << std::endl;
+
+            if (configFile.size() == 0) {
+                configFile = targetPath + "configMusselOyster.txt";
+            }
 
-			multipleLevel(targetPath + "configMusselOyster.txt");
+            multipleLevel(configFile);
 
             //////////////////////////////////////////////////////////////////////////
-		}
+        }
         catch (const std::bad_alloc& e)
         { 
             *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
diff --git a/apps/gpu/LBM/MusselOyster3z/MusselOyster3z.cpp b/apps/gpu/LBM/MusselOyster3z/MusselOyster3z.cpp
index 0160b01c137f2ba3cd0269c5552c62154c1f9de7..224896fb6d82336df9a1522efe2679b3760aeaec 100644
--- a/apps/gpu/LBM/MusselOyster3z/MusselOyster3z.cpp
+++ b/apps/gpu/LBM/MusselOyster3z/MusselOyster3z.cpp
@@ -67,8 +67,9 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-std::string path("E:/temp/MusselOysterResults");
+std::string outPath("E:/temp/MusselOysterResults");
 std::string gridPathParent = "E:/temp/GridMussel/";
+std::string stlPath("C:/Users/Master/Documents/MasterAnna/STL/");
 std::string simulationName("MusselOyster");
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -113,6 +114,8 @@ void multipleLevel(const std::string& configPath)
     bool useStreams       = true;
     bool useLevels        = true;
     para->useReducedCommunicationAfterFtoC = true;
+    para->setCalcTurbulenceIntensity(true);
+
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -139,13 +142,15 @@ void multipleLevel(const std::string& configPath)
     *logging::out << logging::Logger::INFO_HIGH << "useMultiGPU = " << useMultiGPU << "\n";
     *logging::out << logging::Logger::INFO_HIGH << "useStreams = " << useStreams << "\n";
 
-    para->setTOut(1000);
-    para->setTEnd(10000);
+    //para->setTOut(1000);
+    //para->setTEnd(10000);
 
     para->setCalcDragLift(false);
     para->setUseWale(false);
 
-    para->setOutputPath(path);
+    if (para->getOutputPath().size() == 0) {
+        para->setOutputPath(outPath);
+    }
     para->setOutputPrefix(simulationName);
     para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix());
     para->setPrintFiles(true);
@@ -162,11 +167,11 @@ void multipleLevel(const std::string& configPath)
     para->setMainKernel("CumulantK17CompChimStream");
     *logging::out << logging::Logger::INFO_HIGH << "Kernel: " << para->getMainKernel() << "\n";
 
-    if (useMultiGPU) {
-        para->setDevices(std::vector<uint>{ (uint)0, (uint)1 });
-        para->setMaxDev(2);
-    } else 
-        para->setDevices(std::vector<uint>{ (uint)0 });
+    //if (useMultiGPU) {
+    //    para->setDevices(std::vector<uint>{ (uint)0, (uint)1 });
+    //    para->setMaxDev(2);
+    //} else 
+    //    para->setDevices(std::vector<uint>{ (uint)0 });
 
 
 
@@ -196,11 +201,11 @@ void multipleLevel(const std::string& configPath)
         const real zGridMin  = bbzm - 30.0;
         const real zGridMax  = bbzp + 30.0;
 
-        TriangularMesh *bivalveSTL =
-            TriangularMesh::make("C:/Users/Master/Documents/MasterAnna/STL/" + bivalveType + ".stl");
+        TriangularMesh *bivalveSTL       = TriangularMesh::make(stlPath + bivalveType + ".stl");
         TriangularMesh *bivalveRef_1_STL = nullptr;
         if (useLevels)
-            bivalveRef_1_STL = TriangularMesh::make("C:/Users/Master/Documents/MasterAnna/STL/" + bivalveType + "_Level1.stl");
+            bivalveRef_1_STL = TriangularMesh::make(stlPath + bivalveType + "_Level1.stl");
+
 
         if (useMultiGPU) {
             const uint generatePart = vf::gpu::Communicator::getInstanz()->getPID();
@@ -261,9 +266,9 @@ void multipleLevel(const std::string& configPath)
             if (para->getKernelNeedsFluidNodeIndicesToRun())
                 gridBuilder->findFluidNodes(useStreams);
 
-            //gridBuilder->writeGridsToVtk(path + "/" + bivalveType + "/grid/part" + std::to_string(generatePart) + "_");
-            //gridBuilder->writeGridsToVtk(path + "/" + bivalveType + "/" + std::to_string(generatePart) + "/grid/");
-            //gridBuilder->writeArrows(path + "/" + bivalveType + "/" + std::to_string(generatePart) + " /arrow");
+            //gridBuilder->writeGridsToVtk(outPath + "/" + bivalveType + "/grid/part" + std::to_string(generatePart) + "_");
+            //gridBuilder->writeGridsToVtk(outPath + "/" + bivalveType + "/" + std::to_string(generatePart) + "/grid/");
+            //gridBuilder->writeArrows(outPath + "/" + bivalveType + "/" + std::to_string(generatePart) + " /arrow");
 
             SimulationFileWriter::write(gridPath + "/" + std::to_string(generatePart) + "/", gridBuilder, FILEFORMAT::BINARY);
            
@@ -361,7 +366,7 @@ void multipleLevel(const std::string& configPath)
 int main( int argc, char* argv[])
 {
     MPI_Init(&argc, &argv);
-    std::string str, str2; 
+    std::string str, str2, configFile; 
     if ( argv != NULL )
     {
         //str = static_cast<std::string>(argv[0]);
@@ -374,6 +379,11 @@ int main( int argc, char* argv[])
 
 			targetPath = __FILE__;
 
+            if (argc == 2) {
+                configFile = argv[1];
+                std::cout << "Using configFile command line argument: " << configFile << std::endl;
+            } 
+
 #ifdef _WIN32
             targetPath = targetPath.substr(0, targetPath.find_last_of('\\') + 1);
 #else
@@ -382,7 +392,11 @@ int main( int argc, char* argv[])
 
 			std::cout << targetPath << std::endl;
 
-			multipleLevel(targetPath + "configMusselOyster.txt");
+            if (configFile.size() == 0) {
+                configFile = targetPath + "configMusselOyster.txt";
+            }  
+
+			multipleLevel(configFile);
 
             //////////////////////////////////////////////////////////////////////////
 		}
diff --git a/apps/gpu/LBM/MusselOysterOyster/CMakeLists.txt b/apps/gpu/LBM/MusselOysterOyster/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6966f6e48d5e08200ff3d57ab9096ba8db73a74d
--- /dev/null
+++ b/apps/gpu/LBM/MusselOysterOyster/CMakeLists.txt
@@ -0,0 +1,10 @@
+PROJECT(MusselOysterOyster LANGUAGES CUDA CXX)
+
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES MusselOysterOyster.cpp)
+
+set_source_files_properties(MusselOysterOyster.cpp PROPERTIES LANGUAGE CUDA)
+
+set_target_properties(MusselOysterOyster PROPERTIES 
+	CUDA_SEPARABLE_COMPILATION ON
+	VS_DEBUGGER_COMMAND "C:/Program Files/Microsoft MPI/Bin/mpiexec.exe"
+    VS_DEBUGGER_COMMAND_ARGUMENTS "-n 2 \"$<TARGET_FILE:MusselOysterOyster>\"")
\ No newline at end of file
diff --git a/apps/gpu/LBM/MusselOysterOyster/MusselOysterOyster.cpp b/apps/gpu/LBM/MusselOysterOyster/MusselOysterOyster.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..477bae8075b4bd33d85dac662826b0a685aa3255
--- /dev/null
+++ b/apps/gpu/LBM/MusselOysterOyster/MusselOysterOyster.cpp
@@ -0,0 +1,728 @@
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <string>
+#include <sstream>
+#include <iostream>
+#include <stdexcept>
+#include <fstream>
+#include <exception>
+#include <memory>
+
+#include "mpi.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "basics/Core/DataTypes.h"
+#include "basics/PointerDefinitions.h"
+#include "basics/Core/VectorTypes.h"
+
+#include "basics/Core/LbmOrGks.h"
+#include "basics/Core/StringUtilities/StringUtil.h"
+#include "basics/config/ConfigurationFile.h"
+#include "basics/Core/Logger/Logger.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
+#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
+#include "GridGenerator/grid/BoundaryConditions/Side.h"
+#include "GridGenerator/grid/GridFactory.h"
+
+#include "geometries/Sphere/Sphere.h"
+#include "geometries/TriangularMesh/TriangularMesh.h"
+
+#include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h"
+#include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h"
+#include "GridGenerator/io/STLReaderWriter/STLReader.h"
+#include "GridGenerator/io/STLReaderWriter/STLWriter.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "VirtualFluids_GPU/LBM/Simulation.h"
+#include "VirtualFluids_GPU/Communication/Communicator.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
+#include "VirtualFluids_GPU/Parameter/Parameter.h"
+#include "VirtualFluids_GPU/Output/FileWriter.h"
+
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.h"
+#include "VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h"
+
+#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "utilities/communication.h"
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+//          U s e r    s e t t i n g s
+//
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+//  Tesla 03
+// std::string outPath("E:/temp/MusselOysterResults/");
+// std::string gridPathParent = "E:/temp/GridMussel/";
+// std::string stlPath("C:/Users/Master/Documents/MasterAnna/STL/");
+// std::string simulationName("MusselOyster");
+
+// Phoenix
+std::string outPath("/work/y0078217/Results/MusselOysterResults/");
+std::string gridPathParent = "/work/y0078217/Grids/GridMusselOyster/";
+std::string stlPath("/home/y0078217/STL/MusselOyster/");
+std::string simulationName("MusselOyster");
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+void multipleLevel(const std::string& configPath)
+{
+    logging::Logger::addStream(&std::cout);
+    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
+    logging::Logger::timeStamp(logging::Logger::ENABLE);
+    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
+
+    auto gridFactory = GridFactory::make();
+    gridFactory->setGridStrategy(Device::CPU);
+    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
+
+    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
+    
+	vf::gpu::Communicator* comm = vf::gpu::Communicator::getInstanz();
+    vf::basics::ConfigurationFile config;
+    std::cout << configPath << std::endl;
+    config.load(configPath);
+    SPtr<Parameter> para = std::make_shared<Parameter>(config, comm->getNummberOfProcess(), comm->getPID());
+
+
+
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    bool useGridGenerator = true;
+    bool useStreams       = true;
+    bool useLevels        = true;
+    para->useReducedCommunicationAfterFtoC = true;
+    para->setCalcTurbulenceIntensity(true);
+
+    if (para->getNumprocs() == 1) {
+       useStreams       = false;
+       para->useReducedCommunicationAfterFtoC = false;
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    std::string bivalveType = "OYSTER"; // "MUSSEL" "OYSTER"
+    std::string gridPath(gridPathParent + bivalveType); // only for GridGenerator, for GridReader the gridPath needs to be set in the config file
+
+    // real dxGrid = (real)2.0; // 2.0
+    real dxGrid = (real)1.0; // 1.0
+    if (para->getNumprocs() == 8)  
+        dxGrid = 0.5;  
+    real vxLB = (real)0.051; // LB units
+    real Re = (real)300.0;
+
+    real heightBivalve;
+    if (bivalveType == "MUSSEL")
+        heightBivalve = (real)35.0; 
+    else if (bivalveType == "OYSTER")
+        heightBivalve = (real)72.0;
+    else
+        std::cerr << "Error: unknown bivalveType" << std::endl;
+    real length = 1.0 / dxGrid; // heightBivalve / dxGrid
+    real viscosityLB = (vxLB * length) / Re;
+
+    para->setVelocity(vxLB);
+    para->setViscosity(viscosityLB);
+    para->setVelocityRatio((real) 58.82352941);
+    para->setViscosityRatio((real) 0.058823529);
+    para->setDensityRatio((real) 998.0);
+
+    *logging::out << logging::Logger::INFO_HIGH << "bivalveType = " << bivalveType << " \n";
+    *logging::out << logging::Logger::INFO_HIGH << "velocity LB [dx/dt] = " << vxLB << " \n";
+    *logging::out << logging::Logger::INFO_HIGH << "viscosity LB [dx^2/dt] = " << viscosityLB << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "velocity real [m/s] = " << vxLB * para->getVelocityRatio()<< " \n";
+    *logging::out << logging::Logger::INFO_HIGH << "viscosity real [m^2/s] = " << viscosityLB * para->getViscosityRatio() << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "dxGrid = " << dxGrid << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "useGridGenerator = " << useGridGenerator << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "useStreams = " << useStreams << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "number of processes = " << para->getNumprocs() << "\n";
+
+    
+    // para->setTOut(1000);
+    // para->setTEnd(10000);
+
+    para->setCalcDragLift(false);
+    para->setUseWale(false);
+
+    if (para->getOutputPath().size() == 0) {
+        para->setOutputPath(outPath);
+    }
+    para->setOutputPrefix(simulationName);
+    para->setFName(para->getOutputPath() + para->getOutputPrefix());
+    para->setPrintFiles(true);
+    std::cout << "Write result files to " << para->getFName() << std::endl;
+
+
+    if (useLevels)
+        para->setMaxLevel(2);
+    else
+        para->setMaxLevel(1);
+
+
+    if (useStreams)
+        para->setUseStreams();
+    // para->setMainKernel("CumulantK17CompChim");
+    para->setMainKernel("CumulantK17CompChimStream");
+    *logging::out << logging::Logger::INFO_HIGH << "Kernel: " << para->getMainKernel() << "\n";
+
+    // if (para->getNumprocs() > 1) {
+    //     para->setDevices(std::vector<uint>{ (uint)0, (uint)1 });
+    //     para->setMaxDev(2);
+    // } else 
+    //     para->setDevices(std::vector<uint>{ (uint)0 });
+
+
+
+    //////////////////////////////////////////////////////////////////////////
+
+
+    if (useGridGenerator) {
+        real bbzm;
+        real bbzp;
+        if (bivalveType == "MUSSEL")
+            bbzp = 9.0;
+        if (bivalveType == "OYSTER")
+            bbzp = 13.0;
+        bbzm = -bbzp;
+        // bounding box mussel:
+        // const real bbxm = 0.0;
+        // const real bbxp = 76.0;
+        // const real bbym = 0.0;
+        // const real bbyp = 35.0;
+        // const real bbzm = -9.15;
+        // const real bbzp = 9.15;
+        // bounding box oyster:
+        // const real bbxm = 0.0;
+        // const real bbxp = 102.0;
+        // const real bbym = 0.0;
+        // const real bbyp = 72.0;
+        // const real bbzm = -13.0;
+        // const real bbzp = 13.0;
+
+        const real xGridMin  = -100.0;     // -100.0;
+        const real xGridMax  = 470.0;      // alt 540.0 // neu 440 // mit groesserem Level 1 470
+        const real yGridMin  = 1.0;        // 1.0;
+        const real yGridMax  = 350.0;      // alt 440.0; // neu 350
+        const real zGridMin  = -85;        // -85;
+        const real zGridMax  = 85.0;       // 85;
+
+        TriangularMesh *bivalveSTL       = TriangularMesh::make(stlPath + bivalveType + ".stl");
+        TriangularMesh *bivalveRef_1_STL = nullptr;
+        if (useLevels)
+            bivalveRef_1_STL = TriangularMesh::make(stlPath + bivalveType + "_Level1.stl");
+
+
+        if (para->getNumprocs() > 1) {
+            const uint generatePart = vf::gpu::Communicator::getInstanz()->getPID();
+
+            real overlap = (real)8.0 * dxGrid;
+            gridBuilder->setNumberOfLayers(10, 8);
+
+            if (comm->getNummberOfProcess() == 2) {
+                const real zSplit = 0.0; //round(((double)bbzp + bbzm) * 0.5);          
+
+                if (generatePart == 0) {
+                    gridBuilder->addCoarseGrid( xGridMin,   yGridMin,     zGridMin, 
+                                                xGridMax,   yGridMax,     zSplit+overlap,   dxGrid);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->addCoarseGrid( xGridMin,    yGridMin,     zSplit-overlap, 
+                                                xGridMax,    yGridMax,     zGridMax,        dxGrid);
+                }
+
+
+                if (useLevels) {
+                    gridBuilder->addGrid(bivalveRef_1_STL, 1);
+                }
+
+                gridBuilder->addGeometry(bivalveSTL);
+
+                if (generatePart == 0){
+                    gridBuilder->setSubDomainBox(std::make_shared<BoundingBox>(xGridMin,    xGridMax,
+                                                                               yGridMin,    yGridMax, 
+                                                                               zGridMin,    zSplit));
+                }
+                if (generatePart == 1){
+                    gridBuilder->setSubDomainBox(std::make_shared<BoundingBox>(xGridMin,    xGridMax, 
+                                                                               yGridMin,    yGridMax, 
+                                                                               zSplit,      zGridMax));            
+                }
+
+                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+
+                if (generatePart == 0) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 1);
+                }
+
+                if (generatePart == 1) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
+                }
+                
+                gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+                ////////////////////////////////////////////////////////////////////////// 
+                if (generatePart == 0)
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                if (generatePart == 1)
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+                gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                //////////////////////////////////////////////////////////////////////////           
+            } else if (comm->getNummberOfProcess() == 4) {
+
+                const real xSplit = 100.0;
+                const real zSplit = 0.0;
+
+                if (generatePart == 0) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xSplit + overlap, yGridMax,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, yGridMin, zGridMin, xGridMax, yGridMax,
+                                               zSplit+overlap, dxGrid);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zSplit-overlap, xSplit + overlap, yGridMax,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, yGridMin, zSplit-overlap, xGridMax, yGridMax,
+                                               zGridMax, dxGrid);
+                }
+
+                if (useLevels) {
+                    gridBuilder->addGrid(bivalveRef_1_STL, 1);
+                }
+
+                gridBuilder->addGeometry(bivalveSTL);
+
+                if (generatePart == 0)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, yGridMin, yGridMax, zGridMin, zSplit));
+                if (generatePart == 1)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, yGridMin, yGridMax, zGridMin, zSplit));
+                if (generatePart == 2)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, yGridMin, yGridMax, zSplit, zGridMax));
+                if (generatePart == 3)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, yGridMin, yGridMax, zSplit, zGridMax));
+
+                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+
+                if (generatePart == 0) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 1);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 2);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 0);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 3);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 3);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 2);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 1);
+                }
+
+                gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+                //////////////////////////////////////////////////////////////////////////
+                if (generatePart == 0) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 2) {                    
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                }
+                gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);                
+                if (generatePart == 3) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                }
+                if (generatePart == 1) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);                    
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0); // set pressure BC after velocity BCs
+                }
+                //////////////////////////////////////////////////////////////////////////
+            } else if (comm->getNummberOfProcess() == 8) {
+                real xSplit = 140.0; // 100.0 // mit groesserem Level 1 140.0
+                real ySplit = 32.0;  // 32.0
+                real zSplit = 0.0;
+
+                if (generatePart == 0) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xSplit + overlap, ySplit + overlap,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->addCoarseGrid(xGridMin, ySplit - overlap, zGridMin, xSplit + overlap, yGridMax,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, yGridMin, zGridMin, xGridMax, ySplit + overlap,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, ySplit - overlap, zGridMin, xGridMax, yGridMax,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 4) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zSplit - overlap, xSplit + overlap, ySplit + overlap,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 5) {
+                    gridBuilder->addCoarseGrid(xGridMin, ySplit - overlap, zSplit - overlap, xSplit + overlap, yGridMax,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 6) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, yGridMin, zSplit - overlap, xGridMax, ySplit + overlap,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 7) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, ySplit - overlap, zSplit - overlap, xGridMax, yGridMax,
+                                               zGridMax, dxGrid);
+                }
+
+                if (useLevels) {
+                    gridBuilder->addGrid(bivalveRef_1_STL, 1);
+                }
+
+                gridBuilder->addGeometry(bivalveSTL);
+                
+                if (generatePart == 0)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, yGridMin, ySplit, zGridMin, zSplit));
+                if (generatePart == 1)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, ySplit, yGridMax, zGridMin, zSplit));
+                if (generatePart == 2)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, yGridMin, ySplit, zGridMin, zSplit));
+                if (generatePart == 3)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, ySplit, yGridMax, zGridMin, zSplit));
+                if (generatePart == 4)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, yGridMin, ySplit, zSplit, zGridMax));
+                if (generatePart == 5)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, ySplit, yGridMax, zSplit, zGridMax));
+                if (generatePart == 6)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, yGridMin, ySplit, zSplit, zGridMax));
+                if (generatePart == 7)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, ySplit, yGridMax, zSplit, zGridMax));
+
+                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+
+                if (generatePart == 0) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 1);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 2);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 4);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 0);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 3);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 5);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 3);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 0);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 6);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 2);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 1);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 7);
+                }
+                if (generatePart == 4) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 5);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 6);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
+                }
+                if (generatePart == 5) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 4);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 7);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 1);
+                }
+                if (generatePart == 6) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 7);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 4);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 2);
+                }
+                if (generatePart == 7) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 6);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 5);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 3);
+                }
+
+                //////////////////////////////////////////////////////////////////////////
+                if (generatePart == 0) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY,  0.0, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY,  0.0, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
+                }
+                if (generatePart == 4) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY,  0.0, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 5) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 6) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY,  0.0, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                }
+                if (generatePart == 7) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                }
+                // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+                //////////////////////////////////////////////////////////////////////////                
+            }
+            if (para->getKernelNeedsFluidNodeIndicesToRun())
+                gridBuilder->findFluidNodes(useStreams);
+
+            // gridBuilder->writeGridsToVtk(outPath +  bivalveType + "/grid/part" + std::to_string(generatePart) + "_"); 
+            // gridBuilder->writeGridsToVtk(outPath + bivalveType + "/" + std::to_string(generatePart) + "/grid/"); 
+            // gridBuilder->writeArrows(outPath + bivalveType + "/" + std::to_string(generatePart) + " /arrow");
+
+            SimulationFileWriter::write(gridPath + std::to_string(generatePart) + "/", gridBuilder,
+                                        FILEFORMAT::BINARY);
+        } else {
+
+            gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xGridMax, yGridMax, zGridMax, dxGrid);
+
+            if (useLevels) {
+                gridBuilder->setNumberOfLayers(10, 8);
+                gridBuilder->addGrid(bivalveRef_1_STL, 1);
+            }
+
+            gridBuilder->addGeometry(bivalveSTL);
+
+            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+
+            gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+            //////////////////////////////////////////////////////////////////////////
+            gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+            gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+
+            //////////////////////////////////////////////////////////////////////////
+            if (para->getKernelNeedsFluidNodeIndicesToRun())
+                gridBuilder->findFluidNodes(useStreams);
+
+            // gridBuilder->writeGridsToVtk(outPath +  bivalveType + "/grid/");
+            // gridBuilder->writeArrows ((outPath + bivalveType + "/arrow");
+
+            SimulationFileWriter::write(gridPath, gridBuilder, FILEFORMAT::BINARY);
+        }
+        
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+        // const real velocityLB = velocity * dt / dx; // LB units
+
+        // const real vx = velocityLB / (real)sqrt(2.0); // LB units
+        // const real vy = velocityLB / (real)sqrt(2.0); // LB units
+
+        // const real viscosityLB = nx * velocityLB / Re; // LB units
+
+        //*logging::out << logging::Logger::INFO_HIGH << "velocity  [dx/dt] = " << velocityLB << " \n";
+        //*logging::out << logging::Logger::INFO_HIGH << "viscosity [dx^2/dt] = " << viscosityLB << "\n";
+
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+        // para->setVelocity(velocityLB);
+        // para->setViscosity(viscosityLB);
+
+        // para->setVelocityRatio(velocity/ velocityLB);
+
+        // para->setMainKernel("CumulantK17CompChim");
+
+        // para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz)
+        // {
+        //          rho = (real)0.0;
+        //          vx  = (real)0.0; //(6 * velocityLB * coordZ * (L - coordZ) / (L * L));
+        //          vy  = (real)0.0;
+        //          vz  = (real)0.0;
+        //      });
+
+
+
+       //return;
+    }
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    SPtr<CudaMemoryManager> cudaMemoryManager = CudaMemoryManager::make(para);
+
+    SPtr<GridProvider> gridGenerator;
+    if (useGridGenerator)
+        gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager);
+    else {
+        gridGenerator = GridProvider::makeGridReader(FILEFORMAT::BINARY, para, cudaMemoryManager);
+    }
+           
+    Simulation sim;
+    SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
+    SPtr<KernelFactoryImp> kernelFactory = KernelFactoryImp::getInstance();
+    SPtr<PreProcessorFactoryImp> preProcessorFactory = PreProcessorFactoryImp::getInstance();
+    sim.setFactories(kernelFactory, preProcessorFactory);
+    sim.init(para, gridGenerator, fileWriter, cudaMemoryManager);
+    sim.run();
+    sim.free();
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    
+}
+
+// Program entry point. Initializes MPI, runs multipleLevel() with the config file passed as the
+// only command line argument (default: "configMusselOyster.txt" in this source file's directory),
+// logs any exception and finalizes MPI. Returns 0 on success and 1 if an exception was caught.
+int main( int argc, char* argv[])
+{
+    MPI_Init(&argc, &argv);
+    int exitCode = 0;
+
+    if ( argv != nullptr )
+    {
+        try {
+            std::string configFile;
+            if (argc == 2) {
+                configFile = argv[1];
+                std::cout << "Using configFile command line argument: " << configFile << std::endl;
+            }
+
+            // Directory of this source file; the default config file is looked up there.
+            std::string targetPath = __FILE__;
+#ifdef _WIN32
+            // On Windows __FILE__ can contain either separator, depending on how the compiler was invoked.
+            targetPath = targetPath.substr(0, targetPath.find_last_of("/\\") + 1);
+#else
+            targetPath = targetPath.substr(0, targetPath.find_last_of('/') + 1);
+#endif
+
+            std::cout << targetPath << std::endl;
+
+            if (configFile.empty()) {
+                configFile = targetPath + "configMusselOyster.txt";
+            }
+
+            multipleLevel(configFile);
+        }
+        catch (const std::bad_alloc& e)
+        {
+            *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
+            exitCode = 1; // report the failure to the caller instead of exiting with 0
+        }
+        catch (const std::exception& e)
+        {
+            *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
+            exitCode = 1;
+        }
+        catch (...)
+        {
+            *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
+            exitCode = 1;
+        }
+    }
+
+    MPI_Finalize();
+    return exitCode;
+}
diff --git a/apps/gpu/LBM/MusselOysterOyster/configPhoenix1GPU.txt b/apps/gpu/LBM/MusselOysterOyster/configPhoenix1GPU.txt
new file mode 100644
index 0000000000000000000000000000000000000000..09d92d28cf210ddef5a0d87bc285d9f2d21e27ff
--- /dev/null
+++ b/apps/gpu/LBM/MusselOysterOyster/configPhoenix1GPU.txt
@@ -0,0 +1,36 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0"
+NumberOfDevices=1
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/MusselOysterResults/1GPUOyster1/
+#Path="F:/Work/Computations/out/MusselOyster/"
+#Prefix="MusselOyster" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridMusselOyster/Oyster1GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=400000 #400000 / 200000
+TimeOut=200000 #200000 / 100000
+#TimeStartOut=0
\ No newline at end of file
diff --git a/apps/gpu/LBM/MusselOysterOyster/configPhoenix8.txt b/apps/gpu/LBM/MusselOysterOyster/configPhoenix8.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7dfef6c2ae7d5bc1ed108578240611745b33469c
--- /dev/null
+++ b/apps/gpu/LBM/MusselOysterOyster/configPhoenix8.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/MusselOysterResults/8GPUOyster05/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridMusselOyster/Oyster8GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=800000
+TimeOut=400000
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/CMakeLists.txt b/apps/gpu/LBM/SphereScaling/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..49bee20f7cfc3561c62cf1b36c2f2992e7baada8
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/CMakeLists.txt
@@ -0,0 +1,10 @@
+PROJECT(SphereScaling LANGUAGES CUDA CXX)
+# Register the target through the project's vf_add_library helper: one source file, listed libraries linked privately.
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES SphereScaling.cpp)
+# Mark SphereScaling.cpp as a CUDA source even though it carries a .cpp extension.
+set_source_files_properties(SphereScaling.cpp PROPERTIES LANGUAGE CUDA)
+# Turn on separable compilation of CUDA device code for this target.
+set_target_properties(SphereScaling PROPERTIES 
+	CUDA_SEPARABLE_COMPILATION ON)
+	# VS_DEBUGGER_COMMAND "C:/Program Files/Microsoft MPI/Bin/mpiexec.exe"
+    # VS_DEBUGGER_COMMAND_ARGUMENTS "-n 2 \"$<TARGET_FILE:SphereScaling>\"")
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/SphereScaling.cpp b/apps/gpu/LBM/SphereScaling/SphereScaling.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fbde84da226c4076990cde7b1dfeb76f7b66a645
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/SphereScaling.cpp
@@ -0,0 +1,742 @@
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <string>
+#include <sstream>
+#include <iostream>
+#include <stdexcept>
+#include <fstream>
+#include <exception>
+#include <memory>
+
+#include "mpi.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "basics/Core/DataTypes.h"
+#include "basics/PointerDefinitions.h"
+#include "basics/Core/VectorTypes.h"
+
+#include "basics/Core/LbmOrGks.h"
+#include "basics/Core/StringUtilities/StringUtil.h"
+#include "basics/config/ConfigurationFile.h"
+#include "basics/Core/Logger/Logger.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
+#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
+#include "GridGenerator/grid/BoundaryConditions/Side.h"
+#include "GridGenerator/grid/GridFactory.h"
+
+#include "geometries/Sphere/Sphere.h"
+#include "geometries/Cuboid/Cuboid.h"
+#include "geometries/Conglomerate/Conglomerate.h"
+#include "geometries/TriangularMesh/TriangularMesh.h"
+
+#include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h"
+#include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h"
+#include "GridGenerator/io/STLReaderWriter/STLReader.h"
+#include "GridGenerator/io/STLReaderWriter/STLWriter.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "VirtualFluids_GPU/LBM/Simulation.h"
+#include "VirtualFluids_GPU/Communication/Communicator.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
+#include "VirtualFluids_GPU/Parameter/Parameter.h"
+#include "VirtualFluids_GPU/Output/FileWriter.h"
+
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.h"
+#include "VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h"
+
+#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "utilities/communication.h"
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+//          U s e r    s e t t i n g s
+//
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Path set for "Tesla 03" (disabled; swap it with the Phoenix set below to use it):
+// std::string outPath("E:/temp/SphereScalingResults/");
+// std::string gridPathParent = "E:/temp/GridSphereScaling/";
+// std::string simulationName("SphereScaling");
+// std::string stlPath("C:/Users/Master/Documents/MasterAnna/STL/Sphere/");
+
+// Path set for "Phoenix" (active); these globals are read by multipleLevel() below.
+std::string outPath("/work/y0078217/Results/SphereScalingResults/"); // fallback output path, used only if the config file sets none
+std::string gridPathParent = "/work/y0078217/Grids/GridSphereScaling/"; // initial value of gridPath in multipleLevel()
+std::string simulationName("SphereScaling"); // passed to setOutputPrefix(), i.e. prefix of the result file names
+std::string stlPath("/home/y0078217/STL/Sphere/"); // directory holding the Spheres_<N>GPU.stl geometry files
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+void multipleLevel(const std::string& configPath)
+{
+    logging::Logger::addStream(&std::cout);
+    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
+    logging::Logger::timeStamp(logging::Logger::ENABLE);
+    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
+
+    auto gridFactory = GridFactory::make();
+    gridFactory->setGridStrategy(Device::CPU);
+    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
+
+    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
+    
+	vf::gpu::Communicator* comm = vf::gpu::Communicator::getInstanz();
+    vf::basics::ConfigurationFile config;
+    std::cout << configPath << std::endl;
+    config.load(configPath);
+    SPtr<Parameter> para = std::make_shared<Parameter>(config, comm->getNummberOfProcess(), comm->getPID());
+
+
+
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    bool useGridGenerator                  = true;
+    bool useLevels                         = false;
+    std::string scalingType                = "strong"; // "strong" // "weak"
+    // bool useStreams                        = true;
+    // para->useReducedCommunicationAfterFtoC = true;
+    bool useStreams = para->getUseStreams();
+
+    if (para->getNumprocs() == 1) {
+       useStreams       = false;
+       para->useReducedCommunicationAfterFtoC = false;
+    }
+    if (scalingType != "weak" && scalingType != "strong")
+        std::cerr << "unknown scaling type" << std::endl;
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    std::string gridPath(gridPathParent); // only for GridGenerator, for GridReader the gridPath needs to be set in the config file
+
+    real dxGrid      = (real)0.2;
+    real vxLB = (real)0.0005; // LB units
+    real viscosityLB = 0.001; //(vxLB * dxGrid) / Re;
+
+    para->setVelocity(vxLB);
+    para->setViscosity(viscosityLB);
+    para->setVelocityRatio((real) 58.82352941);
+    para->setViscosityRatio((real) 0.058823529);
+    para->setDensityRatio((real) 998.0);
+
+    *logging::out << logging::Logger::INFO_HIGH << "velocity LB [dx/dt] = " << vxLB << " \n";
+    *logging::out << logging::Logger::INFO_HIGH << "viscosity LB [dx^2/dt] = " << viscosityLB << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "velocity real [m/s] = " << vxLB * para->getVelocityRatio()<< " \n";
+    *logging::out << logging::Logger::INFO_HIGH << "viscosity real [m^2/s] = " << viscosityLB * para->getViscosityRatio() << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "dxGrid = " << dxGrid << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "useGridGenerator = " << useGridGenerator << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "useStreams = " << useStreams << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "number of processes = " << para->getNumprocs() << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "para->useReducedCommunicationAfterFtoC = " <<  para->useReducedCommunicationAfterFtoC << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "scalingType = " <<  scalingType << "\n";
+    
+    // para->setTOut(10);
+    // para->setTEnd(10);
+
+    para->setCalcDragLift(false);
+    para->setUseWale(false);
+
+    if (para->getOutputPath().size() == 0) {
+        para->setOutputPath(outPath);
+    }
+    para->setOutputPrefix(simulationName);
+    para->setFName(para->getOutputPath() + para->getOutputPrefix());
+    para->setPrintFiles(true);
+    std::cout << "Write result files to " << para->getFName() << std::endl;
+
+    if (useLevels)
+        para->setMaxLevel(2);
+    else
+        para->setMaxLevel(1);
+
+
+    if (useStreams)
+        para->setUseStreams();
+    //para->setMainKernel("CumulantK17CompChim");
+    para->setMainKernel("CumulantK17CompChimStream");
+    *logging::out << logging::Logger::INFO_HIGH << "Kernel: " << para->getMainKernel() << "\n";
+
+    // if (para->getNumprocs() == 4) {
+    //     para->setDevices(std::vector<uint>{ 0u, 1u, 2u, 3u });
+    //     para->setMaxDev(4);
+    // } else if (para->getNumprocs() == 2) {
+    //     para->setDevices(std::vector<uint>{ 2u, 3u });
+    //     para->setMaxDev(2);
+    // } else 
+    //     para->setDevices(std::vector<uint>{ 0u });
+    //     para->setMaxDev(1);
+
+
+
+    //////////////////////////////////////////////////////////////////////////
+
+
+    if (useGridGenerator) {
+        real sideLengthCube;
+        if (useLevels){
+            if (scalingType == "strong")
+                sideLengthCube = 76.0; // Phoenix: strong scaling with two levels = 76.0
+            else if (scalingType == "weak")
+                sideLengthCube = 70.0; // Phoenix: weak scaling with two levels = 70.0
+        }
+        else
+            sideLengthCube = 92.0; // Phoenix: 86.0
+        real xGridMin          = 0.0; 
+        real xGridMax          = sideLengthCube;
+        real yGridMin          = 0.0;
+        real yGridMax          = sideLengthCube;
+        real zGridMin          = 0.0;
+        real zGridMax          = sideLengthCube;
+        const real dSphere     = 10.0;
+        const real dSphereLev1 = 22.0; // Phoenix: 22.0
+        const real dCubeLev1   = 72.0; // Phoenix: 72.0
+
+        if (para->getNumprocs() > 1) {
+            const uint generatePart = vf::gpu::Communicator::getInstanz()->getPID();
+
+            real overlap = (real)8.0 * dxGrid;
+            gridBuilder->setNumberOfLayers(10, 8);
+
+            if (comm->getNummberOfProcess() == 2) {
+                real zSplit = 0.5 * sideLengthCube;
+                    
+                if (scalingType == "weak"){
+                    zSplit = zGridMax;
+                    zGridMax = zGridMax + sideLengthCube;
+                }
+
+                if (generatePart == 0) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xGridMax, yGridMax, zSplit + overlap,
+                                               dxGrid);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zSplit - overlap, xGridMax, yGridMax, zGridMax,
+                                               dxGrid);
+                }
+
+                if (useLevels) {
+                    if (scalingType == "strong"){
+                        gridBuilder->addGrid(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1), 1);
+                    } else if (scalingType == "weak"){
+                         gridBuilder->addGrid(new Cuboid( -0.5*dCubeLev1, -0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, 
+                                                           0.5*dCubeLev1,  0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1),1);
+                    }
+                }
+
+                if (scalingType == "weak"){
+                    if (useLevels) {
+                        gridBuilder->addGeometry(new Sphere(0.0, 0.0, sideLengthCube, dSphere));
+                    }else{
+                        TriangularMesh *sphereSTL = TriangularMesh::make(stlPath + "Spheres_2GPU.stl");
+                        gridBuilder->addGeometry(sphereSTL);
+                    }                    
+                } else if (scalingType == "strong") {
+                    gridBuilder->addGeometry(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
+                }
+
+                if (generatePart == 0)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xGridMax, yGridMin, yGridMax, zGridMin, zSplit));
+                if (generatePart == 1)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xGridMax, yGridMin, yGridMax, zSplit, zGridMax));
+
+                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                                
+                if (generatePart == 0) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 1);
+                }
+
+                if (generatePart == 1) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
+                }
+
+                gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+                //////////////////////////////////////////////////////////////////////////
+                gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                if (generatePart == 0)
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                if (generatePart == 1)
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+                //////////////////////////////////////////////////////////////////////////
+           
+            } else if (comm->getNummberOfProcess() == 4) {
+                real ySplit= 0.5 * sideLengthCube;
+                real zSplit= 0.5 * sideLengthCube;
+
+                if (scalingType == "weak") {
+                    ySplit = yGridMax;
+                    yGridMax = yGridMax + (yGridMax-yGridMin);
+                    zSplit = zGridMax;
+                    zGridMax = zGridMax + (zGridMax-zGridMin);
+                }
+
+                if (generatePart == 0) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xGridMax , ySplit + overlap,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->addCoarseGrid(xGridMin, ySplit - overlap, zGridMin, xGridMax, yGridMax,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zSplit - overlap, xGridMax, ySplit + overlap,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->addCoarseGrid(xGridMin, ySplit - overlap, zSplit - overlap, xGridMax, yGridMax,
+                                               zGridMax, dxGrid);
+                }
+
+                if (useLevels) {
+                    if (scalingType == "strong"){
+                        gridBuilder->addGrid(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1), 1);
+                    } else if (scalingType == "weak"){
+                         gridBuilder->addGrid(new Cuboid( -0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, 
+                                                           0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1),1);
+                    }
+                }
+
+                if (scalingType == "weak"){
+                    if (useLevels) {
+                        gridBuilder->addGeometry(new Sphere(0.0, sideLengthCube, sideLengthCube, dSphere));
+                    }else{
+                        TriangularMesh *sphereSTL = TriangularMesh::make(stlPath + "Spheres_4GPU.stl");
+                        gridBuilder->addGeometry(sphereSTL);
+                    }                    
+                } else if (scalingType == "strong") {
+                    gridBuilder->addGeometry(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
+                }
+
+                if (generatePart == 0)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xGridMax, yGridMin, ySplit, zGridMin, zSplit));
+                if (generatePart == 1)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xGridMax, ySplit, yGridMax, zGridMin, zSplit));
+                if (generatePart == 2)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xGridMax, yGridMin, ySplit, zSplit, zGridMax));
+                if (generatePart == 3)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xGridMax, ySplit, yGridMax, zSplit, zGridMax));
+
+
+                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+                                
+                if (generatePart == 0) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 1);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 2);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 0);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 3);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 3);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 2);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 1);
+                }
+
+                //////////////////////////////////////////////////////////////////////////
+                if (generatePart == 0) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);                    
+                }
+                if (generatePart == 3) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                }
+                gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+                //////////////////////////////////////////////////////////////////////////
+            } else if (comm->getNummberOfProcess() == 8) {
+                real xSplit = 0.5 * sideLengthCube;
+                real ySplit = 0.5 * sideLengthCube;
+                real zSplit = 0.5 * sideLengthCube;
+
+                if (scalingType == "weak") {                    
+                    xSplit = xGridMax;
+                    xGridMax = xGridMax + (xGridMax-xGridMin);
+                    ySplit = yGridMax;
+                    yGridMax = yGridMax + (yGridMax-yGridMin);
+                    zSplit = zGridMax;
+                    zGridMax = zGridMax + (zGridMax-zGridMin);                    
+                }
+
+                if (generatePart == 0) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xSplit + overlap, ySplit + overlap,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->addCoarseGrid(xGridMin, ySplit - overlap, zGridMin, xSplit + overlap, yGridMax,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, yGridMin, zGridMin, xGridMax, ySplit + overlap,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, ySplit - overlap, zGridMin, xGridMax, yGridMax,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 4) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zSplit - overlap, xSplit + overlap, ySplit + overlap,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 5) {
+                    gridBuilder->addCoarseGrid(xGridMin, ySplit - overlap, zSplit - overlap, xSplit + overlap, yGridMax,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 6) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, yGridMin, zSplit - overlap, xGridMax, ySplit + overlap,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 7) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, ySplit - overlap, zSplit - overlap, xGridMax, yGridMax,
+                                               zGridMax, dxGrid);
+                }
+
+                if (useLevels) {
+                    if (scalingType == "strong"){
+                        gridBuilder->addGrid(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1), 1);
+                    } else if (scalingType == "weak"){
+                         gridBuilder->addGrid(new Cuboid( sideLengthCube-0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, 
+                                                          sideLengthCube+0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1),1);
+                    }
+                }
+
+                if (scalingType == "weak"){
+                    if (useLevels) {
+                        gridBuilder->addGeometry(new Sphere(sideLengthCube, sideLengthCube, sideLengthCube, dSphere));
+                    }else{
+                        TriangularMesh *sphereSTL = TriangularMesh::make(stlPath + "Spheres_8GPU.stl");
+                        gridBuilder->addGeometry(sphereSTL);
+                    }                    
+                } else if (scalingType == "strong") {
+                    gridBuilder->addGeometry(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
+                }
+                
+                if (generatePart == 0)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, yGridMin, ySplit, zGridMin, zSplit));
+                if (generatePart == 1)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, ySplit, yGridMax, zGridMin, zSplit));
+                if (generatePart == 2)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, yGridMin, ySplit, zGridMin, zSplit));
+                if (generatePart == 3)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, ySplit, yGridMax, zGridMin, zSplit));
+                if (generatePart == 4)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, yGridMin, ySplit, zSplit, zGridMax));
+                if (generatePart == 5)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, ySplit, yGridMax, zSplit, zGridMax));
+                if (generatePart == 6)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, yGridMin, ySplit, zSplit, zGridMax));
+                if (generatePart == 7)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, ySplit, yGridMax, zSplit, zGridMax));
+
+                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+
+                if (generatePart == 0) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 1);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 2);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 4);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 0);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 3);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 5);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 3);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 0);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 6);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 2);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 1);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 7);
+                }
+                if (generatePart == 4) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 5);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 6);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
+                }
+                if (generatePart == 5) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 4);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 7);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 1);
+                }
+                if (generatePart == 6) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 7);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 4);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 2);
+                }
+                if (generatePart == 7) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 6);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 5);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 3);
+                }
+
+                //////////////////////////////////////////////////////////////////////////
+                if (generatePart == 0) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
+                }
+                if (generatePart == 4) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 5) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 6) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                }
+                if (generatePart == 7) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                }
+                // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+                //////////////////////////////////////////////////////////////////////////                
+            }
+            if (para->getKernelNeedsFluidNodeIndicesToRun())
+                gridBuilder->findFluidNodes(useStreams);
+
+            // gridBuilder->writeGridsToVtk(outPath + "grid/part" +
+            // std::to_string(generatePart) + "_"); gridBuilder->writeGridsToVtk(outPath +
+            // std::to_string(generatePart) + "/grid/"); gridBuilder->writeArrows(outPath + 
+            // std::to_string(generatePart) + " /arrow");
+
+            SimulationFileWriter::write(gridPath + std::to_string(generatePart) + "/", gridBuilder,
+                                        FILEFORMAT::BINARY);
+        } else {
+
+            gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xGridMax, yGridMax, zGridMax, dxGrid);
+
+            if (useLevels) {
+                    gridBuilder->setNumberOfLayers(10, 8);
+                if(scalingType == "strong"){
+                    gridBuilder->addGrid(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1), 1);
+                } else if (scalingType == "weak")
+                    gridBuilder->addGrid(new Cuboid( sideLengthCube-0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, 
+                                                     sideLengthCube+0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1),1);
+            }
+                
+            if (scalingType == "weak"){
+                if(useLevels){
+                    gridBuilder->addGeometry(new Sphere(sideLengthCube, sideLengthCube, sideLengthCube, dSphere));
+                }else{
+                   TriangularMesh *sphereSTL = TriangularMesh::make(stlPath + "Spheres_1GPU.stl");
+                   gridBuilder->addGeometry(sphereSTL);
+                }
+            } else {
+                gridBuilder->addGeometry(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
+            }
+
+            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+            
+            gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+            //////////////////////////////////////////////////////////////////////////
+            gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+            gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+
+            // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+            //////////////////////////////////////////////////////////////////////////
+            if (para->getKernelNeedsFluidNodeIndicesToRun())
+                gridBuilder->findFluidNodes(useStreams);
+
+            // gridBuilder->writeGridsToVtk("E:/temp/MusselOyster/" + "/grid/");
+            // gridBuilder->writeArrows ("E:/temp/MusselOyster/" + "/arrow");
+
+            SimulationFileWriter::write(gridPath, gridBuilder, FILEFORMAT::BINARY);
+        }        
+    }
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    SPtr<CudaMemoryManager> cudaMemoryManager = CudaMemoryManager::make(para);
+
+    SPtr<GridProvider> gridGenerator;
+    if (useGridGenerator)
+        gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager);
+    else {
+        gridGenerator = GridProvider::makeGridReader(FILEFORMAT::BINARY, para, cudaMemoryManager);
+    }
+           
+    Simulation sim;
+    SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
+    SPtr<KernelFactoryImp> kernelFactory = KernelFactoryImp::getInstance();
+    SPtr<PreProcessorFactoryImp> preProcessorFactory = PreProcessorFactoryImp::getInstance();
+    sim.setFactories(kernelFactory, preProcessorFactory);
+    sim.init(para, gridGenerator, fileWriter, cudaMemoryManager);
+    sim.run();
+    sim.free();
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    
+}
+
+// Entry point: initializes MPI, selects the config file and runs multipleLevel().
+// A single command line argument is used as the config path; otherwise "config.txt" next to this source file.
+// Returns 0 on success and 1 if an exception was caught.
+int main( int argc, char* argv[])
+{
+    MPI_Init(&argc, &argv);
+    std::string configFile;
+    int exitCode = 0;
+
+    if ( argv != nullptr )
+    {
+        try
+        {
+            // __FILE__ is the path of this source file; its directory is the default config location.
+            std::string targetPath = __FILE__;
+
+            if (argc == 2) {
+                configFile = argv[1];
+                std::cout << "Using configFile command line argument: " << configFile << std::endl;
+            }
+
+#ifdef _WIN32
+            targetPath = targetPath.substr(0, targetPath.find_last_of('\\') + 1);
+#else
+            targetPath = targetPath.substr(0, targetPath.find_last_of('/') + 1);
+#endif
+            std::cout << targetPath << std::endl;
+
+            if (configFile.empty()) {
+                configFile = targetPath + "config.txt";
+            }
+
+            multipleLevel(configFile);
+        }
+        catch (const std::bad_alloc& e)
+        {
+            exitCode = 1; // report the failure to the launcher instead of always returning 0
+            *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
+        }
+        catch (const std::exception& e)
+        {
+            exitCode = 1;
+            *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
+        }
+        catch (...)
+        {
+            exitCode = 1;
+            *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
+        }
+    }
+
+    MPI_Finalize();
+    return exitCode;
+}
diff --git a/apps/gpu/LBM/SphereScaling/config.txt b/apps/gpu/LBM/SphereScaling/config.txt
new file mode 100644
index 0000000000000000000000000000000000000000..44c5fedb297cc62f8b1b5d26c075bd5172cac081
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/config.txt
@@ -0,0 +1,46 @@
+# Tesla 03
+# mpiexec -n 2 "C:/Users/Master/Documents/MasterAnna/VirtualFluids_dev/build/bin/Release/SphereScaling.exe" "C:/Users/Master/Documents/MasterAnna/VirtualFluids_dev/apps/gpu/LBM/SphereScaling/config.txt"
+# Phoenix
+# mpirun -np 2 "./VirtualFluids_dev/build/bin/SphereScaling" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/config.txt"
+
+# Phoenix mpich
+# mpirun -np 2 nvprof -f -o SphereScaling.%q{PMI_RANK}.nvprof "./VirtualFluids_dev/build/bin/SphereScaling" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/configPhoenix4GPU.txt"
+# Phoenix openmpi
+# mpirun -np 2 nvprof -f -o SphereScaling.%q{OMPI_COMM_WORLD_RANK}.nvprof "./VirtualFluids_dev/build/bin/SphereScaling" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/configPhoenix4GPU.txt"
+
+##################################################
+#GPU Mapping
+##################################################
+#Devices="0 1 2 3"
+#NumberOfDevices=2
+
+##################################################
+#information for writing
+##################################################
+#Path="E:/temp/SphereScalingResults/"
+Path=/work/y0078217/Results/SphereScalingResults/
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/
+#GridPath=E:/temp/GridSphereScaling/
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+#TimeEnd=10
+#TimeOut=10 
+#TimeStartOut=0
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix1GPU.txt b/apps/gpu/LBM/SphereScaling/configPhoenix1GPU.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b35b152369ec96c738b803d6e87f1c6d7cf8d546
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix1GPU.txt
@@ -0,0 +1,36 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="3"
+NumberOfDevices=1
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/1GPU/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling1GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000
+#TimeStartOut=0
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix1GPU_1LevStrongOS.txt b/apps/gpu/LBM/SphereScaling/configPhoenix1GPU_1LevStrongOS.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5488797815bd797916434e8b6a0a82ce623a8db4
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix1GPU_1LevStrongOS.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=2
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/1GPU/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling1GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = false
+useReducedCommunicationInInterpolation = false
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix1GPU_1LevStrongStream.txt b/apps/gpu/LBM/SphereScaling/configPhoenix1GPU_1LevStrongStream.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e93f161aa16977ecd65aab230f40db0bbef60130
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix1GPU_1LevStrongStream.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=2
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/1GPU/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling1GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix2GPU.txt b/apps/gpu/LBM/SphereScaling/configPhoenix2GPU.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9e88253b040bcab5ec1349e701a800841fba6f3a
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix2GPU.txt
@@ -0,0 +1,41 @@
+# Phoenix
+# mpirun -np 2 "./VirtualFluids_dev/build/bin/SphereScaling" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/configPhoenix2GPU.txt"
+# Profiling Phoenix
+# nsys profile --trace=cuda,mpi --mpi-impl=mpich --output=./nsysprofiles/nsys.sphereScaling.np2 --force-overwrite=true mpirun -np 2 "./VirtualFluids_dev/build/bin/SphereScaling" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/configPhoenix2GPU.txt"
+
+##################################################
+#GPU Mapping
+##################################################
+Devices="3 2 1 0"
+NumberOfDevices=2
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/2GPU/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling2GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=1000
+TimeOut=1000
+#TimeStartOut=0
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix2GPU_1LevStrongOS.txt b/apps/gpu/LBM/SphereScaling/configPhoenix2GPU_1LevStrongOS.txt
new file mode 100644
index 0000000000000000000000000000000000000000..795e6bcb7d0dc3314f26b171c2f61d88e005a797
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix2GPU_1LevStrongOS.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=2
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/4GPU/1LevStrongStream/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling4GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = false
+useReducedCommunicationInInterpolation = false
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix2GPU_1LevStrongStream.txt b/apps/gpu/LBM/SphereScaling/configPhoenix2GPU_1LevStrongStream.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ef75fb88e563869b67f8aa33d839ec85c1d749b6
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix2GPU_1LevStrongStream.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=2
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/4GPU/1LevStrongStream/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling4GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000 
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix4GPU.txt b/apps/gpu/LBM/SphereScaling/configPhoenix4GPU.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d1043f7f8375eedca5ef624dd1a8baf60346ee26
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix4GPU.txt
@@ -0,0 +1,36 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/4GPU/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling4GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000 
+#TimeStartOut=0
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_1LevStrongOS.txt b/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_1LevStrongOS.txt
new file mode 100644
index 0000000000000000000000000000000000000000..99a057d31c7f15659d32776967853e076b5939ee
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_1LevStrongOS.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/4GPU/1LevStrongStream/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling4GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = false
+useReducedCommunicationInInterpolation = false
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_1LevStrongStream.txt b/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_1LevStrongStream.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ea6338672305177b5119a2f557675bc491fddadc
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_1LevStrongStream.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/4GPU/1LevStrongStream/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling4GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_1LevWeakStream.txt b/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_1LevWeakStream.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ad05efa37ec5fadc9bc5fe9711485ec6f03e1960
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_1LevWeakStream.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/4GPU/1LevWeakStream/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling4GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000 
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix8GPU.txt b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a62a49094745209d585830ee60531b195acbfd2b
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3 4 5 6 7"
+NumberOfDevices=8
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/8GPU/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling8GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000 
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevStrongOS.txt b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevStrongOS.txt
new file mode 100644
index 0000000000000000000000000000000000000000..892f11013d6742af416ba3b93a993b059a6fa3a0
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevStrongOS.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/8GPU/1LevStrongOS/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling8GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000 
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = false
+useReducedCommunicationInInterpolation = false
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevStrongStream.txt b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevStrongStream.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b026d6b7304f9f13effec6c899512beb804787f5
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevStrongStream.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/8GPU/1LevStrongStream/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling8GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=1000
+TimeOut=1000
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevWeakOS.txt b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevWeakOS.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ae848a2889d1301de78c6fff42e045965fa9baf7
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevWeakOS.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/8GPU/1LevWeakOS/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling8GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000 
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = false
+useReducedCommunicationInInterpolation = false
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevWeakStream.txt b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevWeakStream.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fc8403eca0bcf96645c85b81c3109ec7619f34d2
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevWeakStream.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/8GPU/1LevWeakStream/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling8GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100
+TimeOut=100
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling1/CMakeLists.txt b/apps/gpu/LBM/SphereScaling1/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a759675bd752a78d24f5dc795c3705b3679617c8
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling1/CMakeLists.txt
@@ -0,0 +1,10 @@
+PROJECT(SphereScaling1 LANGUAGES CUDA CXX)
+# Target built from SphereScaling1.cpp, privately linked against basics, VirtualFluids_GPU, GridGenerator and MPI (BUILDTYPE binary presumably yields an executable - confirm in vf_add_library).
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES SphereScaling1.cpp)
+# Mark the .cpp source as CUDA so that it is compiled as CUDA code rather than plain C++.
+set_source_files_properties(SphereScaling1.cpp PROPERTIES LANGUAGE CUDA)
+# Turn on separable compilation of CUDA device code for this target; the VS debugger properties below are disabled.
+set_target_properties(SphereScaling1 PROPERTIES 
+	CUDA_SEPARABLE_COMPILATION ON)
+	# VS_DEBUGGER_COMMAND "C:/Program Files/Microsoft MPI/Bin/mpiexec.exe"
+    # VS_DEBUGGER_COMMAND_ARGUMENTS "-n 2 \"$<TARGET_FILE:SphereScaling1>\"")
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling1/SphereScaling1.cpp b/apps/gpu/LBM/SphereScaling1/SphereScaling1.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..14d151402f93dcbda32a05f2f09b357b589332b8
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling1/SphereScaling1.cpp
@@ -0,0 +1,742 @@
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <string>
+#include <sstream>
+#include <iostream>
+#include <stdexcept>
+#include <fstream>
+#include <exception>
+#include <memory>
+
+#include "mpi.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "basics/Core/DataTypes.h"
+#include "basics/PointerDefinitions.h"
+#include "basics/Core/VectorTypes.h"
+
+#include "basics/Core/LbmOrGks.h"
+#include "basics/Core/StringUtilities/StringUtil.h"
+#include "basics/config/ConfigurationFile.h"
+#include "basics/Core/Logger/Logger.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
+#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
+#include "GridGenerator/grid/BoundaryConditions/Side.h"
+#include "GridGenerator/grid/GridFactory.h"
+
+#include "geometries/Sphere/Sphere.h"
+#include "geometries/Cuboid/Cuboid.h"
+#include "geometries/Conglomerate/Conglomerate.h"
+#include "geometries/TriangularMesh/TriangularMesh.h"
+
+#include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h"
+#include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h"
+#include "GridGenerator/io/STLReaderWriter/STLReader.h"
+#include "GridGenerator/io/STLReaderWriter/STLWriter.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "VirtualFluids_GPU/LBM/Simulation.h"
+#include "VirtualFluids_GPU/Communication/Communicator.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
+#include "VirtualFluids_GPU/Parameter/Parameter.h"
+#include "VirtualFluids_GPU/Output/FileWriter.h"
+
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.h"
+#include "VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h"
+
+#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "utilities/communication.h"
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+//          U s e r    s e t t i n g s
+//
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+//  Tesla 03 (alternative path set, currently commented out)
+//  std::string outPath("E:/temp/SphereScalingResults/");
+//  std::string gridPathParent = "E:/temp/GridSphereScaling/";
+//  std::string simulationName("SphereScaling");
+// std::string stlPath("C:/Users/Master/Documents/MasterAnna/STL/Sphere/");
+
+// Phoenix (active path set)
+std::string outPath("/work/y0078217/Results/SphereScalingResults/"); // fallback output directory: applied in multipleLevel() only when para->getOutputPath() is empty
+std::string gridPathParent = "/work/y0078217/Grids/GridSphereScaling/"; // initial value of gridPath in multipleLevel() (GridGenerator case)
+std::string simulationName("SphereScaling"); // passed to para->setOutputPrefix() in multipleLevel()
+std::string stlPath("/home/y0078217/STL/Sphere/"); // directory prefix for the Spheres_<N>GPU.stl meshes loaded via TriangularMesh::make()
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+void multipleLevel(const std::string& configPath)
+{
+    logging::Logger::addStream(&std::cout);
+    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
+    logging::Logger::timeStamp(logging::Logger::ENABLE);
+    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
+
+    auto gridFactory = GridFactory::make();
+    gridFactory->setGridStrategy(Device::CPU);
+    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
+
+    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
+    
+	vf::gpu::Communicator* comm = vf::gpu::Communicator::getInstanz();
+    vf::basics::ConfigurationFile config;
+    std::cout << configPath << std::endl;
+    config.load(configPath);
+    SPtr<Parameter> para = std::make_shared<Parameter>(config, comm->getNummberOfProcess(), comm->getPID());
+
+
+
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    bool useGridGenerator                  = true;
+    bool useLevels                         = true;
+    std::string scalingType                = "weak"; // "strong" // "weak"
+    // bool useStreams                        = true;
+    // para->useReducedCommunicationAfterFtoC = true;
+    bool useStreams = para->getUseStreams();
+
+    if (para->getNumprocs() == 1) {
+       useStreams       = false;
+       para->useReducedCommunicationAfterFtoC = false;
+    }
+    if (scalingType != "weak" && scalingType != "strong")
+        std::cerr << "unknown scaling type" << std::endl;
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    std::string gridPath(gridPathParent); // only for GridGenerator, for GridReader the gridPath needs to be set in the config file
+
+    real dxGrid      = (real)0.2;
+    real vxLB = (real)0.0005; // LB units
+    real viscosityLB = 0.001; //(vxLB * dxGrid) / Re;
+
+    para->setVelocity(vxLB);
+    para->setViscosity(viscosityLB);
+    para->setVelocityRatio((real) 58.82352941);
+    para->setViscosityRatio((real) 0.058823529);
+    para->setDensityRatio((real) 998.0);
+
+    *logging::out << logging::Logger::INFO_HIGH << "velocity LB [dx/dt] = " << vxLB << " \n";
+    *logging::out << logging::Logger::INFO_HIGH << "viscosity LB [dx^2/dt] = " << viscosityLB << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "velocity real [m/s] = " << vxLB * para->getVelocityRatio()<< " \n";
+    *logging::out << logging::Logger::INFO_HIGH << "viscosity real [m^2/s] = " << viscosityLB * para->getViscosityRatio() << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "dxGrid = " << dxGrid << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "useGridGenerator = " << useGridGenerator << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "useStreams = " << useStreams << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "number of processes = " << para->getNumprocs() << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "para->useReducedCommunicationAfterFtoC = " <<  para->useReducedCommunicationAfterFtoC << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "scalingType = " <<  scalingType << "\n";
+    
+    // para->setTOut(10);
+    // para->setTEnd(10);
+
+    para->setCalcDragLift(false);
+    para->setUseWale(false);
+
+    if (para->getOutputPath().size() == 0) {
+        para->setOutputPath(outPath);
+    }
+    para->setOutputPrefix(simulationName);
+    para->setFName(para->getOutputPath() + para->getOutputPrefix());
+    para->setPrintFiles(true);
+    std::cout << "Write result files to " << para->getFName() << std::endl;
+
+    if (useLevels)
+        para->setMaxLevel(2);
+    else
+        para->setMaxLevel(1);
+
+
+    if (useStreams)
+        para->setUseStreams();
+    //para->setMainKernel("CumulantK17CompChim");
+    para->setMainKernel("CumulantK17CompChimStream");
+    *logging::out << logging::Logger::INFO_HIGH << "Kernel: " << para->getMainKernel() << "\n";
+
+    // if (para->getNumprocs() == 4) {
+    //     para->setDevices(std::vector<uint>{ 0u, 1u, 2u, 3u });
+    //     para->setMaxDev(4);
+    // } else if (para->getNumprocs() == 2) {
+    //     para->setDevices(std::vector<uint>{ 2u, 3u });
+    //     para->setMaxDev(2);
+    // } else 
+    //     para->setDevices(std::vector<uint>{ 0u });
+    //     para->setMaxDev(1);
+
+
+
+    //////////////////////////////////////////////////////////////////////////
+
+
+    if (useGridGenerator) {
+        real sideLengthCube;
+        if (useLevels){
+            if (scalingType == "strong")
+                sideLengthCube = 76.0; // Phoenix: strong scaling with two levels = 76.0
+            else if (scalingType == "weak")
+                sideLengthCube = 70.0; // Phoenix: weak scaling with two levels = 70.0
+        }
+        else
+            sideLengthCube = 86.0; // Phoenix: 86.0
+        real xGridMin          = 0.0; 
+        real xGridMax          = sideLengthCube;
+        real yGridMin          = 0.0;
+        real yGridMax          = sideLengthCube;
+        real zGridMin          = 0.0;
+        real zGridMax          = sideLengthCube;
+        const real dSphere     = 10.0;
+        const real dSphereLev1 = 22.0; // Phoenix: 22.0
+        const real dCubeLev1   = 72.0; // Phoenix: 72.0
+
+        if (para->getNumprocs() > 1) {
+            const uint generatePart = vf::gpu::Communicator::getInstanz()->getPID();
+
+            real overlap = (real)8.0 * dxGrid;
+            gridBuilder->setNumberOfLayers(10, 8);
+
+            if (comm->getNummberOfProcess() == 2) {
+                real xSplit = 0.5 * sideLengthCube;
+                    
+                if (scalingType == "weak"){
+                    xSplit = xGridMax;
+                    xGridMax = xGridMax + sideLengthCube;
+                }
+
+                if (generatePart == 0) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xSplit + overlap, yGridMax, zGridMax,
+                                               dxGrid);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->addCoarseGrid(xSplit-overlap, yGridMin, zGridMin, xGridMax, yGridMax, zGridMax,
+                                               dxGrid);
+                }
+
+                if (useLevels) {
+                    if (scalingType == "strong"){
+                        gridBuilder->addGrid(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1), 1);
+                    } else if (scalingType == "weak"){
+                         gridBuilder->addGrid(new Cuboid( sideLengthCube-0.5*dCubeLev1, -0.5*dCubeLev1, -0.5*dCubeLev1, 
+                                                           sideLengthCube+0.5*dCubeLev1,  0.5*dCubeLev1, 0.5*dCubeLev1),1);
+                    }
+                }
+
+                if (scalingType == "weak"){
+                    if (useLevels) {
+                        gridBuilder->addGeometry(new Sphere(sideLengthCube, 0.0, 0.0, dSphere));
+                    }else{
+                        TriangularMesh *sphereSTL = TriangularMesh::make(stlPath + "Spheres_2GPU.stl");
+                        gridBuilder->addGeometry(sphereSTL);
+                    }                    
+                } else if (scalingType == "strong") {
+                    gridBuilder->addGeometry(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
+                }
+
+                if (generatePart == 0)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, yGridMin, yGridMax, zGridMin, zGridMax));
+                if (generatePart == 1)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, yGridMin, yGridMax, zGridMin, zGridMax));
+
+                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                                
+                if (generatePart == 0) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 1);
+                }
+
+                if (generatePart == 1) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 0);
+                }
+
+                gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+                //////////////////////////////////////////////////////////////////////////
+                gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                if (generatePart == 0)
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                if (generatePart == 1)                    
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+                //////////////////////////////////////////////////////////////////////////
+           
+            } else if (comm->getNummberOfProcess() == 4) {
+                real ySplit= 0.5 * sideLengthCube;
+                real zSplit= 0.5 * sideLengthCube;
+
+                if (scalingType == "weak") {
+                    ySplit = yGridMax;
+                    yGridMax = yGridMax + (yGridMax-yGridMin);
+                    zSplit = zGridMax;
+                    zGridMax = zGridMax + (zGridMax-zGridMin);
+                }
+
+                if (generatePart == 0) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xGridMax , ySplit + overlap,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->addCoarseGrid(xGridMin, ySplit - overlap, zGridMin, xGridMax, yGridMax,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zSplit - overlap, xGridMax, ySplit + overlap,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->addCoarseGrid(xGridMin, ySplit - overlap, zSplit - overlap, xGridMax, yGridMax,
+                                               zGridMax, dxGrid);
+                }
+
+                if (useLevels) {
+                    if (scalingType == "strong"){
+                        gridBuilder->addGrid(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1), 1);
+                    } else if (scalingType == "weak"){
+                         gridBuilder->addGrid(new Cuboid( -0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, 
+                                                           0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1),1);
+                    }
+                }
+
+                if (scalingType == "weak"){
+                    if (useLevels) {
+                        gridBuilder->addGeometry(new Sphere(0.0, sideLengthCube, sideLengthCube, dSphere));
+                    }else{
+                        TriangularMesh *sphereSTL = TriangularMesh::make(stlPath + "Spheres_4GPU.stl");
+                        gridBuilder->addGeometry(sphereSTL);
+                    }                    
+                } else if (scalingType == "strong") {
+                    gridBuilder->addGeometry(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
+                }
+
+                if (generatePart == 0)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xGridMax, yGridMin, ySplit, zGridMin, zSplit));
+                if (generatePart == 1)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xGridMax, ySplit, yGridMax, zGridMin, zSplit));
+                if (generatePart == 2)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xGridMax, yGridMin, ySplit, zSplit, zGridMax));
+                if (generatePart == 3)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xGridMax, ySplit, yGridMax, zSplit, zGridMax));
+
+
+                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+                                
+                if (generatePart == 0) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 1);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 2);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 0);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 3);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 3);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 2);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 1);
+                }
+
+                //////////////////////////////////////////////////////////////////////////
+                if (generatePart == 0) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);                    
+                }
+                if (generatePart == 3) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                }
+                gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+                //////////////////////////////////////////////////////////////////////////
+            } else if (comm->getNummberOfProcess() == 8) {
+                real xSplit = 0.5 * sideLengthCube;
+                real ySplit = 0.5 * sideLengthCube;
+                real zSplit = 0.5 * sideLengthCube;
+
+                if (scalingType == "weak") {                    
+                    xSplit = xGridMax;
+                    xGridMax = xGridMax + (xGridMax-xGridMin);
+                    ySplit = yGridMax;
+                    yGridMax = yGridMax + (yGridMax-yGridMin);
+                    zSplit = zGridMax;
+                    zGridMax = zGridMax + (zGridMax-zGridMin);                    
+                }
+
+                if (generatePart == 0) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xSplit + overlap, ySplit + overlap,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->addCoarseGrid(xGridMin, ySplit - overlap, zGridMin, xSplit + overlap, yGridMax,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, yGridMin, zGridMin, xGridMax, ySplit + overlap,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, ySplit - overlap, zGridMin, xGridMax, yGridMax,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 4) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zSplit - overlap, xSplit + overlap, ySplit + overlap,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 5) {
+                    gridBuilder->addCoarseGrid(xGridMin, ySplit - overlap, zSplit - overlap, xSplit + overlap, yGridMax,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 6) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, yGridMin, zSplit - overlap, xGridMax, ySplit + overlap,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 7) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, ySplit - overlap, zSplit - overlap, xGridMax, yGridMax,
+                                               zGridMax, dxGrid);
+                }
+                
+                if (useLevels) {
+                    if (scalingType == "strong"){
+                        gridBuilder->addGrid(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1), 1);
+                    } else if (scalingType == "weak"){
+                         gridBuilder->addGrid(new Cuboid( sideLengthCube-0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, 
+                                                          sideLengthCube+0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1),1);
+                    }
+                }
+
+                if (scalingType == "weak"){
+                    if (useLevels) {
+                        gridBuilder->addGeometry(new Sphere(sideLengthCube, sideLengthCube, sideLengthCube, dSphere));
+                    }else{
+                        TriangularMesh *sphereSTL = TriangularMesh::make(stlPath + "Spheres_8GPU.stl");
+                        gridBuilder->addGeometry(sphereSTL);
+                    }                    
+                } else if (scalingType == "strong") {
+                    gridBuilder->addGeometry(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
+                }
+                
+                if (generatePart == 0)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, yGridMin, ySplit, zGridMin, zSplit));
+                if (generatePart == 1)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, ySplit, yGridMax, zGridMin, zSplit));
+                if (generatePart == 2)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, yGridMin, ySplit, zGridMin, zSplit));
+                if (generatePart == 3)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, ySplit, yGridMax, zGridMin, zSplit));
+                if (generatePart == 4)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, yGridMin, ySplit, zSplit, zGridMax));
+                if (generatePart == 5)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, ySplit, yGridMax, zSplit, zGridMax));
+                if (generatePart == 6)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, yGridMin, ySplit, zSplit, zGridMax));
+                if (generatePart == 7)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, ySplit, yGridMax, zSplit, zGridMax));
+
+                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+
+                if (generatePart == 0) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 1);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 2);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 4);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 0);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 3);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 5);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 3);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 0);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 6);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 2);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 1);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 7);
+                }
+                if (generatePart == 4) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 5);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 6);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
+                }
+                if (generatePart == 5) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 4);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 7);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 1);
+                }
+                if (generatePart == 6) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 7);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 4);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 2);
+                }
+                if (generatePart == 7) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 6);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 5);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 3);
+                }
+
+                //////////////////////////////////////////////////////////////////////////
+                if (generatePart == 0) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
+                }
+                if (generatePart == 4) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 5) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 6) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                }
+                if (generatePart == 7) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                }
+                // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+                //////////////////////////////////////////////////////////////////////////                
+            }
+            if (para->getKernelNeedsFluidNodeIndicesToRun())
+                gridBuilder->findFluidNodes(useStreams);
+
+            // gridBuilder->writeGridsToVtk(outPath + "grid/part" +
+            // std::to_string(generatePart) + "_"); gridBuilder->writeGridsToVtk(outPath +
+            // std::to_string(generatePart) + "/grid/"); gridBuilder->writeArrows(outPath + 
+            // std::to_string(generatePart) + " /arrow");
+
+            SimulationFileWriter::write(gridPath + std::to_string(generatePart) + "/", gridBuilder,
+                                        FILEFORMAT::BINARY);
+        } else {
+
+            gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xGridMax, yGridMax, zGridMax, dxGrid);
+
+            if (useLevels) {
+                    gridBuilder->setNumberOfLayers(10, 8);
+                if(scalingType == "strong"){
+                    gridBuilder->addGrid(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1), 1);
+                } else if (scalingType == "weak")
+                    gridBuilder->addGrid(new Cuboid( sideLengthCube-0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, 
+                                                     sideLengthCube+0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1),1);
+            }
+                
+            if (scalingType == "weak"){
+                if(useLevels){
+                    gridBuilder->addGeometry(new Sphere(sideLengthCube, sideLengthCube, sideLengthCube, dSphere));
+                }else{
+                   TriangularMesh *sphereSTL = TriangularMesh::make(stlPath + "Spheres_1GPU.stl");
+                   gridBuilder->addGeometry(sphereSTL);
+                }
+            } else {
+                gridBuilder->addGeometry(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
+            }
+
+            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+            
+            gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+            //////////////////////////////////////////////////////////////////////////
+            gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+            gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+
+            // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+            //////////////////////////////////////////////////////////////////////////
+            if (para->getKernelNeedsFluidNodeIndicesToRun())
+                gridBuilder->findFluidNodes(useStreams);
+
+            // gridBuilder->writeGridsToVtk("E:/temp/MusselOyster/" + "/grid/");
+            // gridBuilder->writeArrows ("E:/temp/MusselOyster/" + "/arrow");
+
+            SimulationFileWriter::write(gridPath, gridBuilder, FILEFORMAT::BINARY);
+        }        
+    }
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    SPtr<CudaMemoryManager> cudaMemoryManager = CudaMemoryManager::make(para);
+
+    SPtr<GridProvider> gridGenerator;
+    if (useGridGenerator)
+        gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager);
+    else {
+        gridGenerator = GridProvider::makeGridReader(FILEFORMAT::BINARY, para, cudaMemoryManager);
+    }
+           
+    Simulation sim;
+    SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
+    SPtr<KernelFactoryImp> kernelFactory = KernelFactoryImp::getInstance();
+    SPtr<PreProcessorFactoryImp> preProcessorFactory = PreProcessorFactoryImp::getInstance();
+    sim.setFactories(kernelFactory, preProcessorFactory);
+    sim.init(para, gridGenerator, fileWriter, cudaMemoryManager);
+    sim.run();
+    sim.free();
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    
+}
+
+// Entry point: initializes MPI, picks the config file and runs multipleLevel().
+//
+// Exceptions thrown during the run are logged through logging::out and reported as exit code 1;
+// MPI is finalized in either case.
+//
+// argc: number of command line arguments.
+// argv: if exactly one argument is given, argv[1] is used as the config file path;
+//       otherwise "config.txt" in the directory of this source file is used.
+// Returns 0 on success and 1 if an exception was caught.
+int main(int argc, char *argv[])
+{
+    MPI_Init(&argc, &argv);
+    int exitCode = 0;
+
+    if (argv != nullptr) {
+        try {
+            // An explicit config file on the command line takes precedence.
+            std::string configFile;
+            if (argc == 2) {
+                configFile = argv[1];
+                std::cout << "Using configFile command line argument: " << configFile << std::endl;
+            }
+
+            // Directory of this source file as recorded at compile time (__FILE__ minus the file name).
+            std::string targetPath = __FILE__;
+#ifdef _WIN32
+            // Windows toolchains may put '/' or '\' into __FILE__, so accept both separators.
+            targetPath = targetPath.substr(0, targetPath.find_last_of("/\\") + 1);
+#else
+            targetPath = targetPath.substr(0, targetPath.find_last_of('/') + 1);
+#endif
+            std::cout << targetPath << std::endl;
+
+            // No config file given: fall back to config.txt next to this source file.
+            if (configFile.empty()) {
+                configFile = targetPath + "config.txt";
+            }
+
+            multipleLevel(configFile);
+        } catch (const std::bad_alloc &e) {
+            *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
+            exitCode = 1;
+        } catch (const std::exception &e) {
+            *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
+            exitCode = 1;
+        } catch (...) {
+            *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
+            exitCode = 1;
+        }
+    }
+
+    MPI_Finalize();
+    return exitCode;
+}
diff --git a/apps/gpu/LBM/SphereScaling1/config.txt b/apps/gpu/LBM/SphereScaling1/config.txt
new file mode 100644
index 0000000000000000000000000000000000000000..44c5fedb297cc62f8b1b5d26c075bd5172cac081
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling1/config.txt
@@ -0,0 +1,46 @@
+# Tesla 03
+# mpiexec -n 2 "C:/Users/Master/Documents/MasterAnna/VirtualFluids_dev/build/bin/Release/SphereScaling.exe" "C:/Users/Master/Documents/MasterAnna/VirtualFluids_dev/apps/gpu/LBM/SphereScaling/config.txt"
+# Phoenix
+# mpirun -np 2 "./VirtualFluids_dev/build/bin/SphereScaling" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/config.txt"
+
+# Phoenix mpich
+# mpirun -np 2 nvprof -f -o SphereScaling.%q{PMI_RANK}.nvprof "./VirtualFluids_dev/build/bin/SphereScaling" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/configPhoenix4GPU.txt"
+# Phoenix openmpi
+# mpirun -np 2 nvprof -f -o SphereScaling.%q{OMPI_COMM_WORLD_RANK}.nvprof "./VirtualFluids_dev/build/bin/SphereScaling" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/configPhoenix4GPU.txt"
+
+##################################################
+#GPU Mapping
+##################################################
+#Devices="0 1 2 3"
+#NumberOfDevices=2
+
+##################################################
+#information for writing
+##################################################
+#Path="E:/temp/SphereScalingResults/"
+Path=/work/y0078217/Results/SphereScalingResults/
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/
+#GridPath=E:/temp/GridSphereScaling/
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+#TimeEnd=10
+#TimeOut=10 
+#TimeStartOut=0
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling1/configPhoenix4GPU_2LevStrongStream.txt b/apps/gpu/LBM/SphereScaling1/configPhoenix4GPU_2LevStrongStream.txt
new file mode 100644
index 0000000000000000000000000000000000000000..234ca79bf7fc9bc844e2de361718a01609208f79
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling1/configPhoenix4GPU_2LevStrongStream.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/4GPU/2LevStrongStream/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling4GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling1/configPhoenix4GPU_2LevWeakStream.txt b/apps/gpu/LBM/SphereScaling1/configPhoenix4GPU_2LevWeakStream.txt
new file mode 100644
index 0000000000000000000000000000000000000000..efbc2f125b313632c16b6f922fb32302d302a4fb
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling1/configPhoenix4GPU_2LevWeakStream.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/4GPU/2LevWeakStream/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling4GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=1000
+TimeOut=1000
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevStrongOS.txt b/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevStrongOS.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b650042a8b0192759cfd72c4b40d9bff1df89a4e
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevStrongOS.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/8GPU/2LevStrongOS/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling8GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000 
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = false
+useReducedCommunicationInInterpolation = false
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevStrongStream.txt b/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevStrongStream.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6f3849abb4a909b07d3c559e7c1fbf4ba9178119
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevStrongStream.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/8GPU/2LevStrongStream/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling8GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevWeakOS.txt b/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevWeakOS.txt
new file mode 100644
index 0000000000000000000000000000000000000000..63b1e745aebe158e8573ca045fc228a6f9b7dd34
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevWeakOS.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/8GPU/2LevWeakOS/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling8GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000 
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = false
+useReducedCommunicationInInterpolation = false
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevWeakStream.txt b/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevWeakStream.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9dbdd92032462bcc85952acf2c5296c0c0c72f79
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevWeakStream.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#information for writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/8GPU/2LevWeakStream/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#information for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling8GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/gpu.cmake b/gpu.cmake
index 12f7d2991a078e3e86560dcc069bab348fae438a..370e16397069a5190cdcddfbe111c5bff45e4cc3 100644
--- a/gpu.cmake
+++ b/gpu.cmake
@@ -44,9 +44,12 @@ IF (BUILD_VF_GPU)
     #add_subdirectory(apps/gpu/LBM/gridGeneratorTest)
     #add_subdirectory(apps/gpu/LBM/TGV_3D)
     #add_subdirectory(apps/gpu/LBM/TGV_3D_MultiGPU)
+	add_subdirectory(apps/gpu/LBM/SphereScaling)
+    add_subdirectory(apps/gpu/LBM/SphereScaling1)
 	add_subdirectory(apps/gpu/LBM/MusselOyster)
-	add_subdirectory(apps/gpu/LBM/MusselOyster2x)
-	add_subdirectory(apps/gpu/LBM/MusselOyster3z)
+    add_subdirectory(apps/gpu/LBM/MusselOysterOyster)
+	#add_subdirectory(apps/gpu/LBM/MusselOyster2x)
+	#add_subdirectory(apps/gpu/LBM/MusselOyster3z)
 ELSE()
     MESSAGE( STATUS "exclude Virtual Fluids GPU." )
 ENDIF()
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8082a554963f01637295674d846397ceef34eeb2
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp
@@ -0,0 +1,182 @@
+//  _    ___      __              __________      _     __        ______________   __
+// | |  / (_)____/ /___  ______ _/ / ____/ /_  __(_)___/ /____   /  ___/ __  / /  / /
+// | | / / / ___/ __/ / / / __ `/ / /_  / / / / / / __  / ___/  / /___/ /_/ / /  / /
+// | |/ / / /  / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__  )  / /_) / ____/ /__/ / 
+// |___/_/_/   \__/\__,_/\__,_/_/_/   /_/\__,_/_/\__,_/____/   \____/_/    \_____/
+//
+//////////////////////////////////////////////////////////////////////////
+#include "Calculation/CalcTurbulenceIntensity.h"
+#include <cuda_runtime.h>
+#include <helper_cuda.h>
+#include <basics/Core/StringUtilities/StringUtil.h>
+
+void allocTurbulenceIntensity(Parameter *para, CudaMemoryManager *cudaManager)
+{
+    // Per level: let the memory manager create the arrays and size the host result vector to match.
+    for (int level = para->getCoarse(); level <= para->getFine(); ++level) {
+        cudaManager->cudaAllocTurbulenceIntensity(level, para->getParH(level)->size_Mat_SP);
+        para->getParH(level)->turbulenceIntensity.resize(para->getParH(level)->size_Mat_SP);
+    }
+    resetVelocityFluctuationsAndMeans(para, cudaManager); // start from zeroed sums on host and device
+}
+
+void calcVelocityAndFluctuations(Parameter *para, CudaMemoryManager *cudaManager, uint tdiff)
+{
+    // Copies the accumulated sums of every level from the device to the host and converts them in place:
+    // each sum is divided by tdiff, then the product of the averaged velocities is subtracted.
+    const real numberOfSamples = (real)tdiff;
+
+    for (int level = para->getCoarse(); level <= para->getFine(); ++level) {
+        auto &host = *para->getParH(level);
+        cudaManager->cudaCopyTurbulenceIntensityDH(level, host.size_Mat_SP);
+
+        for (uint node = 0; node < host.size_Mat_SP; node++) {
+            // mean velocity
+            host.vx_mean[node] /= numberOfSamples;
+            host.vy_mean[node] /= numberOfSamples;
+            host.vz_mean[node] /= numberOfSamples;
+
+            // averaged velocity products
+            host.vxx[node] /= numberOfSamples;
+            host.vyy[node] /= numberOfSamples;
+            host.vzz[node] /= numberOfSamples;
+            host.vxy[node] /= numberOfSamples;
+            host.vxz[node] /= numberOfSamples;
+            host.vyz[node] /= numberOfSamples;
+
+            // fluctuations
+            host.vxx[node] -= host.vx_mean[node] * host.vx_mean[node];
+            host.vyy[node] -= host.vy_mean[node] * host.vy_mean[node];
+            host.vzz[node] -= host.vz_mean[node] * host.vz_mean[node];
+            host.vxy[node] -= host.vx_mean[node] * host.vy_mean[node];
+            host.vxz[node] -= host.vx_mean[node] * host.vz_mean[node];
+            host.vyz[node] -= host.vy_mean[node] * host.vz_mean[node];
+        }
+    }
+}
+
+
+void calcTurbulenceIntensity(Parameter *para, CudaMemoryManager *cudaManager, uint tdiff) {
+    // calcVelocityAndFluctuations() already iterates over every level, so it is called once here
+    // instead of once per level (which repeated all device-to-host copies and the averaging).
+    calcVelocityAndFluctuations(para, cudaManager, tdiff);
+
+    for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) {
+        for (uint i = 0; i < para->getParH(lev)->size_Mat_SP; i++) {
+            // a third of the sum of vxx, vyy and vzz
+            const real fluc_squared = (real)(
+                1.0 / 3.0 * (para->getParH(lev)->vxx[i] + para->getParH(lev)->vyy[i] + para->getParH(lev)->vzz[i]));
+            // squared magnitude of the mean velocity
+            const real v_mean_squared = para->getParH(lev)->vx_mean[i] * para->getParH(lev)->vx_mean[i] +
+                                        para->getParH(lev)->vy_mean[i] * para->getParH(lev)->vy_mean[i] +
+                                        para->getParH(lev)->vz_mean[i] * para->getParH(lev)->vz_mean[i];
+            // NOTE(review): yields NaN/inf wherever the mean velocity is zero - confirm this is acceptable
+            para->getParH(lev)->turbulenceIntensity[i] = (real)sqrt(fluc_squared / v_mean_squared);
+        }
+    }
+}
+
+
+void resetVelocityFluctuationsAndMeans(Parameter *para, CudaMemoryManager *cudaManager)
+{
+    // Sets all nine host-side sum arrays of every level to zero and copies them to the device, so
+    // that the accumulation starts from zero on both sides.
+    const real zero = (real)0.0;
+
+    for (int level = para->getCoarse(); level <= para->getFine(); ++level) {
+        auto &host = *para->getParH(level);
+        for (unsigned int node = 0; node < host.size_Mat_SP; node++) {
+            // sums of velocity products
+            host.vxx[node] = host.vyy[node] = host.vzz[node] = zero;
+            host.vxy[node] = host.vxz[node] = host.vyz[node] = zero;
+            // sums of velocities
+            host.vx_mean[node] = host.vy_mean[node] = host.vz_mean[node] = zero;
+        }
+        cudaManager->cudaCopyTurbulenceIntensityHD(level, host.size_Mat_SP);
+    }
+}
+
+void cudaFreeTurbulenceIntensityArrays(Parameter *para, CudaMemoryManager *cudaManager)
+{
+    // hand every level from coarse to fine to the memory manager's free routine
+    for (int level = para->getCoarse(); level <= para->getFine(); ++level)
+        cudaManager->cudaFreeTurbulenceIntensity(level);
+}
+
+void writeTurbulenceIntensityToFile(Parameter *para, uint timestep)
+{
+    std::vector<std::string> columnNames = { "ti" }; // single column: the turbulenceIntensity vector
+    for (int level = para->getCoarse(); level <= para->getFine(); ++level) {
+        std::vector<real *> columns = { para->getParH(level)->turbulenceIntensity.data() };
+        writeTiStuffToFile(para, timestep, para->getParH(level)->size_Mat_SP, columns, columnNames);
+    }
+}
+
+void writeVeloFluctuationToFile(Parameter *para, uint timestep)
+{
+    std::vector<std::string> columnNames = { "vxx", "vyy", "vzz" }; // same order as the arrays below
+    for (int lev = para->getCoarse(); lev <= para->getFine(); ++lev) {
+        std::vector<real *> columns = { para->getParH(lev)->vxx, para->getParH(lev)->vyy, para->getParH(lev)->vzz };
+        writeTiStuffToFile(para, timestep, para->getParH(lev)->size_Mat_SP, columns, columnNames);
+    }
+}
+
+void writeVeloMeansToFile(Parameter *para, uint timestep) {
+    // for every level: the host-side vx_mean, vy_mean and vz_mean arrays, one column each
+    std::vector<std::string> columnNames = { "vx_mean", "vy_mean", "vz_mean" };
+    for (int lev = para->getCoarse(); lev <= para->getFine(); ++lev) {
+        auto &host = *para->getParH(lev);
+        std::vector<real *> columns = { host.vx_mean, host.vy_mean, host.vz_mean };
+        writeTiStuffToFile(para, timestep, host.size_Mat_SP, columns, columnNames);
+    }
+}
+
+void writeAllTiDatafToFile(Parameter *para, uint timestep)
+{
+    // Writes, for every level, the vxx/vyy/vzz arrays, the three *_mean arrays and the turbulence
+    // intensity vector into one table; the names are listed in the same order as the arrays.
+    std::vector<std::string> columnNames = { "vxx", "vyy", "vzz", "vx_mean", "vy_mean", "vz_mean", "ti" };
+
+    for (int lev = para->getCoarse(); lev <= para->getFine(); ++lev) {
+        auto &host = *para->getParH(lev);
+        std::vector<real *> columns = { host.vxx,     host.vyy,     host.vzz,
+                                        host.vx_mean, host.vy_mean, host.vz_mean,
+                                        host.turbulenceIntensity.data() };
+        writeTiStuffToFile(para, timestep, host.size_Mat_SP, columns, columnNames);
+    }
+}
+
+void writeTiStuffToFile(Parameter *para, uint timestep, int sizeOfTiArray, std::vector<real *> &data,
+                        std::vector<std::string> &datanames)
+{
+    // Writes a tab-separated text table: a header row ("index_sp" plus one entry per data name) and
+    // then one row per array index with the value of every array in data at that index.
+    // NOTE(review): the file name holds process id, timestep and data names but no grid level, so
+    // callers that loop over several levels appear to write to the same file each time - confirm.
+    ////////////////////////////////////////////////////////////////////////
+    // set filename
+    std::string names;
+    for (const std::string &name : datanames)
+        names += "_" + name;
+    const std::string ffname = para->getFName() + StringUtil::toString<int>(para->getMyID()) + "_" +
+                               StringUtil::toString<int>(timestep) + names + "_ti.txt";
+    ////////////////////////////////////////////////////////////////////////
+    // open file
+    std::ofstream ostr(ffname.c_str());
+    ////////////////////////////////////////////////////////////////////////
+    // add header
+    ostr << "index_sp";
+    for (const std::string &name : datanames)
+        ostr << "\t" << name;
+    ostr << "\n";
+    ////////////////////////////////////////////////////////////////////////
+    // fill file with data ("\n" instead of std::endl: no flush after every row)
+    for (int i = 0; i < sizeOfTiArray; i++) {
+        ostr << i;
+        for (const real *dataset : data)
+            ostr << "\t" << dataset[i];
+        ostr << "\n";
+    }
+    // close file (flushes the buffered rows)
+    ostr.close();
+}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h
new file mode 100644
index 0000000000000000000000000000000000000000..4a2d539f3ae31f3975d03cbc0ea73dad90c20f73
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h
@@ -0,0 +1,24 @@
+#ifndef CalcTurbulenceIntensity_H
+#define CalcTurbulenceIntensity_H
+
+#include <string>
+#include <vector>
+#include "LBM/LB.h"
+#include "GPU/GPU_Interface.h"
+#include "Parameter/Parameter.h"
+#include "GPU/CudaMemoryManager.h"
+
+extern "C" void allocTurbulenceIntensity(Parameter *para, CudaMemoryManager *cudaManager);
+extern "C" void calcVelocityAndFluctuations(Parameter *para, CudaMemoryManager *cudaManager, uint tdiff);
+extern "C" void calcTurbulenceIntensity(Parameter *para, CudaMemoryManager *cudaManager, uint tdiff);
+extern "C" void resetVelocityFluctuationsAndMeans(Parameter *para, CudaMemoryManager *cudaManager);
+extern "C" void cudaFreeTurbulenceIntensityArrays(Parameter *para, CudaMemoryManager *cudaManager);
+// text output of the host-side arrays; data and datanames are expected to have matching order
+void writeTurbulenceIntensityToFile(Parameter *para, uint timestep);
+void writeVeloFluctuationToFile(Parameter *para, uint timestep);
+void writeVeloMeansToFile(Parameter *para, uint timestep);
+void writeAllTiDatafToFile(Parameter *para, uint timestep);
+void writeTiStuffToFile(Parameter *para, uint timestep, int sizeOfTiArray, std::vector<real *> &data,
+                  std::vector<std::string> &datanames);
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
index beb0fc4119c166e550b732aac6d91180818dc783..c47c14521efc7833058fc7816e298aeb4d333d7f 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
@@ -377,21 +377,21 @@ void postCollisionBC(Parameter* para, int level, unsigned int t)
         //           para->getParD(level)->size_Mat_SP,     para->getParD(level)->evenOrOdd);
         //getLastCudaError("QVelDev27 execution failed");
         
-        QVelDevComp27( para->getParD(level)->numberofthreads, para->getParD(level)->nx,           para->getParD(level)->ny,
-                       para->getParD(level)->Qinflow.Vx,      para->getParD(level)->Qinflow.Vy,   para->getParD(level)->Qinflow.Vz,
-                       para->getParD(level)->d0SP.f[0],       para->getParD(level)->Qinflow.k,    para->getParD(level)->Qinflow.q27[0], 
-                       para->getParD(level)->kInflowQ,        para->getParD(level)->kInflowQ,     para->getParD(level)->omega,
-                       para->getParD(level)->neighborX_SP,    para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP,
-                       para->getParD(level)->size_Mat_SP,     para->getParD(level)->evenOrOdd);
-        getLastCudaError("QVelDevComp27 execution failed");
+        // QVelDevComp27( para->getParD(level)->numberofthreads, para->getParD(level)->nx,           para->getParD(level)->ny,
+        //                para->getParD(level)->Qinflow.Vx,      para->getParD(level)->Qinflow.Vy,   para->getParD(level)->Qinflow.Vz,
+        //                para->getParD(level)->d0SP.f[0],       para->getParD(level)->Qinflow.k,    para->getParD(level)->Qinflow.q27[0], 
+        //                para->getParD(level)->kInflowQ,        para->getParD(level)->kInflowQ,     para->getParD(level)->omega,
+        //                para->getParD(level)->neighborX_SP,    para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP,
+        //                para->getParD(level)->size_Mat_SP,     para->getParD(level)->evenOrOdd);
+        // getLastCudaError("QVelDevComp27 execution failed");
 
-        //QVelDevCompZeroPress27(para->getParD(level)->numberofthreads, para->getParD(level)->nx,             para->getParD(level)->ny,
-        //                       para->getParD(level)->Qinflow.Vx,      para->getParD(level)->Qinflow.Vy,     para->getParD(level)->Qinflow.Vz,
-        //                       para->getParD(level)->d0SP.f[0],       para->getParD(level)->Qinflow.k,      para->getParD(level)->Qinflow.q27[0],
-        //                       para->getParD(level)->kInflowQ,        para->getParD(level)->Qinflow.kArray, para->getParD(level)->omega,
-        //                       para->getParD(level)->neighborX_SP,    para->getParD(level)->neighborY_SP,   para->getParD(level)->neighborZ_SP,
-        //                       para->getParD(level)->size_Mat_SP,     para->getParD(level)->evenOrOdd);
-        //getLastCudaError("QVelDevCompZeroPress27 execution failed");
+        QVelDevCompZeroPress27(para->getParD(level)->numberofthreads, para->getParD(level)->nx,             para->getParD(level)->ny,
+                              para->getParD(level)->Qinflow.Vx,      para->getParD(level)->Qinflow.Vy,     para->getParD(level)->Qinflow.Vz,
+                              para->getParD(level)->d0SP.f[0],       para->getParD(level)->Qinflow.k,      para->getParD(level)->Qinflow.q27[0],
+                              para->getParD(level)->kInflowQ,        para->getParD(level)->Qinflow.kArray, para->getParD(level)->omega,
+                              para->getParD(level)->neighborX_SP,    para->getParD(level)->neighborY_SP,   para->getParD(level)->neighborZ_SP,
+                              para->getParD(level)->size_Mat_SP,     para->getParD(level)->evenOrOdd);
+        getLastCudaError("QVelDevCompZeroPress27 execution failed");
 
         //////////////////////////////////////////////////////////////////////////
         // D E P R E C A T E D
@@ -990,12 +990,12 @@ void preCollisionBC(Parameter* para, CudaMemoryManager* cudaManager, int level,
 
 	if (para->getParD(level)->QPress.kQ > 0)
 	{
-		//QPressNoRhoDev27(para->getParD(level)->numberofthreads, para->getParD(level)->QPress.RhoBC,
-		//                 para->getParD(level)->d0SP.f[0],       para->getParD(level)->QPress.k,
-		//                 para->getParD(level)->QPress.kN,       para->getParD(level)->QPress.kQ,     para->getParD(level)->omega,
-		//                 para->getParD(level)->neighborX_SP,    para->getParD(level)->neighborY_SP,  para->getParD(level)->neighborZ_SP,
-		//                 para->getParD(level)->size_Mat_SP,     para->getParD(level)->evenOrOdd);
-		//getLastCudaError("QPressNoRhoDev27 execution failed");
+		QPressNoRhoDev27(para->getParD(level)->numberofthreads, para->getParD(level)->QPress.RhoBC,
+		                para->getParD(level)->d0SP.f[0],       para->getParD(level)->QPress.k,
+		                para->getParD(level)->QPress.kN,       para->getParD(level)->QPress.kQ,     para->getParD(level)->omega,
+		                para->getParD(level)->neighborX_SP,    para->getParD(level)->neighborY_SP,  para->getParD(level)->neighborZ_SP,
+		                para->getParD(level)->size_Mat_SP,     para->getParD(level)->evenOrOdd);
+		getLastCudaError("QPressNoRhoDev27 execution failed");
 
 		//QPressDevEQZ27(para->getParD(level)->numberofthreads, para->getParD(level)->QPress.RhoBC, 
 		//               para->getParD(level)->d0SP.f[0],       para->getParD(level)->QPress.k,  
@@ -1022,12 +1022,12 @@ void preCollisionBC(Parameter* para, CudaMemoryManager* cudaManager, int level,
         //getLastCudaError("QPressDevIncompNEQ27 execution failed");
         //////////////////////////////////////////////////////////////////////////////////
         //press NEQ compressible
-        QPressDevNEQ27( para->getParD(level)->numberofthreads, para->getParD(level)->QPress.RhoBC, 
-                        para->getParD(level)->d0SP.f[0],       para->getParD(level)->QPress.k,  
-                        para->getParD(level)->QPress.kN,       para->getParD(level)->QPress.kQ,    para->getParD(level)->omega,
-                        para->getParD(level)->neighborX_SP,    para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP,
-                        para->getParD(level)->size_Mat_SP,     para->getParD(level)->evenOrOdd);
-        getLastCudaError("QPressDevNEQ27 execution failed");
+        // QPressDevNEQ27( para->getParD(level)->numberofthreads, para->getParD(level)->QPress.RhoBC, 
+        //                 para->getParD(level)->d0SP.f[0],       para->getParD(level)->QPress.k,  
+        //                 para->getParD(level)->QPress.kN,       para->getParD(level)->QPress.kQ,    para->getParD(level)->omega,
+        //                 para->getParD(level)->neighborX_SP,    para->getParD(level)->neighborY_SP, para->getParD(level)->neighborZ_SP,
+        //                 para->getParD(level)->size_Mat_SP,     para->getParD(level)->evenOrOdd);
+        // getLastCudaError("QPressDevNEQ27 execution failed");
 
 	}
 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
index ce6e034d1c1eee57e062b736cfcea97e07306f3c..42d0a6f4aff4a85d4912d0dcdf73e30b6bee9eb9 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
@@ -273,15 +273,15 @@ extern "C" __global__ void LBCalcMacCompSP27(real *vxD, real *vyD, real *vzD, re
     if(k >= size_Mat)
         return;
 
-    if (!vf::gpu::isValidFluidNode(geoD[k]))
-        return;
-
     pressD[k] = c0o1;
     rhoD[k]   = c0o1;
     vxD[k]    = c0o1;
     vyD[k]    = c0o1;
     vzD[k]    = c0o1;
 
+    if (!vf::gpu::isValidFluidNode(geoD[k]))
+        return;
+
     vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY,
                                                neighborZ);
     const auto &distribution = distr_wrapper.distribution;
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
index 8f00dce4b9decd84fecb74d1e7f96849b163ca0f..140927be10537d592ec7f46550d2942caf206503 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
@@ -975,6 +975,74 @@ void CudaMemoryManager::cudaFreeTurbulentViscosity(int lev)
     checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->gDyvz));
     checkCudaErrors(cudaFreeHost(parameter->getParH(lev)->gDzvz));
 }
+// turbulence intensity: host and device arrays for six velocity products and three velocity sums
+void CudaMemoryManager::cudaAllocTurbulenceIntensity(int lev, uint size)
+{
+    const size_t mem_size = sizeof(real) * size; // size_t: the byte count must not be truncated to 32 bit
+    // Host
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vxx        ), mem_size));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vyy        ), mem_size));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vzz        ), mem_size));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vxy        ), mem_size));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vxz        ), mem_size));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vyz        ), mem_size));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vx_mean    ), mem_size));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vy_mean    ), mem_size));
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->vz_mean    ), mem_size));
+    //Device
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vxx            ), mem_size));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vyy            ), mem_size));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vzz            ), mem_size));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vxy            ), mem_size));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vxz            ), mem_size));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vyz            ), mem_size));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vx_mean        ), mem_size));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vy_mean        ), mem_size));
+    checkCudaErrors( cudaMalloc((void**) &(parameter->getParD(lev)->vz_mean        ), mem_size));
+    // size of the nine device arrays; presumably feeds the manager's GPU memory bookkeeping - confirm
+    double tmp = 9. * (double)mem_size;
+    setMemsizeGPU(tmp, false);
+}
+void CudaMemoryManager::cudaCopyTurbulenceIntensityHD(int lev, uint size)
+{
+    const size_t mem_size = sizeof(real) * size; // bytes per array; size_t so the count is not cut to 32 bit
+    //copy all nine arrays of this level from host to device
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vxx    ,  parameter->getParH(lev)->vxx    ,  mem_size , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vyy    ,  parameter->getParH(lev)->vyy    ,  mem_size , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vzz    ,  parameter->getParH(lev)->vzz    ,  mem_size , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vxy    ,  parameter->getParH(lev)->vxy    ,  mem_size , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vxz    ,  parameter->getParH(lev)->vxz    ,  mem_size , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vyz    ,  parameter->getParH(lev)->vyz    ,  mem_size , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vx_mean,  parameter->getParH(lev)->vx_mean,  mem_size , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vy_mean,  parameter->getParH(lev)->vy_mean,  mem_size , cudaMemcpyHostToDevice));
+    checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->vz_mean,  parameter->getParH(lev)->vz_mean,  mem_size , cudaMemcpyHostToDevice));
+}
+void CudaMemoryManager::cudaCopyTurbulenceIntensityDH(int lev, uint size)
+{
+    const size_t mem_size = sizeof(real) * size; // bytes per array; size_t so the count is not cut to 32 bit
+    //copy all nine arrays of this level from device to host
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vxx    ,  parameter->getParD(lev)->vxx    ,  mem_size , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vyy    ,  parameter->getParD(lev)->vyy    ,  mem_size , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vzz    ,  parameter->getParD(lev)->vzz    ,  mem_size , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vxy    ,  parameter->getParD(lev)->vxy    ,  mem_size , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vxz    ,  parameter->getParD(lev)->vxz    ,  mem_size , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vyz    ,  parameter->getParD(lev)->vyz    ,  mem_size , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vx_mean,  parameter->getParD(lev)->vx_mean,  mem_size , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vy_mean,  parameter->getParD(lev)->vy_mean,  mem_size , cudaMemcpyDeviceToHost));
+    checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->vz_mean,  parameter->getParD(lev)->vz_mean,  mem_size , cudaMemcpyDeviceToHost));
+}
+void CudaMemoryManager::cudaFreeTurbulenceIntensity(int lev)
+{
+    // release the host arrays and also the device arrays from cudaAllocTurbulenceIntensity(), which were leaked before
+    auto hostPar = parameter->getParH(lev);
+    auto devPar  = parameter->getParD(lev);
+    real *hostArrays[]   = { hostPar->vxx, hostPar->vyy, hostPar->vzz, hostPar->vxy, hostPar->vxz, hostPar->vyz, hostPar->vx_mean, hostPar->vy_mean, hostPar->vz_mean };
+    real *deviceArrays[] = { devPar->vxx, devPar->vyy, devPar->vzz, devPar->vxy, devPar->vxz, devPar->vyz, devPar->vx_mean, devPar->vy_mean, devPar->vz_mean };
+    for (real *hostArray : hostArrays)
+        checkCudaErrors( cudaFreeHost(hostArray));
+    for (real *deviceArray : deviceArrays)
+        checkCudaErrors( cudaFree(deviceArray));
+}
 //median
 void CudaMemoryManager::cudaAllocMedianSP(int lev)
 {
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
index 7e81ecbe9bd2c2bc39cc9a5dc3b01cfffbc16d1e..3d7d39d85138b2661960cbf822756421380bfd0c 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
@@ -139,6 +139,11 @@ public:
     void cudaCopyTurbulentViscosityHD(int lev);
     void cudaCopyTurbulentViscosityDH(int lev);
     void cudaFreeTurbulentViscosity(int lev);
+
+    void cudaAllocTurbulenceIntensity(int lev, uint size);
+    void cudaCopyTurbulenceIntensityHD(int lev, uint size);
+    void cudaCopyTurbulenceIntensityDH(int lev, uint size);
+    void cudaFreeTurbulenceIntensity(int lev);
     
     void cudaAllocMedianSP(int lev);
     void cudaCopyMedianSP(int lev);
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
index c2f01e4f57c25f219d517fdf2a111ecd25f0f794..93e78c82bf149a5b5398f145dbed86cde2302ea5 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
@@ -2526,4 +2526,23 @@ extern "C" void generateRandomValuesDevice(curandState* state,
 										   real* randArray,
 										   unsigned int numberOfThreads);
 
+extern "C" void CalcTurbulenceIntensityDevice(
+   real* vxx,
+   real* vyy,
+   real* vzz,
+   real* vxy,
+   real* vxz,
+   real* vyz,
+   real* vx_mean,
+   real* vy_mean,
+   real* vz_mean,
+   real* DD, 
+   uint *typeOfGridNode, 
+   unsigned int* neighborX,
+   unsigned int* neighborY,
+   unsigned int* neighborZ,
+   unsigned int size_Mat, 
+   bool evenOrOdd,
+   uint numberOfThreads);
+
 #endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
index 288db43e7bcd36dc4d187982b86178d345601094..c1e4ce45b8e4ccf70f084eb7742fcf9d1bfdb47f 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
@@ -2407,6 +2407,23 @@ extern "C" __global__ void initRandom(curandState* state);
 extern "C" __global__ void generateRandomValues(curandState* state, 
 												real* randArray);
 
+extern "C" __global__ void CalcTurbulenceIntensity(
+   real* vxx,
+   real* vyy,
+   real* vzz,
+   real* vxy,
+   real* vxz,
+   real* vyz,
+   real* vx_mean,
+   real* vy_mean,
+   real* vz_mean,
+   real* DD, 
+   uint *typeOfGridNode, 
+   unsigned int* neighborX,
+   unsigned int* neighborY,
+   unsigned int* neighborZ,
+   unsigned int size_Mat, 
+   bool evenOrOdd);
 
 #endif
 							 
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
index 57194fe3bf36c6357454d1e6cef6e80fd0f80b60..d3bfb5d125028ac94a8bbdc87192351d90617989 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
@@ -7345,6 +7345,61 @@ extern "C" void generateRandomValuesDevice( curandState* state,
    generateRandomValues<<< gridQ, threads >>> (state,randArray);
    getLastCudaError("generateRandomValues execution failed"); 
 }
+//////////////////////////////////////////////////////////////////////////
+extern "C" void CalcTurbulenceIntensityDevice(
+   real* vxx,
+   real* vyy,
+   real* vzz,
+   real* vxy,
+   real* vxz,
+   real* vyz,
+   real* vx_mean,
+   real* vy_mean,
+   real* vz_mean,
+   real* DD, 
+   uint* typeOfGridNode, 
+   unsigned int* neighborX,
+   unsigned int* neighborY,
+   unsigned int* neighborZ,
+   unsigned int size_Mat, 
+   bool evenOrOdd,
+   uint numberOfThreads)
+{
+   // Host-side launcher of the CalcTurbulenceIntensity kernel with numberOfThreads threads per block.
+   // The block count is size_Mat / numberOfThreads + 1. Up to 512 blocks are placed in the
+   // y-extent of the grid (x-extent 1); beyond that the x-extent is fixed at 512 and the y-extent
+   // becomes blockCount / 512 + 1.
+
+   const int blockCount = (size_Mat / numberOfThreads)+1;
+   const bool exceedsLimit = blockCount > 512;
+   const int gridX = exceedsLimit ? 512 : 1;
+   const int gridY = exceedsLimit ? (blockCount/gridX)+1 : blockCount;
+
+   dim3 gridQ(gridX, gridY);
+   dim3 threads(numberOfThreads, 1, 1 );
+
+   // all array and scalar arguments are forwarded unchanged, in declaration order
+   CalcTurbulenceIntensity<<<gridQ, threads>>>(
+     vxx,
+     vyy,
+     vzz,
+     vxy,
+     vxz,
+     vyz,
+     vx_mean,
+     vy_mean,
+     vz_mean,
+     DD, 
+     typeOfGridNode, 
+     neighborX,
+     neighborY,
+     neighborZ,
+     size_Mat, 
+     evenOrOdd);
+
+   // check for an error raised by the launch
+   getLastCudaError("CalcTurbulenceIntensity execution failed"); 
+}
 
 
 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu b/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu
new file mode 100644
index 0000000000000000000000000000000000000000..42ac0cd4ffc6da19e67f88cbf430677dcfa8a826
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu
@@ -0,0 +1,73 @@
+//  _    ___      __              __________      _     __        ______________   __
+// | |  / (_)____/ /___  ______ _/ / ____/ /_  __(_)___/ /____   /  ___/ __  / /  / /
+// | | / / / ___/ __/ / / / __ `/ / /_  / / / / / / __  / ___/  / /___/ /_/ / /  / /
+// | |/ / / /  / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__  )  / /_) / ____/ /__/ / 
+// |___/_/_/   \__/\__,_/\__,_/_/_/   /_/\__,_/_/\__,_/____/   \____/_/    \_____/
+//
+//////////////////////////////////////////////////////////////////////////
+
+/* Device code */
+#include "LBM/LB.h" 
+#include "LBM/D3Q27.h"
+#include <lbm/constants/NumericConstants.h>
+
+using namespace vf::lbm::constant;
+#include "lbm/MacroscopicQuantities.h"
+#include "../Kernel/Utilities/DistributionHelper.cuh"
+
+
+using namespace vf::lbm::constant;
+
+//////////////////////////////////////////////////////////////////////////////
+extern "C" __global__ void CalcTurbulenceIntensity(
+   real* vxx,
+   real* vyy,
+   real* vzz,
+   real* vxy,
+   real* vxz,
+   real* vyz,
+   real* vx_mean,
+   real* vy_mean,
+   real* vz_mean, 
+   real *distributions, 
+   uint* typeOfGridNode, 
+   unsigned int* neighborX,
+   unsigned int* neighborY,
+   unsigned int* neighborZ,
+   unsigned int size_Mat, 
+   bool isEvenTimestep)
+{
+   // Accumulation step for the node with index k: computes density and velocity from the node's
+   // distributions and adds the velocity components and their pairwise products to the running
+   // sums stored in the nine output arrays. Nothing is averaged here; the arrays only grow with
+   // every call.
+   const unsigned k = vf::gpu::getNodeIndex();
+
+   // skip indices beyond size_Mat and nodes that isValidFluidNode() rejects
+   if (k >= size_Mat || !vf::gpu::isValidFluidNode(typeOfGridNode[k]))
+       return;
+
+   vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY,
+                                              neighborZ);
+   const auto &distribution = distr_wrapper.distribution;
+
+   // analogue to LBCalcMacCompSP27
+   const real rho = vf::lbm::getDensity(distribution.f);
+   const real vx  = vf::lbm::getCompressibleVelocityX1(distribution.f, rho);
+   const real vy  = vf::lbm::getCompressibleVelocityX2(distribution.f, rho);
+   const real vz  = vf::lbm::getCompressibleVelocityX3(distribution.f, rho);
+
+   // running sums of the velocity products (for the fluctuations)
+   vxx[k] += vx * vx;
+   vyy[k] += vy * vy;
+   vzz[k] += vz * vz;
+   vxy[k] += vx * vy;
+   vxz[k] += vx * vz;
+   vyz[k] += vy * vz;
+
+   // running sums of the velocity components (for the mean velocity)
+   vx_mean[k] += vx;
+   vy_mean[k] += vy;
+   vz_mean[k] += vz;
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index ae6d1db2e299c0b075e223243e66c40b321abe07..92096a76abc023c478921cd2543aec7c48bb6a32 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -36,6 +36,7 @@
 #include "Calculation/Cp.h"
 #include "Calculation/Calc2ndMoments.h"
 #include "Calculation/CalcMedian.h"
+#include "Calculation/CalcTurbulenceIntensity.h"
 #include "Calculation/ForceCalculations.h"
 #include "Calculation/PorousMedia.h"
 //////////////////////////////////////////////////////////////////////////
@@ -202,12 +203,20 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std
    //////////////////////////////////////////////////////////////////////////
    if (para->getCalcMedian())
    {
-       output << "alloc Calculation for Mean Valus  " << "\n";
+       output << "alloc Calculation for Mean Values  " << "\n";
 	   if (para->getDiffOn())	allocMedianAD(para.get(), cudaManager.get());
 	   else						allocMedian(para.get(), cudaManager.get());
    }
 
 
+   //////////////////////////////////////////////////////////////////////////
+   // Turbulence Intensity
+   //////////////////////////////////////////////////////////////////////////
+   if (para->getCalcTurbulenceIntensity()) {
+       output << "alloc arrays for calculating Turbulence Intensity  " << "\n";
+       allocTurbulenceIntensity(para.get(), cudaManager.get());
+   }
+
    //////////////////////////////////////////////////////////////////////////
    //allocate memory and initialize 2nd, 3rd and higher order moments
    //////////////////////////////////////////////////////////////////////////
@@ -374,6 +383,7 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std
 
    //////////////////////////////////////////////////////////////////////////
    output << "used Device Memory: " << cudaManager->getMemsizeGPU() / 1000000.0 << " MB\n";
+   std::cout << "Process " << comm->getPID() << ": used device memory: " << cudaManager->getMemsizeGPU() / 1000000.0 << " MB\n" << std::endl; // same figure on stdout, tagged with the process id
    //////////////////////////////////////////////////////////////////////////
 
    //InterfaceDebugWriter::writeInterfaceLinesDebugCF(para.get());
@@ -402,6 +412,7 @@ void Simulation::run()
    ftimeE   = 0.0f;
    ftimeS   = 0.0f;
    unsigned int t, t_prev;
+   uint t_turbulenceIntensity = 0;
    unsigned int t_MP = 0;
    //////////////////////////////////////////////////////////////////////////
    para->setStepEnsight(0);
@@ -506,6 +517,30 @@ void Simulation::run()
         
           }
         }
+
+		if (para->getCalcTurbulenceIntensity()) {
+            for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) {
+				CalcTurbulenceIntensityDevice(
+				    para->getParD(lev)->vxx,
+				    para->getParD(lev)->vyy,
+				    para->getParD(lev)->vzz,
+				    para->getParD(lev)->vxy,
+				    para->getParD(lev)->vxz,
+				    para->getParD(lev)->vyz,
+				    para->getParD(lev)->vx_mean,
+				    para->getParD(lev)->vy_mean,
+				    para->getParD(lev)->vz_mean,
+				    para->getParD(lev)->d0SP.f[0], 
+				    para->getParD(lev)->geoSP,
+				    para->getParD(lev)->neighborX_SP,
+				    para->getParD(lev)->neighborY_SP, 
+				    para->getParD(lev)->neighborZ_SP,
+				    para->getParD(lev)->size_Mat_SP,
+				    para->getParD(lev)->evenOrOdd,
+				    para->getParD(lev)->numberofthreads
+				);
+			}
+		}
         ////////////////////////////////////////////////////////////////////////////////
 
 
@@ -952,9 +987,20 @@ void Simulation::run()
 				resetMedian(para.get());
 				/////////////////////////////////
 			}
+            if (para->getCalcTurbulenceIntensity()) 
+			{
+                uint t_diff = t - t_turbulenceIntensity;
+                calcTurbulenceIntensity(para.get(), cudaManager.get(), t_diff);
+                //writeAllTiDatafToFile(para.get(), t);
+            }
 			////////////////////////////////////////////////////////////////////////
 			dataWriter->writeTimestep(para, t);
 			////////////////////////////////////////////////////////////////////////
+            if (para->getCalcTurbulenceIntensity()) {
+                t_turbulenceIntensity = t;
+                resetVelocityFluctuationsAndMeans(para.get(), cudaManager.get());
+            }
+			////////////////////////////////////////////////////////////////////////
             if (para->getCalcDragLift()) printDragLift(para.get(), cudaManager.get(), t);
 			////////////////////////////////////////////////////////////////////////
 			if (para->getCalcParticle()) copyAndPrintParticles(para.get(), cudaManager.get(), t, false);
@@ -1334,6 +1380,10 @@ void Simulation::free()
 		}
 	}
 	//////////////////////////////////////////////////////////////////////////
+	// Turbulence Intensity
+	if (para->getCalcTurbulenceIntensity()) {
+        cudaFreeTurbulenceIntensityArrays(para.get(), cudaManager.get());
+	}
 
     delete comm;
 
diff --git a/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp b/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp
index 0a9e34166652613659b5b1d609bc40d07f6cee76..06694ca352249ed2ec0907504f21c66a3164fc32 100644
--- a/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp
+++ b/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp
@@ -183,6 +183,16 @@ void FileWriter::writeUnstrucuredGridLT(std::shared_ptr<Parameter> para, int lev
     nodedatanames.push_back("geo");
     //nodedatanames.push_back("sendNodes");
     //nodedatanames.push_back("sparseIndex");
+
+    uint firstTurbNode = nodedatanames.size();
+    if (para->getCalcTurbulenceIntensity()) {
+        nodedatanames.push_back("vxx");
+        nodedatanames.push_back("vyy");
+        nodedatanames.push_back("vzz");
+        nodedatanames.push_back("vxy");
+        nodedatanames.push_back("vxz");
+        nodedatanames.push_back("vyz");
+	}
     unsigned int number1, number2, number3, number4, number5, number6, number7, number8;
     uint dn1, dn2, dn3, dn4, dn5, dn6, dn7, dn8;
     bool neighborsAreFluid;
@@ -229,10 +239,20 @@ void FileWriter::writeUnstrucuredGridLT(std::shared_ptr<Parameter> para, int lev
                 nodedata[4][dn1] = (double)para->getParH(level)->vz_SP[pos] * (double)para->getVelocityRatio();
                 nodedata[5][dn1] = (double)para->getParH(level)->geoSP[pos];
 
+                //nodedata[6][dn1] = (double) pos;
+
 				//int sendNode = 0; // 0 - not a sendNode; 1 - sendNode; 2 - sendNode in communication after fine to coarse
     //            testForSendNodeZ(para, level, pos, sendNode); // slow and should not be done multiple times --> use for debugging only!
 				//nodedata[6][dn1] = (double) sendNode;
-    //            nodedata[7][dn1] = (double) pos;
+
+                if (para->getCalcTurbulenceIntensity()) {
+                    nodedata[firstTurbNode    ][dn1] = (double)para->getParH(level)->vxx[pos];
+                    nodedata[firstTurbNode + 1][dn1] = (double)para->getParH(level)->vyy[pos];
+                    nodedata[firstTurbNode + 2][dn1] = (double)para->getParH(level)->vzz[pos];
+                    nodedata[firstTurbNode + 3][dn1] = (double)para->getParH(level)->vxy[pos];
+                    nodedata[firstTurbNode + 4][dn1] = (double)para->getParH(level)->vxz[pos];
+                    nodedata[firstTurbNode + 5][dn1] = (double)para->getParH(level)->vyz[pos];
+                }
 
                 //////////////////////////////////////////////////////////////////////////
                 number2 = para->getParH(level)->neighborX_SP[number1];
@@ -641,5 +661,3 @@ void FileWriter::writeUnstrucuredGridMedianLTConc(std::shared_ptr<Parameter> par
 
 
 
-
-
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
index ab810a5e38740e1d9b6953f2bbec85864276e482..ca2481ff99502b7b63faecad53e098af56d39693 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
@@ -59,7 +59,7 @@ Parameter::~Parameter() = default;
 
 void Parameter::readConfigData(const vf::basics::ConfigurationFile &configData)
 {
-   if (configData.contains("NumberOfDevices"))
+    if (configData.contains("NumberOfDevices"))
         this->setMaxDev(configData.getValue<int>("NumberOfDevices"));
 
     //////////////////////////////////////////////////////////////////////////
@@ -200,6 +200,19 @@ void Parameter::readConfigData(const vf::basics::ConfigurationFile &configData)
         this->setFactorPressBC(configData.getValue<real>("FactorPressBC"));
 
     //////////////////////////////////////////////////////////////////////////
+    // CUDA streams and optimized communication
+    if(this->getNumprocs() > 1) {
+        if (configData.contains("useStreams")) {
+            if (configData.getValue<bool>("useStreams")) 
+                this->setUseStreams();
+        }
+
+        if (configData.contains("useReducedCommunicationInInterpolation")){
+            this->useReducedCommunicationAfterFtoC = configData.getValue<bool>("useReducedCommunicationInInterpolation");
+        }
+    }
+    //////////////////////////////////////////////////////////////////////////
+
     //read Geometry (STL)
     if (configData.contains("ReadGeometry"))
         this->setReadGeo(configData.getValue<bool>("ReadGeometry"));
@@ -436,130 +449,130 @@ void Parameter::readConfigData(const vf::basics::ConfigurationFile &configData)
 
 void Parameter::initLBMSimulationParameter()
 {
-	//host
-	for (int i = coarse; i <= fine; i++)
-	{
-		parH[i]                        = std::make_shared<LBMSimulationParameter>();
-		parH[i]->numberofthreads       = 64;// 128;
-		parH[i]->gridNX                = getGridX().at(i);
-		parH[i]->gridNY                = getGridY().at(i);
-		parH[i]->gridNZ                = getGridZ().at(i);
-		parH[i]->vis                   = ic.vis*pow(2.f,i);
-		parH[i]->diffusivity           = ic.Diffusivity*pow(2.f,i);
-		parH[i]->omega                 = 1.0f/(3.0f*parH[i]->vis+0.5f);//omega :-) not s9 = -1.0f/(3.0f*parH[i]->vis+0.5f);//
-		parH[i]->nx                    = parH[i]->gridNX + 2 * STARTOFFX;
-		parH[i]->ny                    = parH[i]->gridNY + 2 * STARTOFFY;
-		parH[i]->nz                    = parH[i]->gridNZ + 2 * STARTOFFZ;
-		parH[i]->size_Mat              = parH[i]->nx * parH[i]->ny * parH[i]->nz;
-		parH[i]->sizePlaneXY           = parH[i]->nx * parH[i]->ny;
-		parH[i]->sizePlaneYZ           = parH[i]->ny * parH[i]->nz;
-		parH[i]->sizePlaneXZ           = parH[i]->nx * parH[i]->nz;
-		parH[i]->mem_size_real         = sizeof(real     ) * parH[i]->size_Mat;
-		parH[i]->mem_size_int          = sizeof(unsigned int) * parH[i]->size_Mat;
-		parH[i]->mem_size_bool         = sizeof(bool        ) * parH[i]->size_Mat;
-		parH[i]->mem_size_real_yz      = sizeof(real     ) * parH[i]->ny * parH[i]->nz;
-		parH[i]->evenOrOdd             = true;
-		parH[i]->startz                = parH[i]->gridNZ * ic.myid;
-		parH[i]->endz                  = parH[i]->gridNZ * ic.myid + parH[i]->gridNZ;
-		parH[i]->Lx                    = (real)((1.f*parH[i]->gridNX - 1.f)/(pow(2.f,i)));
-		parH[i]->Ly                    = (real)((1.f*parH[i]->gridNY - 1.f)/(pow(2.f,i)));
-		parH[i]->Lz                    = (real)((1.f*parH[i]->gridNZ - 1.f)/(pow(2.f,i)));
-		parH[i]->dx                    = (real)(1.f/(pow(2.f,i)));
-		parH[i]->XdistKn               = getDistX().at(i);
-		parH[i]->YdistKn               = getDistY().at(i);
-		parH[i]->ZdistKn               = getDistZ().at(i);
-		if (i == coarse)
-		{
-			parH[i]->distX                 = (real)getDistX().at(i);
-			parH[i]->distY                 = (real)getDistY().at(i);
-			parH[i]->distZ                 = (real)getDistZ().at(i);
-			parH[i]->mTtoWx                = (real)1.0f;
-			parH[i]->mTtoWy                = (real)1.0f;
-			parH[i]->mTtoWz                = (real)1.0f;
-			parH[i]->cTtoWx                = (real)0.0f;
-			parH[i]->cTtoWy                = (real)0.0f;
-			parH[i]->cTtoWz                = (real)0.0f;
-			////MGs Trafo///////////////////////////////////////////////////////////////
-			//parH[i]->cStartx               = (real)parH[i]->XdistKn;
-			//parH[i]->cStarty               = (real)parH[i]->XdistKn;
-			//parH[i]->cStartz               = (real)parH[i]->XdistKn;
-			////////////////////////////////////////////////////////////////////////////
-		} 
-		else
-		{
-			//Geller
-			parH[i]->distX                 = ((real)getDistX().at(i) + 0.25f) * parH[i-1]->dx;
-			parH[i]->distY                 = ((real)getDistY().at(i) + 0.25f) * parH[i-1]->dx;
-			parH[i]->distZ                 = ((real)getDistZ().at(i) + 0.25f) * parH[i-1]->dx;
-			//parH[i]->distX                 = ((real)getDistX().at(i) + 0.25f) * parH[i-1]->dx + parH[i-1]->distX;
-			//parH[i]->distY                 = ((real)getDistY().at(i) + 0.25f) * parH[i-1]->dx + parH[i-1]->distY;
-			//parH[i]->distZ                 = ((real)getDistZ().at(i) + 0.25f) * parH[i-1]->dx + parH[i-1]->distZ;
-			parH[i]->mTtoWx                = (real)pow(0.5f,i);
-			parH[i]->mTtoWy                = (real)pow(0.5f,i);
-			parH[i]->mTtoWz                = (real)pow(0.5f,i);
-			parH[i]->cTtoWx                = (real)(STARTOFFX/2.f + (parH[i]->gridNX+1.f)/4.f); //funzt nur fuer zwei level
-			parH[i]->cTtoWy                = (real)(STARTOFFY/2.f + (parH[i]->gridNY+1.f)/4.f); //funzt nur fuer zwei level
-			parH[i]->cTtoWz                = (real)(STARTOFFZ/2.f + (parH[i]->gridNZ+1.f)/4.f); //funzt nur fuer zwei level
-			////MGs Trafo///////////////////////////////////////////////////////////////
-			//parH[i]->cStartx               = (real)parH[i]->XdistKn;
-			//parH[i]->cStarty               = (real)parH[i]->XdistKn;
-			//parH[i]->cStartz               = (real)parH[i]->XdistKn;
-			////////////////////////////////////////////////////////////////////////////
-		}
-	}
-
-	//device
-	for (int i = coarse; i <= fine; i++)
-	{
-		parD[i]                        = std::make_shared<LBMSimulationParameter>();
-		parD[i]->numberofthreads       = parH[i]->numberofthreads;
-		parD[i]->gridNX                = parH[i]->gridNX;
-		parD[i]->gridNY                = parH[i]->gridNY;
-		parD[i]->gridNZ                = parH[i]->gridNZ;
-		parD[i]->vis                   = parH[i]->vis;
-		parD[i]->diffusivity           = parH[i]->diffusivity;
-		parD[i]->omega                 = parH[i]->omega;
-		parD[i]->nx                    = parH[i]->nx;
-		parD[i]->ny                    = parH[i]->ny;
-		parD[i]->nz                    = parH[i]->nz;
-		parD[i]->size_Mat              = parH[i]->size_Mat;
-		parD[i]->sizePlaneXY           = parH[i]->sizePlaneXY;
-		parD[i]->sizePlaneYZ           = parH[i]->sizePlaneYZ;
-		parD[i]->sizePlaneXZ           = parH[i]->sizePlaneXZ;
-		parD[i]->mem_size_real         = sizeof(real     ) * parD[i]->size_Mat;
-		parD[i]->mem_size_int          = sizeof(unsigned int) * parD[i]->size_Mat;
-		parD[i]->mem_size_bool         = sizeof(bool        ) * parD[i]->size_Mat;
-		parD[i]->mem_size_real_yz      = sizeof(real     ) * parD[i]->ny * parD[i]->nz;
-		parD[i]->evenOrOdd             = parH[i]->evenOrOdd;
-		parD[i]->startz                = parH[i]->startz;
-		parD[i]->endz                  = parH[i]->endz;
-		parD[i]->Lx                    = parH[i]->Lx;
-		parD[i]->Ly                    = parH[i]->Ly;
-		parD[i]->Lz                    = parH[i]->Lz;
-		parD[i]->dx                    = parH[i]->dx;
-		parD[i]->XdistKn               = parH[i]->XdistKn;
-		parD[i]->YdistKn               = parH[i]->YdistKn;
-		parD[i]->ZdistKn               = parH[i]->ZdistKn;
-		parD[i]->distX                 = parH[i]->distX;
-		parD[i]->distY                 = parH[i]->distY;
-		parD[i]->distZ                 = parH[i]->distZ;
-	}
+    //host
+    for (int i = coarse; i <= fine; i++)
+    {
+        parH[i]                        = std::make_shared<LBMSimulationParameter>();
+        parH[i]->numberofthreads       = 64;// 128;
+        parH[i]->gridNX                = getGridX().at(i);
+        parH[i]->gridNY                = getGridY().at(i);
+        parH[i]->gridNZ                = getGridZ().at(i);
+        parH[i]->vis                   = ic.vis*pow(2.f,i);
+        parH[i]->diffusivity           = ic.Diffusivity*pow(2.f,i);
+        parH[i]->omega                 = 1.0f/(3.0f*parH[i]->vis+0.5f);//omega :-) not s9 = -1.0f/(3.0f*parH[i]->vis+0.5f);//
+        parH[i]->nx                    = parH[i]->gridNX + 2 * STARTOFFX;
+        parH[i]->ny                    = parH[i]->gridNY + 2 * STARTOFFY;
+        parH[i]->nz                    = parH[i]->gridNZ + 2 * STARTOFFZ;
+        parH[i]->size_Mat              = parH[i]->nx * parH[i]->ny * parH[i]->nz;
+        parH[i]->sizePlaneXY           = parH[i]->nx * parH[i]->ny;
+        parH[i]->sizePlaneYZ           = parH[i]->ny * parH[i]->nz;
+        parH[i]->sizePlaneXZ           = parH[i]->nx * parH[i]->nz;
+        parH[i]->mem_size_real         = sizeof(real     ) * parH[i]->size_Mat;
+        parH[i]->mem_size_int          = sizeof(unsigned int) * parH[i]->size_Mat;
+        parH[i]->mem_size_bool         = sizeof(bool        ) * parH[i]->size_Mat;
+        parH[i]->mem_size_real_yz      = sizeof(real     ) * parH[i]->ny * parH[i]->nz;
+        parH[i]->evenOrOdd             = true;
+        parH[i]->startz                = parH[i]->gridNZ * ic.myid;
+        parH[i]->endz                  = parH[i]->gridNZ * ic.myid + parH[i]->gridNZ;
+        parH[i]->Lx                    = (real)((1.f*parH[i]->gridNX - 1.f)/(pow(2.f,i)));
+        parH[i]->Ly                    = (real)((1.f*parH[i]->gridNY - 1.f)/(pow(2.f,i)));
+        parH[i]->Lz                    = (real)((1.f*parH[i]->gridNZ - 1.f)/(pow(2.f,i)));
+        parH[i]->dx                    = (real)(1.f/(pow(2.f,i)));
+        parH[i]->XdistKn               = getDistX().at(i);
+        parH[i]->YdistKn               = getDistY().at(i);
+        parH[i]->ZdistKn               = getDistZ().at(i);
+        if (i == coarse)
+        {
+            parH[i]->distX                 = (real)getDistX().at(i);
+            parH[i]->distY                 = (real)getDistY().at(i);
+            parH[i]->distZ                 = (real)getDistZ().at(i);
+            parH[i]->mTtoWx                = (real)1.0f;
+            parH[i]->mTtoWy                = (real)1.0f;
+            parH[i]->mTtoWz                = (real)1.0f;
+            parH[i]->cTtoWx                = (real)0.0f;
+            parH[i]->cTtoWy                = (real)0.0f;
+            parH[i]->cTtoWz                = (real)0.0f;
+            ////MGs Trafo///////////////////////////////////////////////////////////////
+            //parH[i]->cStartx               = (real)parH[i]->XdistKn;
+            //parH[i]->cStarty               = (real)parH[i]->XdistKn;
+            //parH[i]->cStartz               = (real)parH[i]->XdistKn;
+            ////////////////////////////////////////////////////////////////////////////
+        } 
+        else
+        {
+            //Geller
+            parH[i]->distX                 = ((real)getDistX().at(i) + 0.25f) * parH[i-1]->dx;
+            parH[i]->distY                 = ((real)getDistY().at(i) + 0.25f) * parH[i-1]->dx;
+            parH[i]->distZ                 = ((real)getDistZ().at(i) + 0.25f) * parH[i-1]->dx;
+            //parH[i]->distX                 = ((real)getDistX().at(i) + 0.25f) * parH[i-1]->dx + parH[i-1]->distX;
+            //parH[i]->distY                 = ((real)getDistY().at(i) + 0.25f) * parH[i-1]->dx + parH[i-1]->distY;
+            //parH[i]->distZ                 = ((real)getDistZ().at(i) + 0.25f) * parH[i-1]->dx + parH[i-1]->distZ;
+            parH[i]->mTtoWx                = (real)pow(0.5f,i);
+            parH[i]->mTtoWy                = (real)pow(0.5f,i);
+            parH[i]->mTtoWz                = (real)pow(0.5f,i);
+            parH[i]->cTtoWx                = (real)(STARTOFFX/2.f + (parH[i]->gridNX+1.f)/4.f); //funzt nur fuer zwei level
+            parH[i]->cTtoWy                = (real)(STARTOFFY/2.f + (parH[i]->gridNY+1.f)/4.f); //funzt nur fuer zwei level
+            parH[i]->cTtoWz                = (real)(STARTOFFZ/2.f + (parH[i]->gridNZ+1.f)/4.f); //funzt nur fuer zwei level
+            ////MGs Trafo///////////////////////////////////////////////////////////////
+            //parH[i]->cStartx               = (real)parH[i]->XdistKn;
+            //parH[i]->cStarty               = (real)parH[i]->XdistKn;
+            //parH[i]->cStartz               = (real)parH[i]->XdistKn;
+            ////////////////////////////////////////////////////////////////////////////
+        }
+    }
+
+    //device
+    for (int i = coarse; i <= fine; i++)
+    {
+        parD[i]                        = std::make_shared<LBMSimulationParameter>();
+        parD[i]->numberofthreads       = parH[i]->numberofthreads;
+        parD[i]->gridNX                = parH[i]->gridNX;
+        parD[i]->gridNY                = parH[i]->gridNY;
+        parD[i]->gridNZ                = parH[i]->gridNZ;
+        parD[i]->vis                   = parH[i]->vis;
+        parD[i]->diffusivity           = parH[i]->diffusivity;
+        parD[i]->omega                 = parH[i]->omega;
+        parD[i]->nx                    = parH[i]->nx;
+        parD[i]->ny                    = parH[i]->ny;
+        parD[i]->nz                    = parH[i]->nz;
+        parD[i]->size_Mat              = parH[i]->size_Mat;
+        parD[i]->sizePlaneXY           = parH[i]->sizePlaneXY;
+        parD[i]->sizePlaneYZ           = parH[i]->sizePlaneYZ;
+        parD[i]->sizePlaneXZ           = parH[i]->sizePlaneXZ;
+        parD[i]->mem_size_real         = sizeof(real     ) * parD[i]->size_Mat;
+        parD[i]->mem_size_int          = sizeof(unsigned int) * parD[i]->size_Mat;
+        parD[i]->mem_size_bool         = sizeof(bool        ) * parD[i]->size_Mat;
+        parD[i]->mem_size_real_yz      = sizeof(real     ) * parD[i]->ny * parD[i]->nz;
+        parD[i]->evenOrOdd             = parH[i]->evenOrOdd;
+        parD[i]->startz                = parH[i]->startz;
+        parD[i]->endz                  = parH[i]->endz;
+        parD[i]->Lx                    = parH[i]->Lx;
+        parD[i]->Ly                    = parH[i]->Ly;
+        parD[i]->Lz                    = parH[i]->Lz;
+        parD[i]->dx                    = parH[i]->dx;
+        parD[i]->XdistKn               = parH[i]->XdistKn;
+        parD[i]->YdistKn               = parH[i]->YdistKn;
+        parD[i]->ZdistKn               = parH[i]->ZdistKn;
+        parD[i]->distX                 = parH[i]->distX;
+        parD[i]->distY                 = parH[i]->distY;
+        parD[i]->distZ                 = parH[i]->distZ;
+    }
 }
 
 void Parameter::copyMeasurePointsArrayToVector(int lev)
 {
-	int valuesPerClockCycle = (int)(getclockCycleForMP()/getTimestepForMP());
-	for(int i = 0; i < (int)parH[lev]->MP.size(); i++)
-	{
-		for(int j = 0; j < valuesPerClockCycle; j++)
-		{
-			int index = i*valuesPerClockCycle+j;
-			parH[lev]->MP[i].Vx.push_back(parH[lev]->VxMP[index]);
-			parH[lev]->MP[i].Vy.push_back(parH[lev]->VyMP[index]);
-			parH[lev]->MP[i].Vz.push_back(parH[lev]->VzMP[index]);
-			parH[lev]->MP[i].Rho.push_back(parH[lev]->RhoMP[index]);
-		}
-	}
+    int valuesPerClockCycle = (int)(getclockCycleForMP()/getTimestepForMP());
+    for(int i = 0; i < (int)parH[lev]->MP.size(); i++)
+    {
+        for(int j = 0; j < valuesPerClockCycle; j++)
+        {
+            int index = i*valuesPerClockCycle+j;
+            parH[lev]->MP[i].Vx.push_back(parH[lev]->VxMP[index]);
+            parH[lev]->MP[i].Vy.push_back(parH[lev]->VyMP[index]);
+            parH[lev]->MP[i].Vz.push_back(parH[lev]->VzMP[index]);
+            parH[lev]->MP[i].Rho.push_back(parH[lev]->RhoMP[index]);
+        }
+    }
 }
 
 
@@ -568,55 +581,55 @@ void Parameter::copyMeasurePointsArrayToVector(int lev)
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 void Parameter::setForcing(real forcingX, real forcingY, real forcingZ)
 {
-	this->hostForcing[0] = forcingX;
-	this->hostForcing[1] = forcingY;
-	this->hostForcing[2] = forcingZ;
+    this->hostForcing[0] = forcingX;
+    this->hostForcing[1] = forcingY;
+    this->hostForcing[2] = forcingZ;
 }
 void Parameter::setQuadricLimiters(real quadricLimiterP, real quadricLimiterM, real quadricLimiterD)
 {
-	this->hostQuadricLimiters[0] = quadricLimiterP;
-	this->hostQuadricLimiters[1] = quadricLimiterM;
-	this->hostQuadricLimiters[2] = quadricLimiterD;
+    this->hostQuadricLimiters[0] = quadricLimiterP;
+    this->hostQuadricLimiters[1] = quadricLimiterM;
+    this->hostQuadricLimiters[2] = quadricLimiterD;
 }
 void Parameter::setPhi(real inPhi)
 {
-	Phi = inPhi;
+    Phi = inPhi;
 }
 void Parameter::setAngularVelocity(real inAngVel)
 {
-	angularVelocity = inAngVel;
+    angularVelocity = inAngVel;
 }
 void Parameter::setStepEnsight(unsigned int step)
 {
-	this->stepEnsight = step;
+    this->stepEnsight = step;
 }
 void Parameter::setOutputCount(unsigned int outputCount)
 {
-	this->outputCount = outputCount;
+    this->outputCount = outputCount;
 }
 void Parameter::setlimitOfNodesForVTK(unsigned int limitOfNodesForVTK)
 {
-	this->limitOfNodesForVTK = limitOfNodesForVTK;
+    this->limitOfNodesForVTK = limitOfNodesForVTK;
 }
 void Parameter::setStartTurn(unsigned int inStartTurn)
 {
-	startTurn = inStartTurn;
+    startTurn = inStartTurn;
 }
 void Parameter::setDiffOn(bool isDiff)
 {
-	diffOn = isDiff;
+    diffOn = isDiff;
 }
 void Parameter::setCompOn(bool isComp)
 {
-	compOn = isComp;
+    compOn = isComp;
 }
 void Parameter::setDiffMod(int DiffMod)
 {
-	diffMod = DiffMod;
+    diffMod = DiffMod;
 }
 void Parameter::setD3Qxx(int d3qxx)
 {
-	this->D3Qxx = d3qxx;
+    this->D3Qxx = d3qxx;
 }
 void Parameter::setMaxLevel(int maxlevel)
 {
@@ -627,328 +640,332 @@ void Parameter::setMaxLevel(int maxlevel)
 }
 void Parameter::setParticleBasicLevel(int pbl)
 {
-	this->particleBasicLevel = pbl;
+    this->particleBasicLevel = pbl;
 }
 void Parameter::setParticleInitLevel(int pil)
 {
-	this->particleInitLevel = pil;
+    this->particleInitLevel = pil;
 }
 void Parameter::setNumberOfParticles(int nop)
 {
-	this->numberOfParticles = nop;
+    this->numberOfParticles = nop;
 }
 void Parameter::setCalcParticles(bool calcParticles)
 {
-	this->calcParticles = calcParticles;
+    this->calcParticles = calcParticles;
 }
 void Parameter::setStartXHotWall(real startXHotWall)
 {
-	this->startXHotWall = startXHotWall;
+    this->startXHotWall = startXHotWall;
 }
 void Parameter::setEndXHotWall(real endXHotWall)
 {
-	this->endXHotWall = endXHotWall;
+    this->endXHotWall = endXHotWall;
 }
 void Parameter::setTEnd(unsigned int tend)
 {
-	ic.tend = tend;
+    ic.tend = tend;
 }
 void Parameter::setTOut(unsigned int tout)
 {
-	ic.tout = tout;
+    ic.tout = tout;
 }
 void Parameter::setTStartOut(unsigned int tStartOut)
 {
-	ic.tStartOut = tStartOut;
+    ic.tStartOut = tStartOut;
 }
 void Parameter::setTimestepOfCoarseLevel(unsigned int timestep)
 {
-	this->timestep = timestep;
+    this->timestep = timestep; 
+}
+void Parameter::setCalcTurbulenceIntensity(bool calcVelocityAndFluctuations) 
+{
+    this->calcVelocityAndFluctuations = calcVelocityAndFluctuations;
 }
 void Parameter::setCalcMedian(bool calcMedian)
 {
-	ic.calcMedian = calcMedian;
+    ic.calcMedian = calcMedian;
 }
 void Parameter::setCalcDragLift(bool calcDragLift)
 {
-	this->calcDragLift = calcDragLift;
+    this->calcDragLift = calcDragLift;
 }
 void Parameter::setCalcCp(bool calcCp)
 {
-	this->calcCp = calcCp;
+    this->calcCp = calcCp;
 }
 void Parameter::setWriteVeloASCIIfiles(bool writeVeloASCII)
 {
-	this->writeVeloASCII = writeVeloASCII;
+    this->writeVeloASCII = writeVeloASCII;
 }
 void Parameter::setCalcPlaneConc(bool calcPlaneConc)
 {
-	this->calcPlaneConc = calcPlaneConc;
+    this->calcPlaneConc = calcPlaneConc;
 }
 void Parameter::setTimeCalcMedStart(int CalcMedStart)
 {		
-	ic.tCalcMedStart = CalcMedStart;
+    ic.tCalcMedStart = CalcMedStart;
 }
 void Parameter::setTimeCalcMedEnd(int CalcMedEnd)
 {
-	ic.tCalcMedEnd = CalcMedEnd;
+    ic.tCalcMedEnd = CalcMedEnd;
 }
 void Parameter::setOutputPath(std::string oPath)
 {
-	ic.oPath = oPath;
+    ic.oPath = oPath;
 }
 void Parameter::setOutputPrefix(std::string oPrefix)
 {
-	//std::string test = fname;
-	ic.oPrefix = oPrefix;
+    //std::string test = fname;
+    ic.oPrefix = oPrefix;
 }
 void Parameter::setFName(std::string fname)
 {
-	//std::string test = fname;
-	ic.fname = fname;
+    //std::string test = fname;
+    ic.fname = fname;
 }
 void Parameter::setPrintFiles(bool printfiles)
 {
-	ic.printFiles = printfiles;
+    ic.printFiles = printfiles;
 }
 void Parameter::setReadGeo(bool readGeo)
 {
-	ic.readGeo = readGeo;
+    ic.readGeo = readGeo;
 }
 void Parameter::setDiffusivity(real Diffusivity)
 {
-	ic.Diffusivity = Diffusivity;
+    ic.Diffusivity = Diffusivity;
 }
 void Parameter::setTemperatureInit(real Temp)
 {
-	ic.Temp = Temp;
+    ic.Temp = Temp;
 }
 void Parameter::setTemperatureBC(real TempBC)
 {
-	ic.TempBC = TempBC;
+    ic.TempBC = TempBC;
 }
 void Parameter::setViscosity(real Viscosity)
 {
-	ic.vis = Viscosity;
+    ic.vis = Viscosity;
 }
 void Parameter::setVelocity(real Velocity)
 {
-	ic.u0 = Velocity;
+    ic.u0 = Velocity;
 }
 void Parameter::setViscosityRatio(real ViscosityRatio)
 {
-	ic.vis_ratio = ViscosityRatio;
+    ic.vis_ratio = ViscosityRatio;
 }
 void Parameter::setVelocityRatio(real VelocityRatio)
 {
-	ic.u0_ratio = VelocityRatio;
+    ic.u0_ratio = VelocityRatio;
 }
 void Parameter::setDensityRatio(real DensityRatio)
 {
-	ic.delta_rho = DensityRatio;
+    ic.delta_rho = DensityRatio;
 }
 void Parameter::setPressRatio(real PressRatio)
 {
-	ic.delta_press = PressRatio;
+    ic.delta_press = PressRatio;
 }
 void Parameter::setRealX(real RealX)
 {
-	ic.RealX = RealX;
+    ic.RealX = RealX;
 }
 void Parameter::setRealY(real RealY)
 {
-	ic.RealY = RealY;
+    ic.RealY = RealY;
 }
 void Parameter::setPressInID(unsigned int PressInID)
 {
-	ic.PressInID = PressInID;
+    ic.PressInID = PressInID;
 }
 void Parameter::setPressOutID(unsigned int PressOutID)
 {
-	ic.PressOutID = PressOutID;
+    ic.PressOutID = PressOutID;
 }
 void Parameter::setPressInZ(unsigned int PressInZ)
 {
-	ic.PressInZ = PressInZ;
+    ic.PressInZ = PressInZ;
 }
 void Parameter::setPressOutZ(unsigned int PressOutZ)
 {
-	ic.PressOutZ = PressOutZ;
+    ic.PressOutZ = PressOutZ;
 }
 void Parameter::setMaxDev(int maxdev)
 {
-	ic.maxdev = maxdev;
+    ic.maxdev = maxdev;
 }
 void Parameter::setMyID(int myid)
 {
-	ic.myid = myid;
+    ic.myid = myid;
 }
 void Parameter::setNumprocs(int numprocs)
 {
-	ic.numprocs = numprocs;
+    ic.numprocs = numprocs;
 }
 void Parameter::setDevices(std::vector<uint> devices)
 {
-	ic.devices = devices;
+    ic.devices = devices;
 }
 void Parameter::setGeometryFileC(std::string GeometryFileC)
 {
-	ic.geometryFileC = GeometryFileC;
+    ic.geometryFileC = GeometryFileC;
 }
 void Parameter::setGeometryFileM(std::string GeometryFileM)
 {
-	ic.geometryFileM = GeometryFileM;
+    ic.geometryFileM = GeometryFileM;
 }
 void Parameter::setGeometryFileF(std::string GeometryFileF)
 {
-	ic.geometryFileF = GeometryFileF;
+    ic.geometryFileF = GeometryFileF;
 }
 void Parameter::setRe(real Re)
 {
-	ic.Re = Re;
+    ic.Re = Re;
 }
 void Parameter::setFactorPressBC(real factorPressBC)
 {
-	ic.factorPressBC = factorPressBC;
+    ic.factorPressBC = factorPressBC;
 }
 void Parameter::setIsGeo(bool isGeo)
 {
-	ic.isGeo = isGeo;
+    ic.isGeo = isGeo;
 }
 void Parameter::setIsGeoNormal(bool isGeoNormal)
 {
-	ic.isGeoNormal = isGeoNormal;
+    ic.isGeoNormal = isGeoNormal;
 }
 void Parameter::setIsInflowNormal(bool isInflowNormal)
 {
-	ic.isInflowNormal = isInflowNormal;
+    ic.isInflowNormal = isInflowNormal;
 }
 void Parameter::setIsOutflowNormal(bool isOutflowNormal)
 {
-	ic.isOutflowNormal = isOutflowNormal;
+    ic.isOutflowNormal = isOutflowNormal;
 }
 void Parameter::setIsProp(bool isProp)
 {
-	ic.isProp = isProp;
+    ic.isProp = isProp;
 }
 void Parameter::setIsCp(bool isCp)
 {
-	ic.isCp = isCp;
+    ic.isCp = isCp;
 }
 void Parameter::setConcFile(bool concFile)
 {
-	ic.isConc = concFile;
+    ic.isConc = concFile;
 }
 void Parameter::setStreetVelocityFile(bool streetVelocityFile)
 {
-	ic.streetVelocityFile = streetVelocityFile;
+    ic.streetVelocityFile = streetVelocityFile;
 }
 void Parameter::setUseMeasurePoints(bool useMeasurePoints)
 {
-	ic.isMeasurePoints = useMeasurePoints;
+    ic.isMeasurePoints = useMeasurePoints;
 }
 void Parameter::setUseWale(bool useWale)
 {
-	ic.isWale = useWale;
+    ic.isWale = useWale;
 }
 void Parameter::setUseInitNeq(bool useInitNeq)
 {
-	ic.isInitNeq = useInitNeq;
+    ic.isInitNeq = useInitNeq;
 }
 void Parameter::setSimulatePorousMedia(bool simulatePorousMedia)
 {
-	ic.simulatePorousMedia = simulatePorousMedia;
+    ic.simulatePorousMedia = simulatePorousMedia;
 }
 
 void Parameter::setIsF3(bool isF3)
 {
-	this->isF3 = isF3; 
+    this->isF3 = isF3; 
 }
 
 void Parameter::setIsBodyForce(bool isBodyForce) 
 {
-	this->isBodyForce = isBodyForce;
+    this->isBodyForce = isBodyForce;
 }
 
 void Parameter::setGridX(std::vector<int> GridX)
 {
-	ic.GridX = GridX;
+    ic.GridX = GridX;
 }
 void Parameter::setGridY(std::vector<int> GridY)
 {
-	ic.GridY = GridY;
+    ic.GridY = GridY;
 }
 void Parameter::setGridZ(std::vector<int> GridZ)
 {
-	ic.GridZ = GridZ;
+    ic.GridZ = GridZ;
 }
 void Parameter::setDistX(std::vector<int> DistX)
 {
-	ic.DistX = DistX;
+    ic.DistX = DistX;
 }
 void Parameter::setDistY(std::vector<int> DistY)
 {
-	ic.DistY = DistY;
+    ic.DistY = DistY;
 }
 void Parameter::setDistZ(std::vector<int> DistZ)
 {
-	ic.DistZ = DistZ;
+    ic.DistZ = DistZ;
 }
 void Parameter::setScaleLBMtoSI(std::vector<real> scaleLBMtoSI)
 {
-	ic.scaleLBMtoSI = scaleLBMtoSI;
+    ic.scaleLBMtoSI = scaleLBMtoSI;
 }
 void Parameter::setTranslateLBMtoSI(std::vector<real> translateLBMtoSI)
 {
-	ic.translateLBMtoSI = translateLBMtoSI;
+    ic.translateLBMtoSI = translateLBMtoSI;
 }
 void Parameter::setMinCoordX(std::vector<real> MinCoordX)
 {
-	ic.minCoordX = MinCoordX;
+    ic.minCoordX = MinCoordX;
 }
 void Parameter::setMinCoordY(std::vector<real> MinCoordY)
 {
-	ic.minCoordY = MinCoordY;
+    ic.minCoordY = MinCoordY;
 }
 void Parameter::setMinCoordZ(std::vector<real> MinCoordZ)
 {
-	ic.minCoordZ = MinCoordZ;
+    ic.minCoordZ = MinCoordZ;
 }
 void Parameter::setMaxCoordX(std::vector<real> MaxCoordX)
 {
-	ic.maxCoordX = MaxCoordX;
+    ic.maxCoordX = MaxCoordX;
 }
 void Parameter::setMaxCoordY(std::vector<real> MaxCoordY)
 {
-	ic.maxCoordY = MaxCoordY;
+    ic.maxCoordY = MaxCoordY;
 }
 void Parameter::setMaxCoordZ(std::vector<real> MaxCoordZ)
 {
-	ic.maxCoordZ = MaxCoordZ;
+    ic.maxCoordZ = MaxCoordZ;
 }
 void Parameter::setTempH(TempforBoundaryConditions* TempH)
 {
-	this->TempH = TempH;
+    this->TempH = TempH;
 }
 void Parameter::setTempD(TempforBoundaryConditions* TempD)
 {
-	this->TempD = TempD;
+    this->TempD = TempD;
 }
 void Parameter::setTempVelH(TempVelforBoundaryConditions* TempVelH)
 {
-	this->TempVelH = TempVelH;
+    this->TempVelH = TempVelH;
 }
 void Parameter::setTempVelD(TempVelforBoundaryConditions* TempVelD)
 {
-	this->TempVelD = TempVelD;
+    this->TempVelD = TempVelD;
 }
 void Parameter::setTempPressH(TempPressforBoundaryConditions* TempPressH)
 {
-	this->TempPressH = TempPressH;
+    this->TempPressH = TempPressH;
 }
 void Parameter::setTempPressD(TempPressforBoundaryConditions* TempPressD)
 {
-	this->TempPressD = TempPressD;
+    this->TempPressD = TempPressD;
 }
 //void Parameter::setkInflowQ(unsigned int kInflowQ)
 //{
@@ -976,463 +993,463 @@ void Parameter::setTempPressD(TempPressforBoundaryConditions* TempPressD)
 //}
 void Parameter::setkFull(std::string kFull)
 {
-	ic.kFull = kFull;
+    ic.kFull = kFull;
 }
 void Parameter::setgeoFull(std::string geoFull)
 {
-	ic.geoFull = geoFull;
+    ic.geoFull = geoFull;
 }
 void Parameter::setgeoVec(std::string geoVec)
 {
-	ic.geoVec = geoVec;
+    ic.geoVec = geoVec;
 }
 void Parameter::setcoordX(std::string coordX)
 {
-	ic.coordX = coordX;
+    ic.coordX = coordX;
 }
 void Parameter::setcoordY(std::string coordY)
 {
-	ic.coordY = coordY;
+    ic.coordY = coordY;
 }
 void Parameter::setcoordZ(std::string coordZ)
 {
-	ic.coordZ = coordZ;
+    ic.coordZ = coordZ;
 }
 void Parameter::setneighborX(std::string neighborX)
 {
-	ic.neighborX = neighborX;
+    ic.neighborX = neighborX;
 }
 void Parameter::setneighborY(std::string neighborY)
 {
-	ic.neighborY = neighborY;
+    ic.neighborY = neighborY;
 }
 void Parameter::setneighborZ(std::string neighborZ)
 {
-	ic.neighborZ = neighborZ;
+    ic.neighborZ = neighborZ;
 }
 void Parameter::setneighborWSB(std::string neighborWSB)
 {
-	ic.neighborWSB = neighborWSB;
+    ic.neighborWSB = neighborWSB;
 }
 void Parameter::setscaleCFC(std::string scaleCFC)
 {
-	ic.scaleCFC = scaleCFC;
+    ic.scaleCFC = scaleCFC;
 }
 void Parameter::setscaleCFF(std::string scaleCFF)
 {
-	ic.scaleCFF = scaleCFF;
+    ic.scaleCFF = scaleCFF;
 }
 void Parameter::setscaleFCC(std::string scaleFCC)
 {
-	ic.scaleFCC = scaleFCC;
+    ic.scaleFCC = scaleFCC;
 }
 void Parameter::setscaleFCF(std::string scaleFCF)
 {
-	ic.scaleFCF = scaleFCF;
+    ic.scaleFCF = scaleFCF;
 }
 void Parameter::setscaleOffsetCF(std::string scaleOffsetCF)
 {
-	ic.scaleOffsetCF = scaleOffsetCF;
+    ic.scaleOffsetCF = scaleOffsetCF;
 }
 void Parameter::setscaleOffsetFC(std::string scaleOffsetFC)
 {
-	ic.scaleOffsetFC = scaleOffsetFC;
+    ic.scaleOffsetFC = scaleOffsetFC;
 }
 void Parameter::setgeomBoundaryBcQs(std::string geomBoundaryBcQs)
 {
-	ic.geomBoundaryBcQs = geomBoundaryBcQs;
+    ic.geomBoundaryBcQs = geomBoundaryBcQs;
 }
 void Parameter::setgeomBoundaryBcValues(std::string geomBoundaryBcValues)
 {
-	ic.geomBoundaryBcValues = geomBoundaryBcValues;
+    ic.geomBoundaryBcValues = geomBoundaryBcValues;
 }
 void Parameter::setnoSlipBcPos(std::string noSlipBcPos)
 {
-	ic.noSlipBcPos = noSlipBcPos;
+    ic.noSlipBcPos = noSlipBcPos;
 }
 void Parameter::setnoSlipBcQs(std::string noSlipBcQs)
 {
-	ic.noSlipBcQs = noSlipBcQs;
+    ic.noSlipBcQs = noSlipBcQs;
 }
 void Parameter::setnoSlipBcValue(std::string noSlipBcValue)
 {
-	ic.noSlipBcValue = noSlipBcValue;
+    ic.noSlipBcValue = noSlipBcValue;
 }
 void Parameter::setnoSlipBcValues(std::string noSlipBcValues)
 {
-	ic.noSlipBcValues = noSlipBcValues;
+    ic.noSlipBcValues = noSlipBcValues;
 }
 void Parameter::setslipBcPos(std::string slipBcPos)
 {
-	ic.slipBcPos = slipBcPos;
+    ic.slipBcPos = slipBcPos;
 }
 void Parameter::setslipBcQs(std::string slipBcQs)
 {
-	ic.slipBcQs = slipBcQs;
+    ic.slipBcQs = slipBcQs;
 }
 void Parameter::setslipBcValue(std::string slipBcValue)
 {
-	ic.slipBcValue = slipBcValue;
+    ic.slipBcValue = slipBcValue;
 }
 void Parameter::setpressBcPos(std::string pressBcPos)
 {
-	ic.pressBcPos = pressBcPos;
+    ic.pressBcPos = pressBcPos;
 }
 void Parameter::setpressBcQs(std::string pressBcQs)
 {
-	ic.pressBcQs = pressBcQs;
+    ic.pressBcQs = pressBcQs;
 }
 void Parameter::setpressBcValue(std::string pressBcValue)
 {
-	ic.pressBcValue = pressBcValue;
+    ic.pressBcValue = pressBcValue;
 }
 void Parameter::setpressBcValues(std::string pressBcValues)
 {
-	ic.pressBcValues = pressBcValues;
+    ic.pressBcValues = pressBcValues;
 }
 void Parameter::setvelBcQs(std::string velBcQs)
 {
-	ic.velBcQs = velBcQs;
+    ic.velBcQs = velBcQs;
 }
 void Parameter::setvelBcValues(std::string velBcValues)
 {
-	ic.velBcValues = velBcValues;
+    ic.velBcValues = velBcValues;
 }
 void Parameter::setinletBcQs(std::string inletBcQs)
 {
-	ic.inletBcQs = inletBcQs;
+    ic.inletBcQs = inletBcQs;
 }
 void Parameter::setinletBcValues(std::string inletBcValues)
 {
-	ic.inletBcValues = inletBcValues;
+    ic.inletBcValues = inletBcValues;
 }
 void Parameter::setoutletBcQs(std::string outletBcQs)
 {
-	ic.outletBcQs = outletBcQs;
+    ic.outletBcQs = outletBcQs;
 }
 void Parameter::setoutletBcValues(std::string outletBcValues)
 {
-	ic.outletBcValues = outletBcValues;
+    ic.outletBcValues = outletBcValues;
 }
 void Parameter::settopBcQs(std::string topBcQs)
 {
-	ic.topBcQs = topBcQs;
+    ic.topBcQs = topBcQs;
 }
 void Parameter::settopBcValues(std::string topBcValues)
 {
-	ic.topBcValues = topBcValues;
+    ic.topBcValues = topBcValues;
 }
 void Parameter::setbottomBcQs(std::string bottomBcQs)
 {
-	ic.bottomBcQs = bottomBcQs;
+    ic.bottomBcQs = bottomBcQs;
 }
 void Parameter::setbottomBcValues(std::string bottomBcValues)
 {
-	ic.bottomBcValues = bottomBcValues;
+    ic.bottomBcValues = bottomBcValues;
 }
 void Parameter::setfrontBcQs(std::string frontBcQs)
 {
-	ic.frontBcQs = frontBcQs;
+    ic.frontBcQs = frontBcQs;
 }
 void Parameter::setfrontBcValues(std::string frontBcValues)
 {
-	ic.frontBcValues = frontBcValues;
+    ic.frontBcValues = frontBcValues;
 }
 void Parameter::setbackBcQs(std::string backBcQs)
 {
-	ic.backBcQs = backBcQs;
+    ic.backBcQs = backBcQs;
 }
 void Parameter::setbackBcValues(std::string backBcValues)
 {
-	ic.backBcValues = backBcValues;
+    ic.backBcValues = backBcValues;
 }
 void Parameter::setwallBcQs(std::string wallBcQs)
 {
-	ic.wallBcQs = wallBcQs;
+    ic.wallBcQs = wallBcQs;
 }
 void Parameter::setwallBcValues(std::string wallBcValues)
 {
-	ic.wallBcValues = wallBcValues;
+    ic.wallBcValues = wallBcValues;
 }
 void Parameter::setperiodicBcQs(std::string periodicBcQs)
 {
-	ic.periodicBcQs = periodicBcQs;
+    ic.periodicBcQs = periodicBcQs;
 }
 void Parameter::setperiodicBcValues(std::string periodicBcValues)
 {
-	ic.periodicBcValues = periodicBcValues;
+    ic.periodicBcValues = periodicBcValues;
 }
 void Parameter::setpropellerQs(std::string propellerQs)
 {
-	ic.propellerQs = propellerQs;
+    ic.propellerQs = propellerQs;
 }
 void Parameter::setpropellerValues(std::string propellerValues)
 {
-	ic.propellerValues = propellerValues;
+    ic.propellerValues = propellerValues;
 }
 void Parameter::setpropellerCylinder(std::string propellerCylinder)
 {
-	ic.propellerCylinder = propellerCylinder;
+    ic.propellerCylinder = propellerCylinder;
 }
 void Parameter::setmeasurePoints(std::string measurePoints)
 {
-	ic.measurePoints = measurePoints;
+    ic.measurePoints = measurePoints;
 }
 void Parameter::setnumberNodes(std::string numberNodes)
 {
-	ic.numberNodes = numberNodes;
+    ic.numberNodes = numberNodes;
 }
 void Parameter::setLBMvsSI(std::string LBMvsSI)
 {
-	ic.LBMvsSI = LBMvsSI;
+    ic.LBMvsSI = LBMvsSI;
 }
 void Parameter::setcpTop(std::string cpTop)
 {
-	ic.cpTop = cpTop;
+    ic.cpTop = cpTop;
 }
 void Parameter::setcpBottom(std::string cpBottom)
 {
-	ic.cpBottom = cpBottom;
+    ic.cpBottom = cpBottom;
 }
 void Parameter::setcpBottom2(std::string cpBottom2)
 {
-	ic.cpBottom2 = cpBottom2;
+    ic.cpBottom2 = cpBottom2;
 }
 void Parameter::setConcentration(std::string concFile)
 {
-	ic.concentration = concFile;
+    ic.concentration = concFile;
 }
 void Parameter::setStreetVelocity(std::string streetVelocity)
 {
-	ic.streetVelocity = streetVelocity;
+    ic.streetVelocity = streetVelocity;
 }
 void Parameter::setclockCycleForMP(real clockCycleForMP)
 {
-	ic.clockCycleForMP = clockCycleForMP;
+    ic.clockCycleForMP = clockCycleForMP;
 }
 void Parameter::setTimeDoCheckPoint(unsigned int tDoCheckPoint)
 {
-	ic.tDoCheckPoint = tDoCheckPoint;
+    ic.tDoCheckPoint = tDoCheckPoint;
 }
 void Parameter::setTimeDoRestart(unsigned int tDoRestart)
 {
-	ic.tDoRestart = tDoRestart;
+    ic.tDoRestart = tDoRestart;
 }
 void Parameter::setDoCheckPoint(bool doCheckPoint)
 {
-	ic.doCheckPoint = doCheckPoint;
+    ic.doCheckPoint = doCheckPoint;
 }
 void Parameter::setDoRestart(bool doRestart)
 {
-	ic.doRestart = doRestart;
+    ic.doRestart = doRestart;
 }
 void Parameter::settimestepForMP(unsigned int timestepForMP)
 {
-	ic.timeStepForMP = timestepForMP;
+    ic.timeStepForMP = timestepForMP;
 }
 void Parameter::setObj(std::string str, bool isObj)
 {
-	if (str == "geo")
-	{
-		this->setIsGeo(isObj);
-	}
-	else if (str == "prop")
-	{
-		this->setIsProp(isObj);
-	}
-	else if (str == "cp")
-	{
-		this->setIsCp(isObj);
-	}
-	else if (str == "geoNormal")
-	{
-		this->setIsGeoNormal(isObj);
-	}
-	else if (str == "inflowNormal")
-	{
-		this->setIsInflowNormal(isObj);
-	}
-	else if (str == "outflowNormal")
-	{
-		this->setIsOutflowNormal(isObj);
-	}
+    if (str == "geo")
+    {
+        this->setIsGeo(isObj);
+    }
+    else if (str == "prop")
+    {
+        this->setIsProp(isObj);
+    }
+    else if (str == "cp")
+    {
+        this->setIsCp(isObj);
+    }
+    else if (str == "geoNormal")
+    {
+        this->setIsGeoNormal(isObj);
+    }
+    else if (str == "inflowNormal")
+    {
+        this->setIsInflowNormal(isObj);
+    }
+    else if (str == "outflowNormal")
+    {
+        this->setIsOutflowNormal(isObj);
+    }
 }
 void Parameter::setGeometryValues(bool GeometryValues)
 {
-	ic.GeometryValues = GeometryValues;
+    ic.GeometryValues = GeometryValues;
 }
 void Parameter::setCalc2ndOrderMoments(bool is2ndOrderMoments)
 {
-	ic.is2ndOrderMoments = is2ndOrderMoments;
+    ic.is2ndOrderMoments = is2ndOrderMoments;
 }
 void Parameter::setCalc3rdOrderMoments(bool is3rdOrderMoments)
 {
-	ic.is3rdOrderMoments = is3rdOrderMoments;
+    ic.is3rdOrderMoments = is3rdOrderMoments;
 }
 void Parameter::setCalcHighOrderMoments(bool isHighOrderMoments)
 {
-	ic.isHighOrderMoments = isHighOrderMoments;
+    ic.isHighOrderMoments = isHighOrderMoments;
 }
 void Parameter::setMemsizeGPU(double admem, bool reset)
 {
-	if (reset == true)
-	{
-		this->memsizeGPU = 0.;
-	} 
-	else
-	{
-		this->memsizeGPU += admem;
-	}
+    if (reset == true)
+    {
+        this->memsizeGPU = 0.;
+    } 
+    else
+    {
+        this->memsizeGPU += admem;
+    }
 }
 //1D domain decomposition
 void Parameter::setPossNeighborFiles(std::vector<std::string> possNeighborFiles, std::string sor)
 {
-	if (sor=="send")
-	{
-		this->possNeighborFilesSend = possNeighborFiles;
-	} 
-	else if (sor == "recv")
-	{
-		this->possNeighborFilesRecv = possNeighborFiles;
-	}
+    if (sor=="send")
+    {
+        this->possNeighborFilesSend = possNeighborFiles;
+    } 
+    else if (sor == "recv")
+    {
+        this->possNeighborFilesRecv = possNeighborFiles;
+    }
 }
 void Parameter::setNumberOfProcessNeighbors(unsigned int numberOfProcessNeighbors, int level, std::string sor)
 {
-	if (sor=="send")
-	{
-		parH[level]->sendProcessNeighbor.resize(numberOfProcessNeighbors);
-		parD[level]->sendProcessNeighbor.resize(numberOfProcessNeighbors);
-	} 
-	else if (sor == "recv")
-	{
-		parH[level]->recvProcessNeighbor.resize(numberOfProcessNeighbors);
-		parD[level]->recvProcessNeighbor.resize(numberOfProcessNeighbors);
-	}
+    if (sor=="send")
+    {
+        parH[level]->sendProcessNeighbor.resize(numberOfProcessNeighbors);
+        parD[level]->sendProcessNeighbor.resize(numberOfProcessNeighbors);
+    } 
+    else if (sor == "recv")
+    {
+        parH[level]->recvProcessNeighbor.resize(numberOfProcessNeighbors);
+        parD[level]->recvProcessNeighbor.resize(numberOfProcessNeighbors);
+    }
 }
 void Parameter::setIsNeighbor(bool isNeigbor)
 {
-	this->isNeigbor = isNeigbor;
+    this->isNeigbor = isNeigbor;
 }
 //3D domain decomposition
 void Parameter::setPossNeighborFilesX(std::vector<std::string> possNeighborFiles, std::string sor)
 {
-	if (sor=="send")
-	{
-		this->possNeighborFilesSendX = possNeighborFiles;
-	} 
-	else if (sor == "recv")
-	{
-		this->possNeighborFilesRecvX = possNeighborFiles;
-	}
+    if (sor=="send")
+    {
+        this->possNeighborFilesSendX = possNeighborFiles;
+    } 
+    else if (sor == "recv")
+    {
+        this->possNeighborFilesRecvX = possNeighborFiles;
+    }
 }
 void Parameter::setPossNeighborFilesY(std::vector<std::string> possNeighborFiles, std::string sor)
 {
-	if (sor=="send")
-	{
-		this->possNeighborFilesSendY = possNeighborFiles;
-	} 
-	else if (sor == "recv")
-	{
-		this->possNeighborFilesRecvY = possNeighborFiles;
-	}
+    if (sor=="send")
+    {
+        this->possNeighborFilesSendY = possNeighborFiles;
+    } 
+    else if (sor == "recv")
+    {
+        this->possNeighborFilesRecvY = possNeighborFiles;
+    }
 }
 void Parameter::setPossNeighborFilesZ(std::vector<std::string> possNeighborFiles, std::string sor)
 {
-	if (sor=="send")
-	{
-		this->possNeighborFilesSendZ = possNeighborFiles;
-	} 
-	else if (sor == "recv")
-	{
-		this->possNeighborFilesRecvZ = possNeighborFiles;
-	}
+    if (sor=="send")
+    {
+        this->possNeighborFilesSendZ = possNeighborFiles;
+    } 
+    else if (sor == "recv")
+    {
+        this->possNeighborFilesRecvZ = possNeighborFiles;
+    }
 }
 void Parameter::setNumberOfProcessNeighborsX(unsigned int numberOfProcessNeighbors, int level, std::string sor)
 {
-	if (sor=="send")
-	{
-		parH[level]->sendProcessNeighborX.resize(numberOfProcessNeighbors);
-		parD[level]->sendProcessNeighborX.resize(numberOfProcessNeighbors);
-		//////////////////////////////////////////////////////////////////////////
-		if (getDiffOn()==true){
-			parH[level]->sendProcessNeighborADX.resize(numberOfProcessNeighbors);
-			parD[level]->sendProcessNeighborADX.resize(numberOfProcessNeighbors);
-		}
-		//////////////////////////////////////////////////////////////////////////
-	} 
-	else if (sor == "recv")
-	{
-		parH[level]->recvProcessNeighborX.resize(numberOfProcessNeighbors);
-		parD[level]->recvProcessNeighborX.resize(numberOfProcessNeighbors);
-		//////////////////////////////////////////////////////////////////////////
-		if (getDiffOn()==true){
-			parH[level]->recvProcessNeighborADX.resize(numberOfProcessNeighbors);
-			parD[level]->recvProcessNeighborADX.resize(numberOfProcessNeighbors);
-		}
-		//////////////////////////////////////////////////////////////////////////
-	}
+    if (sor=="send")
+    {
+        parH[level]->sendProcessNeighborX.resize(numberOfProcessNeighbors);
+        parD[level]->sendProcessNeighborX.resize(numberOfProcessNeighbors);
+        //////////////////////////////////////////////////////////////////////////
+        if (getDiffOn()==true){
+            parH[level]->sendProcessNeighborADX.resize(numberOfProcessNeighbors);
+            parD[level]->sendProcessNeighborADX.resize(numberOfProcessNeighbors);
+        }
+        //////////////////////////////////////////////////////////////////////////
+    } 
+    else if (sor == "recv")
+    {
+        parH[level]->recvProcessNeighborX.resize(numberOfProcessNeighbors);
+        parD[level]->recvProcessNeighborX.resize(numberOfProcessNeighbors);
+        //////////////////////////////////////////////////////////////////////////
+        if (getDiffOn()==true){
+            parH[level]->recvProcessNeighborADX.resize(numberOfProcessNeighbors);
+            parD[level]->recvProcessNeighborADX.resize(numberOfProcessNeighbors);
+        }
+        //////////////////////////////////////////////////////////////////////////
+    }
 }
 void Parameter::setNumberOfProcessNeighborsY(unsigned int numberOfProcessNeighbors, int level, std::string sor)
 {
-	if (sor=="send")
-	{
-		parH[level]->sendProcessNeighborY.resize(numberOfProcessNeighbors);
-		parD[level]->sendProcessNeighborY.resize(numberOfProcessNeighbors);
-		//////////////////////////////////////////////////////////////////////////
-		if (getDiffOn()==true){
-			parH[level]->sendProcessNeighborADY.resize(numberOfProcessNeighbors);
-			parD[level]->sendProcessNeighborADY.resize(numberOfProcessNeighbors);
-		}
-		//////////////////////////////////////////////////////////////////////////
-	} 
-	else if (sor == "recv")
-	{
-		parH[level]->recvProcessNeighborY.resize(numberOfProcessNeighbors);
-		parD[level]->recvProcessNeighborY.resize(numberOfProcessNeighbors);
-		//////////////////////////////////////////////////////////////////////////
-		if (getDiffOn()==true){
-			parH[level]->recvProcessNeighborADY.resize(numberOfProcessNeighbors);
-			parD[level]->recvProcessNeighborADY.resize(numberOfProcessNeighbors);
-		}
-		//////////////////////////////////////////////////////////////////////////
-	}
+    if (sor=="send")
+    {
+        parH[level]->sendProcessNeighborY.resize(numberOfProcessNeighbors);
+        parD[level]->sendProcessNeighborY.resize(numberOfProcessNeighbors);
+        //////////////////////////////////////////////////////////////////////////
+        if (getDiffOn()==true){
+            parH[level]->sendProcessNeighborADY.resize(numberOfProcessNeighbors);
+            parD[level]->sendProcessNeighborADY.resize(numberOfProcessNeighbors);
+        }
+        //////////////////////////////////////////////////////////////////////////
+    } 
+    else if (sor == "recv")
+    {
+        parH[level]->recvProcessNeighborY.resize(numberOfProcessNeighbors);
+        parD[level]->recvProcessNeighborY.resize(numberOfProcessNeighbors);
+        //////////////////////////////////////////////////////////////////////////
+        if (getDiffOn()==true){
+            parH[level]->recvProcessNeighborADY.resize(numberOfProcessNeighbors);
+            parD[level]->recvProcessNeighborADY.resize(numberOfProcessNeighbors);
+        }
+        //////////////////////////////////////////////////////////////////////////
+    }
 }
 void Parameter::setNumberOfProcessNeighborsZ(unsigned int numberOfProcessNeighbors, int level, std::string sor)
 {
-	if (sor=="send")
-	{
-		parH[level]->sendProcessNeighborZ.resize(numberOfProcessNeighbors);
-		parD[level]->sendProcessNeighborZ.resize(numberOfProcessNeighbors);
-		//////////////////////////////////////////////////////////////////////////
-		if (getDiffOn()==true){
-			parH[level]->sendProcessNeighborADZ.resize(numberOfProcessNeighbors);
-			parD[level]->sendProcessNeighborADZ.resize(numberOfProcessNeighbors);
-		}
-		//////////////////////////////////////////////////////////////////////////
-	} 
-	else if (sor == "recv")
-	{
-		parH[level]->recvProcessNeighborZ.resize(numberOfProcessNeighbors);
-		parD[level]->recvProcessNeighborZ.resize(numberOfProcessNeighbors);
-		//////////////////////////////////////////////////////////////////////////
-		if (getDiffOn()==true){
-			parH[level]->recvProcessNeighborADZ.resize(numberOfProcessNeighbors);
-			parD[level]->recvProcessNeighborADZ.resize(numberOfProcessNeighbors);
-		}
-		//////////////////////////////////////////////////////////////////////////
-	}
+    if (sor=="send")
+    {
+        parH[level]->sendProcessNeighborZ.resize(numberOfProcessNeighbors);
+        parD[level]->sendProcessNeighborZ.resize(numberOfProcessNeighbors);
+        //////////////////////////////////////////////////////////////////////////
+        if (getDiffOn()==true){
+            parH[level]->sendProcessNeighborADZ.resize(numberOfProcessNeighbors);
+            parD[level]->sendProcessNeighborADZ.resize(numberOfProcessNeighbors);
+        }
+        //////////////////////////////////////////////////////////////////////////
+    } 
+    else if (sor == "recv")
+    {
+        parH[level]->recvProcessNeighborZ.resize(numberOfProcessNeighbors);
+        parD[level]->recvProcessNeighborZ.resize(numberOfProcessNeighbors);
+        //////////////////////////////////////////////////////////////////////////
+        if (getDiffOn()==true){
+            parH[level]->recvProcessNeighborADZ.resize(numberOfProcessNeighbors);
+            parD[level]->recvProcessNeighborADZ.resize(numberOfProcessNeighbors);
+        }
+        //////////////////////////////////////////////////////////////////////////
+    }
 }
 void Parameter::setIsNeighborX(bool isNeigbor)
 {
-	this->isNeigborX = isNeigbor;
+    this->isNeigborX = isNeigbor;
 }
 void Parameter::setIsNeighborY(bool isNeigbor)
 {
-	this->isNeigborY = isNeigbor;
+    this->isNeigborY = isNeigbor;
 }
 void Parameter::setIsNeighborZ(bool isNeigbor)
 {
-	this->isNeigborZ = isNeigbor; }
+    this->isNeigborZ = isNeigbor; }
 void Parameter::setSendProcessNeighborsAfterFtoCX(int numberOfNodes, int level, int arrayIndex) {
     this->getParH(level)->sendProcessNeighborsAfterFtoCX[arrayIndex].numberOfNodes = numberOfNodes;
     this->getParD(level)->sendProcessNeighborsAfterFtoCX[arrayIndex].numberOfNodes = numberOfNodes;
@@ -1488,61 +1505,61 @@ void Parameter::setRecvProcessNeighborsAfterFtoCZ(int numberOfNodes, int level,
 }
 void Parameter::setgeomBoundaryNormalX(std::string geomNormalX)
 {
-	ic.geomNormalX = geomNormalX;
+    ic.geomNormalX = geomNormalX;
 }
 void Parameter::setgeomBoundaryNormalY(std::string geomNormalY)
 {
-	ic.geomNormalY = geomNormalY;
+    ic.geomNormalY = geomNormalY;
 }
 void Parameter::setgeomBoundaryNormalZ(std::string geomNormalZ)
 {
-	ic.geomNormalZ = geomNormalZ;
+    ic.geomNormalZ = geomNormalZ;
 }
 void Parameter::setInflowBoundaryNormalX(std::string inflowNormalX)
 {
-	ic.inflowNormalX = inflowNormalX;
+    ic.inflowNormalX = inflowNormalX;
 }
 void Parameter::setInflowBoundaryNormalY(std::string inflowNormalY)
 {
-	ic.inflowNormalY = inflowNormalY;
+    ic.inflowNormalY = inflowNormalY;
 }
 void Parameter::setInflowBoundaryNormalZ(std::string inflowNormalZ)
 {
-	ic.inflowNormalZ = inflowNormalZ;
+    ic.inflowNormalZ = inflowNormalZ;
 }
 void Parameter::setOutflowBoundaryNormalX(std::string outflowNormalX)
 {
-	ic.outflowNormalX = outflowNormalX;
+    ic.outflowNormalX = outflowNormalX;
 }
 void Parameter::setOutflowBoundaryNormalY(std::string outflowNormalY)
 {
-	ic.outflowNormalY = outflowNormalY;
+    ic.outflowNormalY = outflowNormalY;
 }
 void Parameter::setOutflowBoundaryNormalZ(std::string outflowNormalZ)
 {
-	ic.outflowNormalZ = outflowNormalZ;
+    ic.outflowNormalZ = outflowNormalZ;
 }
 void Parameter::setMainKernel(std::string kernel)
 {
-	this->mainKernel = kernel;
+    this->mainKernel = kernel;
     if (kernel.find("Stream") != std::string::npos)
         this->kernelNeedsFluidNodeIndicesToRun = true;
 }
 void Parameter::setMultiKernelOn(bool isOn)
 {
-	this->multiKernelOn = isOn;
+    this->multiKernelOn = isOn;
 }
 void Parameter::setMultiKernelLevel(std::vector< int> kernelLevel)
 {
-	this->multiKernelLevel = kernelLevel;
+    this->multiKernelLevel = kernelLevel;
 }
 void Parameter::setMultiKernel(std::vector< std::string> kernel)
 {
-	this->multiKernel = kernel;
+    this->multiKernel = kernel;
 }
 void Parameter::setADKernel(std::string adKernel)
 {
-	this->adKernel = adKernel;
+    this->adKernel = adKernel;
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -1553,15 +1570,15 @@ void Parameter::setADKernel(std::string adKernel)
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 double* Parameter::getForcesDouble()
 {
-	return this->hostForcing;
+    return this->hostForcing;
 }
 real* Parameter::getForcesHost()
 {
-	return this->forcingH;
+    return this->forcingH;
 }
 real* Parameter::getForcesDev()
 {
-	return this->forcingD;
+    return this->forcingD;
 }
 double * Parameter::getQuadricLimitersDouble()
 {
@@ -1577,373 +1594,377 @@ real * Parameter::getQuadricLimitersDev()
 }
 real Parameter::getPhi()
 {
-	return Phi;
+    return Phi;
 }
 real Parameter::getAngularVelocity()
 {
-	return angularVelocity;
+    return angularVelocity;
 }
 real Parameter::getStartXHotWall()
 {
-	return this->startXHotWall;
+    return this->startXHotWall;
 }
 real Parameter::getEndXHotWall()
 {
-	return this->endXHotWall;
+    return this->endXHotWall;
 }
 unsigned int Parameter::getStepEnsight()
 {
-	return this->stepEnsight;
+    return this->stepEnsight;
 }
 unsigned int Parameter::getOutputCount()
 {
-	return this->outputCount;
+    return this->outputCount;
 }
 unsigned int Parameter::getlimitOfNodesForVTK()
 {
-	return this->limitOfNodesForVTK;
+    return this->limitOfNodesForVTK;
 }
 unsigned int Parameter::getStartTurn()
 {
-	return startTurn;
+    return startTurn;
 }
 std::shared_ptr<LBMSimulationParameter> Parameter::getParD(int level)
 {
-	return parD[level];
+    return parD[level];
 }
 std::shared_ptr<LBMSimulationParameter> Parameter::getParH(int level)
 {
-	return parH[level];
+    return parH[level];
 }
 unsigned int Parameter::getSizeMat(int level)
 {
-	return parH[level]->size_Mat;
+    return parH[level]->size_Mat;
 }
 unsigned int Parameter::getMemSizereal(int level)
 {
-	return parH[level]->mem_size_real;
+    return parH[level]->mem_size_real;
 }
 unsigned int Parameter::getMemSizeInt(int level)
 {
-	return parH[level]->mem_size_int;
+    return parH[level]->mem_size_int;
 }
 unsigned int Parameter::getMemSizeBool(int level)
 {
-	return parH[level]->mem_size_bool;
+    return parH[level]->mem_size_bool;
 }
 unsigned int Parameter::getMemSizerealYZ(int level)
 {
-	return parH[level]->mem_size_real_yz;
+    return parH[level]->mem_size_real_yz;
 }
 int Parameter::getFine()
 {
-	return fine;
+    return fine;
 }
 int Parameter::getCoarse()
 {
-	return coarse;
+    return coarse;
 }
 int Parameter::getParticleBasicLevel()
 {
-	return this->particleBasicLevel;
+    return this->particleBasicLevel;
 }
 int Parameter::getParticleInitLevel()
 {
-	return this->particleInitLevel;
+    return this->particleInitLevel;
 }
 int Parameter::getNumberOfParticles()
 {
-	return this->numberOfParticles;
+    return this->numberOfParticles;
 }
 bool Parameter::getEvenOrOdd(int level)
 {
-	return parH[level]->evenOrOdd;
+    return parH[level]->evenOrOdd;
 }
 bool Parameter::getDiffOn()
 {
-	return diffOn;
+    return diffOn;
 }
 bool Parameter::getCompOn()
 {
-	return compOn;
+    return compOn;
 }
 int Parameter::getDiffMod()
 {
-	return diffMod;
+    return diffMod;
 }
 int Parameter::getFactorNZ()
 {
-	return factor_gridNZ;
+    return factor_gridNZ;
 }
 int Parameter::getD3Qxx()
 {
-	return this->D3Qxx;
+    return this->D3Qxx;
 }
 int Parameter::getMaxLevel()
 {
-	return this->maxlevel;
+    return this->maxlevel;
 }
 unsigned int Parameter::getTStart()
 {
-	if (getDoRestart())
-	{
-		return getTimeDoRestart() + 1;
-	} 
-	else
-	{
-		return 1;
-	}
+    if (getDoRestart())
+    {
+        return getTimeDoRestart() + 1;
+    } 
+    else
+    {
+        return 1;
+    }
 }
 unsigned int Parameter::getTInit()
 {
-	if (getDoRestart())
-	{
-		return getTimeDoRestart();
-	} 
-	else
-	{
-		return 0;
-	}
+    if (getDoRestart())
+    {
+        return getTimeDoRestart();
+    } 
+    else
+    {
+        return 0;
+    }
 }
 unsigned int Parameter::getTEnd()
 {
-	return ic.tend;
+    return ic.tend;
 }
 unsigned int Parameter::getTOut()
 {
-	return ic.tout;
+    return ic.tout;
 }
 unsigned int Parameter::getTStartOut()
 {
-	return ic.tStartOut;
+    return ic.tStartOut;
 }
 bool Parameter::getCalcMedian()
 {
-	return ic.calcMedian;
+    return ic.calcMedian;
 }
 bool Parameter::getCalcDragLift()
 {
-	return this->calcDragLift;
+    return this->calcDragLift;
 }
 bool Parameter::getCalcCp()
 {
-	return this->calcCp;
+    return this->calcCp;
 }
 bool Parameter::getCalcParticle()
 {
-	return this->calcParticles;
+    return this->calcParticles;
 }
 bool Parameter::getWriteVeloASCIIfiles()
 {
-	return this->writeVeloASCII;
+    return this->writeVeloASCII;
 }
 bool Parameter::getCalcPlaneConc()
 {
-	return this->calcPlaneConc;
+    return this->calcPlaneConc;
 }
 int Parameter::getTimeCalcMedStart()
 {
-	return ic.tCalcMedStart;
+    return ic.tCalcMedStart;
 }
 int Parameter::getTimeCalcMedEnd()
 {
-	return ic.tCalcMedEnd;
+    return ic.tCalcMedEnd;
 }
 std::string Parameter::getOutputPath()
 {
-	return ic.oPath;
+    return ic.oPath;
 }
 std::string Parameter::getOutputPrefix()
 {
-	return ic.oPrefix;
+    return ic.oPrefix;
 }
 std::string Parameter::getFName()
 {
-	return ic.fname;
+    return ic.fname;
 }
 bool Parameter::getPrintFiles()
 {
-	return ic.printFiles;
+    return ic.printFiles;
 }
 bool Parameter::getReadGeo()
 {
-	return ic.readGeo;
+    return ic.readGeo; 
+}
+bool Parameter::getCalcTurbulenceIntensity() 
+{ 
+    return this->calcVelocityAndFluctuations; 
 }
 real Parameter::getDiffusivity()
 {
-	return ic.Diffusivity;
+    return ic.Diffusivity;
 }
 real Parameter::getTemperatureInit()
 {
-	return ic.Temp;
+    return ic.Temp;
 }
 real Parameter::getTemperatureBC()
 {
-	return ic.TempBC;
+    return ic.TempBC;
 }
 real Parameter::getViscosity()
 {
-	return ic.vis;
+    return ic.vis;
 }
 real Parameter::getVelocity()
 {
-	return ic.u0;
+    return ic.u0;
 }
 real Parameter::getViscosityRatio()
 {
-	return ic.vis_ratio;
+    return ic.vis_ratio;
 }
 real Parameter::getVelocityRatio()
 {
-	return ic.u0_ratio;
+    return ic.u0_ratio;
 }
 real Parameter::getDensityRatio()
 {
-	return ic.delta_rho;
+    return ic.delta_rho;
 }
 real Parameter::getPressRatio()
 {
-	return ic.delta_press;
+    return ic.delta_press;
 }
 real Parameter::getRealX()
 {
-	return ic.RealX;
+    return ic.RealX;
 }
 real Parameter::getRealY()
 {
-	return ic.RealY;
+    return ic.RealY;
 }
 unsigned int Parameter::getPressInID()
 {
-	return ic.PressInID;
+    return ic.PressInID;
 }
 unsigned int Parameter::getPressOutID()
 {
-	return ic.PressOutID;
+    return ic.PressOutID;
 }
 unsigned int Parameter::getPressInZ()
 {
-	return ic.PressInZ;
+    return ic.PressInZ;
 }
 unsigned int Parameter::getPressOutZ()
 {
-	return ic.PressOutZ;
+    return ic.PressOutZ;
 }
 int Parameter::getMaxDev()
 {
-	return ic.maxdev;
+    return ic.maxdev;
 }
 int Parameter::getMyID()
 {
-	return ic.myid;
+    return ic.myid;
 }
 int Parameter::getNumprocs()
 {
-	return ic.numprocs;
+    return ic.numprocs;
 }
 std::vector<uint> Parameter::getDevices()
 {
-	return ic.devices;
+    return ic.devices;
 }
 std::string Parameter::getGeometryFileC()
 {
-	return ic.geometryFileC;
+    return ic.geometryFileC;
 }
 std::string Parameter::getGeometryFileM()
 {
-	return ic.geometryFileM;
+    return ic.geometryFileM;
 }
 std::string Parameter::getGeometryFileF()
 {
-	return ic.geometryFileF;
+    return ic.geometryFileF;
 }
 real Parameter::getRe()
 {
-	return ic.Re;
+    return ic.Re;
 }
 real Parameter::getFactorPressBC()
 {
-	return ic.factorPressBC;
+    return ic.factorPressBC;
 }
 std::vector<int> Parameter::getGridX()
 {
-	return ic.GridX;
+    return ic.GridX;
 }
 std::vector<int> Parameter::getGridY()
 {
-	return ic.GridY;
+    return ic.GridY;
 }
 std::vector<int> Parameter::getGridZ()
 {
-	return ic.GridZ;
+    return ic.GridZ;
 }
 std::vector<int> Parameter::getDistX()
 {
-	return ic.DistX;
+    return ic.DistX;
 }
 std::vector<int> Parameter::getDistY()
 {
-	return ic.DistY;
+    return ic.DistY;
 }
 std::vector<int> Parameter::getDistZ()
 {
-	return ic.DistZ;
+    return ic.DistZ;
 }
 std::vector<real> Parameter::getScaleLBMtoSI()
 {
-	return ic.scaleLBMtoSI;
+    return ic.scaleLBMtoSI;
 }
 std::vector<real> Parameter::getTranslateLBMtoSI()
 {
-	return ic.translateLBMtoSI;
+    return ic.translateLBMtoSI;
 }
 std::vector<real> Parameter::getMinCoordX()
 {
-	return ic.minCoordX;
+    return ic.minCoordX;
 }
 std::vector<real> Parameter::getMinCoordY()
 {
-	return ic.minCoordY;
+    return ic.minCoordY;
 }
 std::vector<real> Parameter::getMinCoordZ()
 {
-	return ic.minCoordZ;
+    return ic.minCoordZ;
 }
 std::vector<real> Parameter::getMaxCoordX()
 {
-	return ic.maxCoordX;
+    return ic.maxCoordX;
 }
 std::vector<real> Parameter::getMaxCoordY()
 {
-	return ic.maxCoordY;
+    return ic.maxCoordY;
 }
 std::vector<real> Parameter::getMaxCoordZ()
 {
-	return ic.maxCoordZ;
+    return ic.maxCoordZ;
 }
 TempforBoundaryConditions* Parameter::getTempH()
 {
-	return this->TempH;
+    return this->TempH;
 }
 TempforBoundaryConditions* Parameter::getTempD()
 {
-	return this->TempD;
+    return this->TempD;
 }
 TempVelforBoundaryConditions* Parameter::getTempVelH()
 {
-	return this->TempVelH;
+    return this->TempVelH;
 }
 TempVelforBoundaryConditions* Parameter::getTempVelD()
 {
-	return this->TempVelD;
+    return this->TempVelD;
 }
 TempPressforBoundaryConditions* Parameter::getTempPressH()
 {
-	return this->TempPressH;
+    return this->TempPressH;
 }
 TempPressforBoundaryConditions* Parameter::getTempPressD()
 {
-	return this->TempPressD;
+    return this->TempPressD;
 }
 //unsigned int Parameter::getkInflowQ()
 //{
@@ -1971,522 +1992,522 @@ TempPressforBoundaryConditions* Parameter::getTempPressD()
 //}
 std::string Parameter::getkFull()
 {
-	return ic.kFull;
+    return ic.kFull;
 }
 std::string Parameter::getgeoFull()
 {
-	return ic.geoFull;
+    return ic.geoFull;
 }
 std::string Parameter::getgeoVec()
 {
-	return ic.geoVec;
+    return ic.geoVec;
 }
 std::string Parameter::getcoordX()
 {
-	return ic.coordX;
+    return ic.coordX;
 }
 std::string Parameter::getcoordY()
 {
-	return ic.coordY;
+    return ic.coordY;
 }
 std::string Parameter::getcoordZ()
 {
-	return ic.coordZ;
+    return ic.coordZ;
 }
 std::string Parameter::getneighborX()
 {
-	return ic.neighborX;
+    return ic.neighborX;
 }
 std::string Parameter::getneighborY()
 {
-	return ic.neighborY;
+    return ic.neighborY;
 }
 std::string Parameter::getneighborZ()
 {
-	return ic.neighborZ;
+    return ic.neighborZ;
 }
 std::string Parameter::getneighborWSB()
 {
-	return ic.neighborWSB;
+    return ic.neighborWSB;
 }
 std::string Parameter::getscaleCFC()
 {
-	return ic.scaleCFC;
+    return ic.scaleCFC;
 }
 std::string Parameter::getscaleCFF()
 {
-	return ic.scaleCFF;
+    return ic.scaleCFF;
 }
 std::string Parameter::getscaleFCC()
 {
-	return ic.scaleFCC;
+    return ic.scaleFCC;
 }
 std::string Parameter::getscaleFCF()
 {
-	return ic.scaleFCF;
+    return ic.scaleFCF;
 }
 std::string Parameter::getscaleOffsetCF()
 {
-	return ic.scaleOffsetCF;
+    return ic.scaleOffsetCF;
 }
 std::string Parameter::getscaleOffsetFC()
 {
-	return ic.scaleOffsetFC;
+    return ic.scaleOffsetFC;
 }
 std::string Parameter::getgeomBoundaryBcQs()
 {
-	return ic.geomBoundaryBcQs;
+    return ic.geomBoundaryBcQs;
 }
 std::string Parameter::getgeomBoundaryBcValues()
 {
-	return ic.geomBoundaryBcValues;
+    return ic.geomBoundaryBcValues;
 }
 std::string Parameter::getnoSlipBcPos()
 {
-	return ic.noSlipBcPos;
+    return ic.noSlipBcPos;
 }
 std::string Parameter::getnoSlipBcQs()
 {
-	return ic.noSlipBcQs;
+    return ic.noSlipBcQs;
 }
 std::string Parameter::getnoSlipBcValue()
 {
-	return ic.noSlipBcValue;
+    return ic.noSlipBcValue;
 }
 std::string Parameter::getnoSlipBcValues()
 {
-	return ic.noSlipBcValues;
+    return ic.noSlipBcValues;
 }
 std::string Parameter::getslipBcPos()
 {
-	return ic.slipBcPos;
+    return ic.slipBcPos;
 }
 std::string Parameter::getslipBcQs()
 {
-	return ic.slipBcQs;
+    return ic.slipBcQs;
 }
 std::string Parameter::getslipBcValue()
 {
-	return ic.slipBcValue;
+    return ic.slipBcValue;
 }
 std::string Parameter::getpressBcPos()
 {
-	return ic.pressBcPos;
+    return ic.pressBcPos;
 }
 std::string Parameter::getpressBcQs()
 {
-	return ic.pressBcQs;
+    return ic.pressBcQs;
 }
 std::string Parameter::getpressBcValue()
 {
-	return ic.pressBcValue;
+    return ic.pressBcValue;
 }
 std::string Parameter::getpressBcValues()
 {
-	return ic.pressBcValues;
+    return ic.pressBcValues;
 }
 std::string Parameter::getvelBcQs()
 {
-	return ic.velBcQs;
+    return ic.velBcQs;
 }
 std::string Parameter::getvelBcValues()
 {
-	return ic.velBcValues;
+    return ic.velBcValues;
 }
 std::string Parameter::getinletBcQs()
 {
-	return ic.inletBcQs;
+    return ic.inletBcQs;
 }
 std::string Parameter::getinletBcValues()
 {
-	return ic.inletBcValues;
+    return ic.inletBcValues;
 }
 std::string Parameter::getoutletBcQs()
 {
-	return ic.outletBcQs;
+    return ic.outletBcQs;
 }
 std::string Parameter::getoutletBcValues()
 {
-	return ic.outletBcValues;
+    return ic.outletBcValues;
 }
 std::string Parameter::gettopBcQs()
 {
-	return ic.topBcQs;
+    return ic.topBcQs;
 }
 std::string Parameter::gettopBcValues()
 {
-	return ic.topBcValues;
+    return ic.topBcValues;
 }
 std::string Parameter::getbottomBcQs()
 {
-	return ic.bottomBcQs;
+    return ic.bottomBcQs;
 }
 std::string Parameter::getbottomBcValues()
 {
-	return ic.bottomBcValues;
+    return ic.bottomBcValues;
 }
 std::string Parameter::getfrontBcQs()
 {
-	return ic.frontBcQs;
+    return ic.frontBcQs;
 }
 std::string Parameter::getfrontBcValues()
 {
-	return ic.frontBcValues;
+    return ic.frontBcValues;
 }
 std::string Parameter::getbackBcQs()
 {
-	return ic.backBcQs;
+    return ic.backBcQs;
 }
 std::string Parameter::getbackBcValues()
 {
-	return ic.backBcValues;
+    return ic.backBcValues;
 }
 std::string Parameter::getwallBcQs()
 {
-	return ic.wallBcQs;
+    return ic.wallBcQs;
 }
 std::string Parameter::getwallBcValues()
 {
-	return ic.wallBcValues;
+    return ic.wallBcValues;
 }
 std::string Parameter::getperiodicBcQs()
 {
-	return ic.periodicBcQs;
+    return ic.periodicBcQs;
 }
 std::string Parameter::getperiodicBcValues()
 {
-	return ic.periodicBcValues;
+    return ic.periodicBcValues;
 }
 std::string Parameter::getpropellerQs()
 {
-	return ic.propellerQs;
+    return ic.propellerQs;
 }
 std::string Parameter::getpropellerValues()
 {
-	return ic.propellerValues;
+    return ic.propellerValues;
 }
 std::string Parameter::getpropellerCylinder()
 {
-	return ic.propellerCylinder;
+    return ic.propellerCylinder;
 }
 std::string Parameter::getmeasurePoints()
 {
-	return ic.measurePoints;
+    return ic.measurePoints;
 }
 std::string Parameter::getLBMvsSI()
 {
-	return ic.LBMvsSI;
+    return ic.LBMvsSI;
 }
 std::string Parameter::getnumberNodes()
 {
-	return ic.numberNodes;
+    return ic.numberNodes;
 }
 std::string Parameter::getcpTop()
 {
-	return ic.cpTop;
+    return ic.cpTop;
 }
 std::string Parameter::getcpBottom()
 {
-	return ic.cpBottom;
+    return ic.cpBottom;
 }
 std::string Parameter::getcpBottom2()
 {
-	return ic.cpBottom2;
+    return ic.cpBottom2;
 }
 std::string Parameter::getConcentration()
 {
-	return ic.concentration;
+    return ic.concentration;
 }
 std::string Parameter::getStreetVelocityFilePath()
 {
-	return ic.streetVelocity;
+    return ic.streetVelocity;
 }
 real Parameter::getclockCycleForMP()
 {
-	return ic.clockCycleForMP;
+    return ic.clockCycleForMP;
 }
 unsigned int Parameter::getTimeDoCheckPoint()
 {
-	return ic.tDoCheckPoint;
+    return ic.tDoCheckPoint;
 }
 unsigned int Parameter::getTimeDoRestart()
 {
-	return ic.tDoRestart;
+    return ic.tDoRestart;
 }
 bool Parameter::getDoCheckPoint()
 {
-	return ic.doCheckPoint;
+    return ic.doCheckPoint;
 }
 bool Parameter::getDoRestart()
 {
-	return ic.doRestart;
+    return ic.doRestart;
 }
 bool Parameter::getIsGeo()
 {
-	return ic.isGeo;
+    return ic.isGeo;
 }
 bool Parameter::getIsGeoNormal()
 {
-	return ic.isGeoNormal;
+    return ic.isGeoNormal;
 }
 bool Parameter::getIsInflowNormal()
 {
-	return ic.isInflowNormal;
+    return ic.isInflowNormal;
 }
 bool Parameter::getIsOutflowNormal()
 {
-	return ic.isOutflowNormal;
+    return ic.isOutflowNormal;
 }
 bool Parameter::getIsCp()
 {
-	return ic.isCp;
+    return ic.isCp;
 }
 bool Parameter::getConcFile()
 {
-	return ic.isConc;
+    return ic.isConc;
 }
 bool Parameter::isStreetVelocityFile()
 {
-	return ic.streetVelocityFile;
+    return ic.streetVelocityFile;
 }
 bool Parameter::getUseMeasurePoints()
 {
-	return ic.isMeasurePoints;
+    return ic.isMeasurePoints;
 }
 bool Parameter::getUseWale()
 {
-	return ic.isWale;
+    return ic.isWale;
 }
 bool Parameter::getUseInitNeq()
 {
-	return ic.isInitNeq;
+    return ic.isInitNeq;
 }
 bool Parameter::getSimulatePorousMedia()
 {
-	return ic.simulatePorousMedia;
+    return ic.simulatePorousMedia;
 }
 
 bool Parameter::getIsF3()
 {
-	return this->isF3; 
+    return this->isF3; 
 }
 
 bool Parameter::getIsBodyForce() 
 { 
-	return this->isBodyForce; 
+    return this->isBodyForce; 
 }
 
 bool Parameter::getIsGeometryValues()
 {
-	return ic.GeometryValues;
+    return ic.GeometryValues;
 }
 bool Parameter::getCalc2ndOrderMoments()
 {
-	return ic.is2ndOrderMoments;
+    return ic.is2ndOrderMoments;
 }
 bool Parameter::getCalc3rdOrderMoments()
 {
-	return ic.is3rdOrderMoments;
+    return ic.is3rdOrderMoments;
 }
 bool Parameter::getCalcHighOrderMoments()
 {
-	return ic.isHighOrderMoments;
+    return ic.isHighOrderMoments;
 }
 bool Parameter::getIsProp()
 {
-	return ic.isProp;
+    return ic.isProp;
 }
 bool Parameter::overWritingRestart(uint t)
 {
-	return t == getTimeDoRestart();
+    return t == getTimeDoRestart();
 }
 unsigned int Parameter::getTimestepForMP()
 {
-	return ic.timeStepForMP;
+    return ic.timeStepForMP;
 }
 unsigned int Parameter::getTimestepOfCoarseLevel()
 {
-	return this->timestep;
+    return this->timestep;
 }
 double Parameter::getMemsizeGPU()
 {
-	return this->memsizeGPU;
+    return this->memsizeGPU;
 }
 //1D domain decomposition
 std::vector<std::string> Parameter::getPossNeighborFiles(std::string sor)
 {
-	if (sor=="send")
-	{
-		return this->possNeighborFilesSend;
-	} 
-	else if (sor == "recv")
-	{
-		return this->possNeighborFilesRecv;
-	}
+    if (sor=="send")
+    {
+        return this->possNeighborFilesSend;
+    } 
+    else if (sor == "recv")
+    {
+        return this->possNeighborFilesRecv;
+    }
     throw std::runtime_error("Parameter string invalid.");
 }
 unsigned int Parameter::getNumberOfProcessNeighbors(int level, std::string sor)
 {
-	if (sor=="send")
-	{
-		return (unsigned int)parH[level]->sendProcessNeighbor.size();
-	} 
-	else if (sor == "recv")
-	{
-		return (unsigned int)parH[level]->recvProcessNeighbor.size();
-	}
+    if (sor=="send")
+    {
+        return (unsigned int)parH[level]->sendProcessNeighbor.size();
+    } 
+    else if (sor == "recv")
+    {
+        return (unsigned int)parH[level]->recvProcessNeighbor.size();
+    }
     throw std::runtime_error("Parameter string invalid.");
 }
 bool Parameter::getIsNeighbor()
 {
-	return this->isNeigbor;
+    return this->isNeigbor;
 }
 //3D domain decomposition
 std::vector<std::string> Parameter::getPossNeighborFilesX(std::string sor)
 {
-	if (sor=="send")
-	{
-		return this->possNeighborFilesSendX;
-	} 
-	else if (sor == "recv")
-	{
-		return this->possNeighborFilesRecvX;
-	}
+    if (sor=="send")
+    {
+        return this->possNeighborFilesSendX;
+    } 
+    else if (sor == "recv")
+    {
+        return this->possNeighborFilesRecvX;
+    }
     throw std::runtime_error("Parameter string invalid.");
 }
 std::vector<std::string> Parameter::getPossNeighborFilesY(std::string sor)
 {
-	if (sor=="send")
-	{
-		return this->possNeighborFilesSendY;
-	} 
-	else if (sor == "recv")
-	{
-		return this->possNeighborFilesRecvY;
-	}
+    if (sor=="send")
+    {
+        return this->possNeighborFilesSendY;
+    } 
+    else if (sor == "recv")
+    {
+        return this->possNeighborFilesRecvY;
+    }
     throw std::runtime_error("Parameter string invalid.");
 }
 std::vector<std::string> Parameter::getPossNeighborFilesZ(std::string sor)
 {
-	if (sor=="send")
-	{
-		return this->possNeighborFilesSendZ;
-	} 
-	else if (sor == "recv")
-	{
-		return this->possNeighborFilesRecvZ;
-	}
+    if (sor=="send")
+    {
+        return this->possNeighborFilesSendZ;
+    } 
+    else if (sor == "recv")
+    {
+        return this->possNeighborFilesRecvZ;
+    }
     throw std::runtime_error("Parameter string invalid.");
 }
 unsigned int Parameter::getNumberOfProcessNeighborsX(int level, std::string sor)
 {
-	if (sor=="send")
-	{
-		return (unsigned int)parH[level]->sendProcessNeighborX.size();
-	} 
-	else if (sor == "recv")
-	{
-		return (unsigned int)parH[level]->recvProcessNeighborX.size();
-	}
+    if (sor=="send")
+    {
+        return (unsigned int)parH[level]->sendProcessNeighborX.size();
+    } 
+    else if (sor == "recv")
+    {
+        return (unsigned int)parH[level]->recvProcessNeighborX.size();
+    }
     throw std::runtime_error("getNumberOfProcessNeighborsX: Parameter string invalid.");
 }
 unsigned int Parameter::getNumberOfProcessNeighborsY(int level, std::string sor)
 {
-	if (sor=="send")
-	{
-		return (unsigned int)parH[level]->sendProcessNeighborY.size();
-	} 
-	else if (sor == "recv")
-	{
-		return (unsigned int)parH[level]->recvProcessNeighborY.size();
-	}
+    if (sor=="send")
+    {
+        return (unsigned int)parH[level]->sendProcessNeighborY.size();
+    } 
+    else if (sor == "recv")
+    {
+        return (unsigned int)parH[level]->recvProcessNeighborY.size();
+    }
     throw std::runtime_error("getNumberOfProcessNeighborsY: Parameter string invalid.");
 }
 unsigned int Parameter::getNumberOfProcessNeighborsZ(int level, std::string sor)
 {
-	if (sor=="send")
-	{
-		return (unsigned int)parH[level]->sendProcessNeighborZ.size();
-	} 
-	else if (sor == "recv")
-	{
-		return (unsigned int)parH[level]->recvProcessNeighborZ.size();
-	}
+    if (sor=="send")
+    {
+        return (unsigned int)parH[level]->sendProcessNeighborZ.size();
+    } 
+    else if (sor == "recv")
+    {
+        return (unsigned int)parH[level]->recvProcessNeighborZ.size();
+    }
     throw std::runtime_error("getNumberOfProcessNeighborsZ: Parameter string invalid.");
 }
 
 bool Parameter::getIsNeighborX()
 {
-	return this->isNeigborX;
+    return this->isNeigborX;
 }
 bool Parameter::getIsNeighborY()
 {
-	return this->isNeigborY;
+    return this->isNeigborY;
 }
 bool Parameter::getIsNeighborZ()
 {
-	return this->isNeigborZ;
+    return this->isNeigborZ;
 }
 std::string Parameter::getgeomBoundaryNormalX()
 {
-	return ic.geomNormalX;
+    return ic.geomNormalX;
 }
 std::string Parameter::getgeomBoundaryNormalY()
 {
-	return ic.geomNormalY;
+    return ic.geomNormalY;
 }
 std::string Parameter::getgeomBoundaryNormalZ()
 {
-	return ic.geomNormalZ;
+    return ic.geomNormalZ;
 }
 std::string Parameter::getInflowBoundaryNormalX()
 {
-	return ic.inflowNormalX;
+    return ic.inflowNormalX;
 }
 std::string Parameter::getInflowBoundaryNormalY()
 {
-	return ic.inflowNormalY;
+    return ic.inflowNormalY;
 }
 std::string Parameter::getInflowBoundaryNormalZ()
 {
-	return ic.inflowNormalZ;
+    return ic.inflowNormalZ;
 }
 std::string Parameter::getOutflowBoundaryNormalX()
 {
-	return ic.outflowNormalX;
+    return ic.outflowNormalX;
 }
 std::string Parameter::getOutflowBoundaryNormalY()
 {
-	return ic.outflowNormalY;
+    return ic.outflowNormalY;
 }
 std::string Parameter::getOutflowBoundaryNormalZ()
 {
-	return ic.outflowNormalZ;
+    return ic.outflowNormalZ;
 }
 curandState* Parameter::getRandomState()
 {
-	return this->devState;
+    return this->devState;
 }
 
 std::string Parameter::getMainKernel()
 {
-	return mainKernel;
+    return mainKernel;
 }
 bool Parameter::getMultiKernelOn()
 {
-	return multiKernelOn;
+    return multiKernelOn;
 }
 std::vector< int> Parameter::getMultiKernelLevel()
 {
-	return multiKernelLevel;
+    return multiKernelLevel;
 }
 std::vector<std::string> Parameter::getMultiKernel()
 {
-	return multiKernel;
+    return multiKernel;
 }
 std::string Parameter::getADKernel()
 {
-	return adKernel;
+    return adKernel;
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -2502,49 +2523,49 @@ std::function<void(real,real,real,real&,real&,real&,real&)>& Parameter::getIniti
 
 real Parameter::TrafoXtoWorld(int CoordX, int level)
 {
-	return (parH[level]->mTtoWx*CoordX+parH[level]->cTtoWx);
+    return (parH[level]->mTtoWx*CoordX+parH[level]->cTtoWx);
 }
 real Parameter::TrafoYtoWorld(int CoordY, int level)
 {
-	return (parH[level]->mTtoWy*CoordY+parH[level]->cTtoWy);
+    return (parH[level]->mTtoWy*CoordY+parH[level]->cTtoWy);
 }
 real Parameter::TrafoZtoWorld(int CoordZ, int level)
 {
-	return (parH[level]->mTtoWz*CoordZ+parH[level]->cTtoWz);
+    return (parH[level]->mTtoWz*CoordZ+parH[level]->cTtoWz);
 }
 real Parameter::TrafoXtoMGsWorld(int CoordX, int level)
 {
-	real temp = 0;
-	for (int i = 0; i <= level; i++)
-	{
-		temp += (parH[i]->XdistKn + 0.25f) * 2.f * parH[i]->dx;
-	}
-	temp += (real)((CoordX ) * parH[level]->dx);
-	return temp;
+    real temp = 0;
+    for (int i = 0; i <= level; i++)
+    {
+        temp += (parH[i]->XdistKn + 0.25f) * 2.f * parH[i]->dx;
+    }
+    temp += (real)((CoordX ) * parH[level]->dx);
+    return temp;
 }
 real Parameter::TrafoYtoMGsWorld(int CoordY, int level)
 {
-	real temp = 0;
-	for (int i = 0; i <= level; i++)
-	{
-		temp += (parH[i]->YdistKn + 0.25f) * 2.f * parH[i]->dx;
-	}
-	temp += (real)((CoordY ) * parH[level]->dx);
-	return temp;
+    real temp = 0;
+    for (int i = 0; i <= level; i++)
+    {
+        temp += (parH[i]->YdistKn + 0.25f) * 2.f * parH[i]->dx;
+    }
+    temp += (real)((CoordY ) * parH[level]->dx);
+    return temp;
 }
 real Parameter::TrafoZtoMGsWorld(int CoordZ, int level)
 {
-	real temp = 0;
-	for (int i = 0; i <= level; i++)
-	{
-		temp += (parH[i]->ZdistKn + 0.25f) * 2.f * parH[i]->dx;
-	}
-	temp += (real)((CoordZ) * parH[level]->dx);
-	return temp;
+    real temp = 0;
+    for (int i = 0; i <= level; i++)
+    {
+        temp += (parH[i]->ZdistKn + 0.25f) * 2.f * parH[i]->dx;
+    }
+    temp += (real)((CoordZ) * parH[level]->dx);
+    return temp;
 }
 
 void Parameter::setUseStreams() {
-	this->useStreams = true;
+    this->useStreams = true;
     this->cudaStreamManager = std::make_unique<CudaStreamManager>();
 }
 
@@ -2555,11 +2576,11 @@ std::unique_ptr<CudaStreamManager> &Parameter::getStreamManager() { return this-
 bool Parameter::getKernelNeedsFluidNodeIndicesToRun() { return this->kernelNeedsFluidNodeIndicesToRun; }
 
 void Parameter::findEdgeNodesCommMultiGPU() { 
-	for (uint level = 0; level < parH.size(); level++) {
+    for (uint level = 0; level < parH.size(); level++) {
         findEdgeNodesXY(level);
-		findEdgeNodesXZ(level);
+        findEdgeNodesXZ(level);
         findEdgeNodesYZ(level);
-	}
+    }
 }
 
 void Parameter::findEdgeNodesXY(int level)
@@ -2567,30 +2588,30 @@ void Parameter::findEdgeNodesXY(int level)
     int indexOfProcessNeighborSend;
     int indexInSendBuffer;
     for (uint i = 0; i < (unsigned int)(this->getNumberOfProcessNeighborsX(level, "recv")); i++)
-	{
+    {
         for (int j = 0; j < parH[level]->recvProcessNeighborX[i].numberOfNodes; j++) {
             int index = parH[level]->recvProcessNeighborX[i].index[j];
             bool foundIndex = findIndexInSendNodesXY(level, index, indexOfProcessNeighborSend, indexInSendBuffer);
             if (foundIndex){
                 this->parH[level]->edgeNodesXtoY.emplace_back(i, j, indexOfProcessNeighborSend, indexInSendBuffer);
-			}
-		}		
-	}
+            }
+        }
+    }
 }
 
 bool Parameter::findIndexInSendNodesXY(int level, int index, int &indexOfProcessNeighborSend, int &indexInSendBuffer)
 {
     for (uint k = 0; k < (unsigned int)(this->getNumberOfProcessNeighborsY(level, "send")); k++)
-	{
+    {
         for (int l = 0; l < parH[level]->sendProcessNeighborY[k].numberOfNodes; l++)
-		{
+        {
             if (parH[level]->sendProcessNeighborY[k].index[l] == index) {;
                 indexOfProcessNeighborSend = k;
                 indexInSendBuffer          = l;
                 return true;
             }
-		}
-	}
+        }
+    }
     return false;
 }
 
@@ -2599,15 +2620,15 @@ void Parameter::findEdgeNodesXZ(int level)
     int indexOfProcessNeighborSend;
     int indexInSendBuffer;
     for (uint i = 0; i < (unsigned int)(this->getNumberOfProcessNeighborsX(level, "recv")); i++)
-	{
+    {
         for (int j = 0; j < parH[level]->recvProcessNeighborX[i].numberOfNodes; j++) {
             int index       = parH[level]->recvProcessNeighborX[i].index[j];
             bool foundIndex = findIndexInSendNodesXZ(level, index, indexOfProcessNeighborSend, indexInSendBuffer);
             if (foundIndex){
                 this->parH[level]->edgeNodesXtoZ.emplace_back(i, j, indexOfProcessNeighborSend, indexInSendBuffer);
-			}
+            }
         }
-	}
+    }
 }
 
 bool Parameter::findIndexInSendNodesXZ(int level, int index, int &indexOfProcessNeighborSend, int &indexInSendBuffer)
@@ -2619,8 +2640,8 @@ bool Parameter::findIndexInSendNodesXZ(int level, int index, int &indexOfProcess
                 indexInSendBuffer          = l;
                 return true;
             }
-		}
-	}
+        }
+    }
     return false;
 }
 
@@ -2634,9 +2655,9 @@ void Parameter::findEdgeNodesYZ(int level)
             bool foundIndex = findIndexInSendNodesYZ(level, index, indexOfProcessNeighborSend, indexInSendBuffer);
             if (foundIndex){
                 this->parH[level]->edgeNodesYtoZ.emplace_back(i, j, indexOfProcessNeighborSend, indexInSendBuffer);
-			}
+            }
         }
-	}
+    }
 }
 
 bool Parameter::findIndexInSendNodesYZ(int level, int index, int &indexOfProcessNeighborSend, int &indexInSendBuffer)
@@ -2648,8 +2669,8 @@ bool Parameter::findIndexInSendNodesYZ(int level, int index, int &indexOfProcess
                 indexInSendBuffer          = l;
                 return true;
             }
-		}
-	}
+        }
+    }
     return false;
 }
 
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
index 48430875d3994ee68c4f8582d28f81fd8db0dc18..d351309b408ca29a350ff657959ef3f2ff047b57 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
@@ -118,6 +118,11 @@ struct LBMSimulationParameter
     real *turbViscosity;
     real *gSij, *gSDij, *gDxvx, *gDyvx, *gDzvx, *gDxvy, *gDyvy, *gDzvy, *gDxvz, *gDyvz, *gDzvz; // DebugInformation
 
+    // turbulence intensity //
+    real *vx_mean, *vy_mean, *vz_mean; // mean values of the velocity components
+    real *vxx, *vyy, *vzz, *vxy, *vxz, *vyz; // velocity fluctuation terms (diagonal and mixed components)
+    std::vector<real> turbulenceIntensity;
+
     // macroscopic values//////
     real *vx, *vy, *vz, *rho;
     real *vx_SP, *vy_SP, *vz_SP, *rho_SP, *press_SP;
@@ -382,6 +387,7 @@ public:
     void setTOut(unsigned int tout);
     void setTStartOut(unsigned int tStartOut);
     void setTimestepOfCoarseLevel(unsigned int timestep);
+    void setCalcTurbulenceIntensity(bool calcVelocityAndFluctuations);
     void setCalcMedian(bool calcMedian);
     void setCalcDragLift(bool calcDragLift);
     void setCalcCp(bool calcCp);
@@ -585,6 +591,7 @@ public:
     bool getCompOn();
     bool getPrintFiles();
     bool getReadGeo();
+    bool getCalcTurbulenceIntensity();
     bool getCalcMedian();
     bool getCalcDragLift();
     bool getCalcCp();
@@ -808,6 +815,7 @@ private:
     bool calcCp { false };
     bool writeVeloASCII { false };
     bool calcPlaneConc { false };
+    bool calcVelocityAndFluctuations{ false };
     bool isBodyForce;
     int diffMod {27};
     int maxlevel {0};