diff --git a/apps/gpu/LBM/MusselOyster/configPhoenix1GPU.txt b/apps/gpu/LBM/MusselOyster/configPhoenix1GPU.txt
index 3b703d6e4f32924457d9d207045c031f74180514..a1eb3358a0f7abeba7eb540edb899ae417144448 100644
--- a/apps/gpu/LBM/MusselOyster/configPhoenix1GPU.txt
+++ b/apps/gpu/LBM/MusselOyster/configPhoenix1GPU.txt
@@ -7,7 +7,7 @@ NumberOfDevices=1
 ##################################################
 #informations for Writing
 ##################################################
-Path=/work/y0078217/Results/MusselOysterResults/1GPUMussel2/
+Path=/work/y0078217/Results/MusselOysterResults/1GPUMussel1/
 #Path="F:/Work/Computations/out/MusselOyster/"
 #Prefix="MusselOyster" 
 #WriteGrid=true
@@ -31,6 +31,6 @@ GridPath=/work/y0078217/Grids/GridMusselOyster/Mussel1GPU/
 ##################################################
 #simulation parameter
 ##################################################
-TimeEnd=500000
-TimeOut=100000 
+TimeEnd=400000
+TimeOut=200000 
 #TimeStartOut=0
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/SphereScaling.cpp b/apps/gpu/LBM/SphereScaling/SphereScaling.cpp
index 33fbfbf17024d1d83c08ce14dff5880f5c7d97e5..2217511b3fc8caf70b698a8c3bbfcaf041aa2b88 100644
--- a/apps/gpu/LBM/SphereScaling/SphereScaling.cpp
+++ b/apps/gpu/LBM/SphereScaling/SphereScaling.cpp
@@ -119,10 +119,11 @@ void multipleLevel(const std::string& configPath)
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
     bool useGridGenerator                  = true;
-    bool useStreams                        = false;
-    bool useLevels                         = true;
-    para->useReducedCommunicationAfterFtoC = false;
-    std::string scalingType                = "weak"; // "strong" // "weak"
+    bool useLevels                         = false;
+    std::string scalingType                = "strong"; // "strong" // "weak"
+    // bool useStreams                        = true;
+    // para->useReducedCommunicationAfterFtoC = true;
+    bool useStreams = para->getUseStreams();
 
     if (para->getNumprocs() == 1) {
        useStreams       = false;
@@ -197,11 +198,12 @@ void multipleLevel(const std::string& configPath)
 
     if (useGridGenerator) {
         real sideLengthCube;
-        if (useLevels)
+        if (useLevels){
             if (scalingType == "strong")
-                sideLengthCube = 70.0; // Phoenix: strong scaling with two levels = 76.0
+                sideLengthCube = 76.0; // Phoenix: strong scaling with two levels = 76.0
             else if (scalingType == "weak")
                 sideLengthCube = 70.0; // Phoenix: weak scaling with two levels = 70.0
+        }
         else
             sideLengthCube = 86.0; // Phoenix: 86.0
         real xGridMin          = 0.0; 
@@ -286,13 +288,13 @@ void multipleLevel(const std::string& configPath)
                 gridBuilder->setPeriodicBoundaryCondition(false, false, false);
                 //////////////////////////////////////////////////////////////////////////
                 gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
-                gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
                 gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
                 gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
                 if (generatePart == 0)
                     gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
                 if (generatePart == 1)
                     gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
                 // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
                 //////////////////////////////////////////////////////////////////////////
            
@@ -404,7 +406,7 @@ void multipleLevel(const std::string& configPath)
                     gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
                 }
                 gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
-                gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
+                gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
                 // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
                 //////////////////////////////////////////////////////////////////////////
             } else if (comm->getNummberOfProcess() == 8) {
@@ -585,13 +587,13 @@ void multipleLevel(const std::string& configPath)
                 }
                 if (generatePart == 2) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
-                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
                     gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
                 }
                 if (generatePart == 3) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
-                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
                     gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
                 }
                 if (generatePart == 4) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
@@ -605,13 +607,13 @@ void multipleLevel(const std::string& configPath)
                 }
                 if (generatePart == 6) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
-                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
                     gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
                 }
                 if (generatePart == 7) {
                     gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
-                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
                     gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
                 }
                 // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
                 //////////////////////////////////////////////////////////////////////////                
@@ -656,10 +658,10 @@ void multipleLevel(const std::string& configPath)
             //////////////////////////////////////////////////////////////////////////
             gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
             gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
-            gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
             gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
             gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
             gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+            gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
 
             // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
             //////////////////////////////////////////////////////////////////////////
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix8GPU.txt b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU.txt
index a3aa0dca2f11b7b33631a99a32bb0eee82469fef..a62a49094745209d585830ee60531b195acbfd2b 100644
--- a/apps/gpu/LBM/SphereScaling/configPhoenix8GPU.txt
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU.txt
@@ -14,7 +14,7 @@ Path=/work/y0078217/Results/SphereScalingResults/8GPU/
 ##################################################
 #informations for reading
 ##################################################
-GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling4GPU/
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling8GPU/
 #GridPath="C:"
 
 ##################################################
@@ -38,5 +38,5 @@ TimeOut=100000
 ##################################################
 # CUDA Streams and optimized communication (only used for multiple GPUs)
 ##################################################
-# useStreams = true
-# useReducedCommunicationInInterpolation = true
\ No newline at end of file
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevStrongOS.txt b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevStrongOS.txt
new file mode 100644
index 0000000000000000000000000000000000000000..892f11013d6742af416ba3b93a993b059a6fa3a0
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevStrongOS.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#informations for Writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/8GPU/1LevStrongOS/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#informations for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling8GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000 
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = false
+useReducedCommunicationInInterpolation = false
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevStrongStream.txt b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevStrongStream.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2715162f6c5bdd90908bf19d775d1f3e3dd4a52d
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevStrongStream.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#informations for Writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/8GPU/1LevStrongStream/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#informations for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling8GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000 
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling1/CMakeLists.txt b/apps/gpu/LBM/SphereScaling1/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a759675bd752a78d24f5dc795c3705b3679617c8
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling1/CMakeLists.txt
@@ -0,0 +1,10 @@
+PROJECT(SphereScaling1 LANGUAGES CUDA CXX)
+
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES SphereScaling1.cpp)
+
+set_source_files_properties(SphereScaling1.cpp PROPERTIES LANGUAGE CUDA)
+
+set_target_properties(SphereScaling1 PROPERTIES 
+	CUDA_SEPARABLE_COMPILATION ON)
+	# VS_DEBUGGER_COMMAND "C:/Program Files/Microsoft MPI/Bin/mpiexec.exe"
+    # VS_DEBUGGER_COMMAND_ARGUMENTS "-n 2 \"$<TARGET_FILE:SphereScaling1>\"")
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling1/SphereScaling1.cpp b/apps/gpu/LBM/SphereScaling1/SphereScaling1.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e97e5bb9bb8c320ea6e1594cb40d0269da00ce74
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling1/SphereScaling1.cpp
@@ -0,0 +1,754 @@
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <string>
+#include <sstream>
+#include <iostream>
+#include <stdexcept>
+#include <fstream>
+#include <exception>
+#include <memory>
+
+#include "mpi.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "basics/Core/DataTypes.h"
+#include "basics/PointerDefinitions.h"
+#include "basics/Core/VectorTypes.h"
+
+#include "basics/Core/LbmOrGks.h"
+#include "basics/Core/StringUtilities/StringUtil.h"
+#include "basics/config/ConfigurationFile.h"
+#include "basics/Core/Logger/Logger.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
+#include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
+#include "GridGenerator/grid/BoundaryConditions/Side.h"
+#include "GridGenerator/grid/GridFactory.h"
+
+#include "geometries/Sphere/Sphere.h"
+#include "geometries/Cuboid/Cuboid.h"
+#include "geometries/Conglomerate/Conglomerate.h"
+#include "geometries/TriangularMesh/TriangularMesh.h"
+
+#include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h"
+#include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h"
+#include "GridGenerator/io/STLReaderWriter/STLReader.h"
+#include "GridGenerator/io/STLReaderWriter/STLWriter.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "VirtualFluids_GPU/LBM/Simulation.h"
+#include "VirtualFluids_GPU/Communication/Communicator.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
+#include "VirtualFluids_GPU/Parameter/Parameter.h"
+#include "VirtualFluids_GPU/Output/FileWriter.h"
+
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.h"
+#include "VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h"
+
+#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
+
+//////////////////////////////////////////////////////////////////////////
+
+#include "utilities/communication.h"
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+//          U s e r    s e t t i n g s
+//
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+//  Tesla 03
+//  std::string outPath("E:/temp/SphereScalingResults/");
+//  std::string gridPathParent = "E:/temp/GridSphereScaling/";
+//  std::string simulationName("SphereScaling");
+// std::string stlPath("C:/Users/Master/Documents/MasterAnna/STL/Sphere/");
+
+// Phoenix
+std::string outPath("/work/y0078217/Results/SphereScalingResults/");
+std::string gridPathParent = "/work/y0078217/Grids/GridSphereScaling/";
+std::string simulationName("SphereScaling");
+std::string stlPath("/home/y0078217/STL/Sphere/");
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+void multipleLevel(const std::string& configPath)
+{
+    logging::Logger::addStream(&std::cout);
+    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
+    logging::Logger::timeStamp(logging::Logger::ENABLE);
+    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
+
+    auto gridFactory = GridFactory::make();
+    gridFactory->setGridStrategy(Device::CPU);
+    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
+
+    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
+    
+	vf::gpu::Communicator* comm = vf::gpu::Communicator::getInstanz();
+    vf::basics::ConfigurationFile config;
+    std::cout << configPath << std::endl;
+    config.load(configPath);
+    SPtr<Parameter> para = std::make_shared<Parameter>(config, comm->getNummberOfProcess(), comm->getPID());
+
+
+
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    bool useGridGenerator                  = true;
+    bool useLevels                         = true;
+    std::string scalingType                = "weak"; // "strong" // "weak"
+    // bool useStreams                        = true;
+    // para->useReducedCommunicationAfterFtoC = true;
+    bool useStreams = para->getUseStreams();
+
+    if (para->getNumprocs() == 1) {
+       useStreams       = false;
+       para->useReducedCommunicationAfterFtoC = false;
+    }
+    if (scalingType != "weak" && scalingType != "strong")
+        std::cerr << "unknown scaling type" << std::endl;
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    std::string gridPath(gridPathParent); // only for GridGenerator, for GridReader the gridPath needs to be set in the config file
+
+    real dxGrid      = (real)0.2;
+    real vxLB = (real)0.0005; // LB units
+    real viscosityLB = 0.001; //(vxLB * dxGrid) / Re;
+
+    para->setVelocity(vxLB);
+    para->setViscosity(viscosityLB);
+    para->setVelocityRatio((real) 58.82352941);
+    para->setViscosityRatio((real) 0.058823529);
+    para->setDensityRatio((real) 998.0);
+
+    *logging::out << logging::Logger::INFO_HIGH << "velocity LB [dx/dt] = " << vxLB << " \n";
+    *logging::out << logging::Logger::INFO_HIGH << "viscosity LB [dx^2/dt] = " << viscosityLB << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "velocity real [m/s] = " << vxLB * para->getVelocityRatio()<< " \n";
+    *logging::out << logging::Logger::INFO_HIGH << "viscosity real [m^2/s] = " << viscosityLB * para->getViscosityRatio() << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "dxGrid = " << dxGrid << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "useGridGenerator = " << useGridGenerator << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "useStreams = " << useStreams << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "number of processes = " << para->getNumprocs() << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "para->useReducedCommunicationAfterFtoC = " <<  para->useReducedCommunicationAfterFtoC << "\n";
+    *logging::out << logging::Logger::INFO_HIGH << "scalingType = " <<  scalingType << "\n";
+    
+    // para->setTOut(10);
+    // para->setTEnd(10);
+
+    para->setCalcDragLift(false);
+    para->setUseWale(false);
+
+    if (para->getOutputPath().size() == 0) {
+        para->setOutputPath(outPath);
+    }
+    para->setOutputPrefix(simulationName);
+    para->setFName(para->getOutputPath() + para->getOutputPrefix());
+    para->setPrintFiles(true);
+    std::cout << "Write result files to " << para->getFName() << std::endl;
+
+    if (useLevels)
+        para->setMaxLevel(2);
+    else
+        para->setMaxLevel(1);
+
+
+    if (useStreams)
+        para->setUseStreams();
+    //para->setMainKernel("CumulantK17CompChim");
+    para->setMainKernel("CumulantK17CompChimStream");
+    *logging::out << logging::Logger::INFO_HIGH << "Kernel: " << para->getMainKernel() << "\n";
+
+    // if (para->getNumprocs() == 4) {
+    //     para->setDevices(std::vector<uint>{ 0u, 1u, 2u, 3u });
+    //     para->setMaxDev(4);
+    // } else if (para->getNumprocs() == 2) {
+    //     para->setDevices(std::vector<uint>{ 2u, 3u });
+    //     para->setMaxDev(2);
+    // } else 
+    //     para->setDevices(std::vector<uint>{ 0u });
+    //     para->setMaxDev(1);
+
+
+
+    //////////////////////////////////////////////////////////////////////////
+
+
+    if (useGridGenerator) {
+        real sideLengthCube;
+        if (useLevels){
+            if (scalingType == "strong")
+                sideLengthCube = 76.0; // Phoenix: strong scaling with two levels = 76.0
+            else if (scalingType == "weak")
+                sideLengthCube = 70.0; // Phoenix: weak scaling with two levels = 70.0
+        }
+        else
+            sideLengthCube = 86.0; // Phoenix: 86.0
+        real xGridMin          = 0.0; 
+        real xGridMax          = sideLengthCube;
+        real yGridMin          = 0.0;
+        real yGridMax          = sideLengthCube;
+        real zGridMin          = 0.0;
+        real zGridMax          = sideLengthCube;
+        const real dSphere     = 10.0;
+        const real dSphereLev1 = 22.0; // Phoenix: 22.0
+        const real dCubeLev1   = 72.0; // Phoenix: 72.0
+
+        if (para->getNumprocs() > 1) {
+            const uint generatePart = vf::gpu::Communicator::getInstanz()->getPID();
+
+            real overlap = (real)8.0 * dxGrid;
+            gridBuilder->setNumberOfLayers(10, 8);
+
+            if (comm->getNummberOfProcess() == 2) {
+                real zSplit = 0.5 * sideLengthCube;
+                    
+                if (scalingType == "weak"){
+                    zSplit = zGridMax;
+                    zGridMax = zGridMax + sideLengthCube;
+                }
+
+                if (generatePart == 0) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xGridMax, yGridMax, zSplit + overlap,
+                                               dxGrid);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zSplit - overlap, xGridMax, yGridMax, zGridMax,
+                                               dxGrid);
+                }
+
+                if (useLevels) {
+                    if (scalingType == "strong"){
+                        gridBuilder->addGrid(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1), 1);
+                    } else if (scalingType == "weak"){
+                         gridBuilder->addGrid(new Cuboid( -0.5*dCubeLev1, -0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, 
+                                                           0.5*dCubeLev1,  0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1),1);
+                    }
+                }
+
+                if (scalingType == "weak"){
+                    if (useLevels) {
+                        gridBuilder->addGeometry(new Sphere(0.0, 0.0, sideLengthCube, dSphere));
+                    }else{
+                        // Sphere* sphere1 = new  Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere);
+                        // Sphere* sphere2 = new  Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 1.5 * sideLengthCube, dSphere);
+                        // auto conglo = Conglomerate::makeShared();
+                        // conglo->add(sphere1);
+                        // conglo->add(sphere2);
+                        // gridBuilder->addGeometry(conglo.get());
+
+                        TriangularMesh *sphereSTL = TriangularMesh::make(stlPath + "Spheres_2GPU.stl");
+                        gridBuilder->addGeometry(sphereSTL);
+                    }                    
+                } else if (scalingType == "strong") {
+                    gridBuilder->addGeometry(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
+                }
+
+                if (generatePart == 0)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xGridMax, yGridMin, yGridMax, zGridMin, zSplit));
+                if (generatePart == 1)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xGridMax, yGridMin, yGridMax, zSplit, zGridMax));
+
+                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                                
+                if (generatePart == 0) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 1);
+                }
+
+                if (generatePart == 1) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
+                }
+
+                gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+                //////////////////////////////////////////////////////////////////////////
+                gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                if (generatePart == 0)
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                if (generatePart == 1)
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+                //////////////////////////////////////////////////////////////////////////
+           
+            } else if (comm->getNummberOfProcess() == 4) {
+                real ySplit= 0.5 * sideLengthCube;
+                real zSplit= 0.5 * sideLengthCube;
+
+                if (scalingType == "weak") {
+                    ySplit = yGridMax;
+                    yGridMax = yGridMax + (yGridMax-yGridMin);
+                    zSplit = zGridMax;
+                    zGridMax = zGridMax + (zGridMax-zGridMin);
+                }
+
+                if (generatePart == 0) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xGridMax , ySplit + overlap,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->addCoarseGrid(xGridMin, ySplit - overlap, zGridMin, xGridMax, yGridMax,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zSplit - overlap, xGridMax, ySplit + overlap,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->addCoarseGrid(xGridMin, ySplit - overlap, zSplit - overlap, xGridMax, yGridMax,
+                                               zGridMax, dxGrid);
+                }
+
+                if (useLevels) {
+                    if (scalingType == "strong"){
+                        gridBuilder->addGrid(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1), 1);
+                    } else if (scalingType == "weak"){
+                         gridBuilder->addGrid(new Cuboid( -0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, 
+                                                           0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1),1);
+                    }
+                }
+
+                if (scalingType == "weak"){
+                    if (useLevels) {
+                        gridBuilder->addGeometry(new Sphere(0.0, sideLengthCube, sideLengthCube, dSphere));
+                    }else{
+                        TriangularMesh *sphereSTL = TriangularMesh::make(stlPath + "Spheres_4GPU.stl");
+                        gridBuilder->addGeometry(sphereSTL);
+                    }                    
+                } else if (scalingType == "strong") {
+                    gridBuilder->addGeometry(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
+                }
+
+                if (generatePart == 0)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xGridMax, yGridMin, ySplit, zGridMin, zSplit));
+                if (generatePart == 1)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xGridMax, ySplit, yGridMax, zGridMin, zSplit));
+                if (generatePart == 2)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xGridMax, yGridMin, ySplit, zSplit, zGridMax));
+                if (generatePart == 3)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xGridMax, ySplit, yGridMax, zSplit, zGridMax));
+
+
+                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+                                
+                if (generatePart == 0) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 1);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 2);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 0);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 3);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 3);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 2);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 1);
+                }
+
+                //////////////////////////////////////////////////////////////////////////
+                if (generatePart == 0) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);                    
+                }
+                if (generatePart == 3) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                }
+                gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+                //////////////////////////////////////////////////////////////////////////
+            } else if (comm->getNummberOfProcess() == 8) {
+                real xSplit = 0.5 * sideLengthCube;
+                real ySplit = 0.5 * sideLengthCube;
+                real zSplit = 0.5 * sideLengthCube;
+
+                if (scalingType == "weak") {                    
+                    xSplit = xGridMax;
+                    xGridMax = xGridMax + (xGridMax-xGridMin);
+                    ySplit = yGridMax;
+                    yGridMax = yGridMax + (yGridMax-yGridMin);
+                    zSplit = zGridMax;
+                    zGridMax = zGridMax + (zGridMax-zGridMin);                    
+                }
+
+                if (generatePart == 0) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xSplit + overlap, ySplit + overlap,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->addCoarseGrid(xGridMin, ySplit - overlap, zGridMin, xSplit + overlap, yGridMax,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, yGridMin, zGridMin, xGridMax, ySplit + overlap,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, ySplit - overlap, zGridMin, xGridMax, yGridMax,
+                                               zSplit + overlap, dxGrid);
+                }
+                if (generatePart == 4) {
+                    gridBuilder->addCoarseGrid(xGridMin, yGridMin, zSplit - overlap, xSplit + overlap, ySplit + overlap,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 5) {
+                    gridBuilder->addCoarseGrid(xGridMin, ySplit - overlap, zSplit - overlap, xSplit + overlap, yGridMax,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 6) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, yGridMin, zSplit - overlap, xGridMax, ySplit + overlap,
+                                               zGridMax, dxGrid);
+                }
+                if (generatePart == 7) {
+                    gridBuilder->addCoarseGrid(xSplit - overlap, ySplit - overlap, zSplit - overlap, xGridMax, yGridMax,
+                                               zGridMax, dxGrid);
+                }
+
+                if (useLevels) {
+                    gridBuilder->addGrid(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1), 1);                    
+                }
+
+
+                if (useLevels) {
+                    if (scalingType == "strong"){
+                        gridBuilder->addGrid(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1), 1);
+                    } else if (scalingType == "weak"){
+                         gridBuilder->addGrid(new Cuboid( sideLengthCube-0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, 
+                                                          sideLengthCube+0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1),1);
+                    }
+                }
+
+                if (scalingType == "weak"){
+                    if (useLevels) {
+                        gridBuilder->addGeometry(new Sphere(sideLengthCube, sideLengthCube, sideLengthCube, dSphere));
+                    }else{
+                        TriangularMesh *sphereSTL = TriangularMesh::make(stlPath + "Spheres_8GPU.stl");
+                        gridBuilder->addGeometry(sphereSTL);
+                    }                    
+                } else if (scalingType == "strong") {
+                    gridBuilder->addGeometry(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
+                }
+                
+                if (generatePart == 0)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, yGridMin, ySplit, zGridMin, zSplit));
+                if (generatePart == 1)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, ySplit, yGridMax, zGridMin, zSplit));
+                if (generatePart == 2)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, yGridMin, ySplit, zGridMin, zSplit));
+                if (generatePart == 3)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, ySplit, yGridMax, zGridMin, zSplit));
+                if (generatePart == 4)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, yGridMin, ySplit, zSplit, zGridMax));
+                if (generatePart == 5)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xGridMin, xSplit, ySplit, yGridMax, zSplit, zGridMax));
+                if (generatePart == 6)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, yGridMin, ySplit, zSplit, zGridMax));
+                if (generatePart == 7)
+                    gridBuilder->setSubDomainBox(
+                        std::make_shared<BoundingBox>(xSplit, xGridMax, ySplit, yGridMax, zSplit, zGridMax));
+
+                gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+                gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+
+                if (generatePart == 0) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 1);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 2);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 4);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 0);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 3);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 5);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 3);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 0);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 6);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 2);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 1);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PZ, 7);
+                }
+                if (generatePart == 4) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 5);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 6);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 0);
+                }
+                if (generatePart == 5) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 4);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 7);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 1);
+                }
+                if (generatePart == 6) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::PY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::PY, 7);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 4);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 2);
+                }
+                if (generatePart == 7) {
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MY, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MY, 6);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 5);
+                    gridBuilder->findCommunicationIndices(CommunicationDirections::MZ, LBM);
+                    gridBuilder->setCommunicationProcess(CommunicationDirections::MZ, 3);
+                }
+
+                //////////////////////////////////////////////////////////////////////////
+                if (generatePart == 0) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 1) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 2) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
+                }
+                if (generatePart == 3) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
+                }
+                if (generatePart == 4) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 5) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                }
+                if (generatePart == 6) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                }
+                if (generatePart == 7) {
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+                    gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+                    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+                }
+                // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+                //////////////////////////////////////////////////////////////////////////                
+            }
+            if (para->getKernelNeedsFluidNodeIndicesToRun())
+                gridBuilder->findFluidNodes(useStreams);
+
+            // gridBuilder->writeGridsToVtk(outPath + "grid/part" +
+            // std::to_string(generatePart) + "_"); gridBuilder->writeGridsToVtk(outPath +
+            // std::to_string(generatePart) + "/grid/"); gridBuilder->writeArrows(outPath + 
+            // std::to_string(generatePart) + " /arrow");
+
+            SimulationFileWriter::write(gridPath + std::to_string(generatePart) + "/", gridBuilder,
+                                        FILEFORMAT::BINARY);
+        } else {
+
+            gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xGridMax, yGridMax, zGridMax, dxGrid);
+
+            if (useLevels) {
+                    gridBuilder->setNumberOfLayers(10, 8);
+                if(scalingType == "strong"){
+                    gridBuilder->addGrid(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphereLev1), 1);
+                } else if (scalingType == "weak")
+                    gridBuilder->addGrid(new Cuboid( sideLengthCube-0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, sideLengthCube-0.5*dCubeLev1, 
+                                                     sideLengthCube+0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1, sideLengthCube+0.5*dCubeLev1),1);
+            }
+                
+            if (scalingType == "weak"){
+                if(useLevels){
+                    gridBuilder->addGeometry(new Sphere(sideLengthCube, sideLengthCube, sideLengthCube, dSphere));
+                }else{
+                   TriangularMesh *sphereSTL = TriangularMesh::make(stlPath + "Spheres_1GPU.stl");
+                   gridBuilder->addGeometry(sphereSTL);
+                }
+            } else {
+                gridBuilder->addGeometry(new Sphere(0.5 * sideLengthCube, 0.5 * sideLengthCube, 0.5 * sideLengthCube, dSphere));
+            }
+
+            gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
+            
+            gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+            //////////////////////////////////////////////////////////////////////////
+            gridBuilder->setVelocityBoundaryCondition(SideType::PY, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::MY, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::MX, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vxLB, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
+            gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);  // set pressure BC after velocity BCs
+
+            // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+            //////////////////////////////////////////////////////////////////////////
+            if (para->getKernelNeedsFluidNodeIndicesToRun())
+                gridBuilder->findFluidNodes(useStreams);
+
+            // gridBuilder->writeGridsToVtk("E:/temp/MusselOyster/" + "/grid/");
+            // gridBuilder->writeArrows ("E:/temp/MusselOyster/" + "/arrow");
+
+            SimulationFileWriter::write(gridPath, gridBuilder, FILEFORMAT::BINARY);
+        }        
+    }
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    SPtr<CudaMemoryManager> cudaMemoryManager = CudaMemoryManager::make(para);
+
+    SPtr<GridProvider> gridGenerator;
+    if (useGridGenerator)
+        gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager);
+    else {
+        gridGenerator = GridProvider::makeGridReader(FILEFORMAT::BINARY, para, cudaMemoryManager);
+    }
+           
+    Simulation sim;
+    SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
+    SPtr<KernelFactoryImp> kernelFactory = KernelFactoryImp::getInstance();
+    SPtr<PreProcessorFactoryImp> preProcessorFactory = PreProcessorFactoryImp::getInstance();
+    sim.setFactories(kernelFactory, preProcessorFactory);
+    sim.init(para, gridGenerator, fileWriter, cudaMemoryManager);
+    sim.run();
+    sim.free();
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    
+}
+
+int main( int argc, char* argv[])
+{
+    MPI_Init(&argc, &argv);
+    std::string str, str2, configFile;
+
+    if ( argv != NULL )
+    {
+        
+        try
+        {
+            //////////////////////////////////////////////////////////////////////////
+
+			std::string targetPath;
+
+			targetPath = __FILE__;
+
+            if (argc == 2) {
+                configFile = argv[1];
+                std::cout << "Using configFile command line argument: " << configFile << std::endl;
+            }
+
+#ifdef _WIN32
+            targetPath = targetPath.substr(0, targetPath.find_last_of('\\') + 1);
+#else
+            targetPath = targetPath.substr(0, targetPath.find_last_of('/') + 1);
+#endif
+
+			std::cout << targetPath << std::endl;
+
+            if (configFile.size()==0) {
+                configFile = targetPath + "config.txt";
+            }        
+
+			multipleLevel(configFile);
+
+            //////////////////////////////////////////////////////////////////////////
+		}
+        catch (const std::bad_alloc& e)
+        { 
+            *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
+        }
+        catch (const std::exception& e)
+        {   
+            *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
+        }
+        catch (...)
+        {
+            *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
+        }
+    }
+
+   MPI_Finalize();
+   return 0;
+}
diff --git a/apps/gpu/LBM/SphereScaling1/config.txt b/apps/gpu/LBM/SphereScaling1/config.txt
new file mode 100644
index 0000000000000000000000000000000000000000..44c5fedb297cc62f8b1b5d26c075bd5172cac081
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling1/config.txt
@@ -0,0 +1,46 @@
+# Tesla 03
+# mpiexec -n 2 "C:/Users/Master/Documents/MasterAnna/VirtualFluids_dev/build/bin/Release/SphereScaling.exe" "C:/Users/Master/Documents/MasterAnna/VirtualFluids_dev/apps/gpu/LBM/SphereScaling/config.txt"
+# Phoenix
+# mpirun -np 2 "./VirtualFluids_dev/build/bin/SphereScaling" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/config.txt"
+
+# Phoenix mpich
+# mpirun -np 2 nvprof -f -o SphereScaling.%q{PMI_RANK}.nvprof "./VirtualFluids_dev/build/bin/SphereScaling" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/configPhoenix4GPU.txt"
+# Phoenix openmpi
+# mpirun -np 2 nvprof -f -o SphereScaling.%q{OMPI_COMM_WORLD_RANK}.nvprof "./VirtualFluids_dev/build/bin/SphereScaling" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/configPhoenix4GPU.txt"
+
+##################################################
+#GPU Mapping
+##################################################
+#Devices="0 1 2 3"
+#NumberOfDevices=2
+
+##################################################
+#informations for Writing
+##################################################
+#Path="E:/temp/SphereScalingResults/"
+Path=/work/y0078217/Results/SphereScalingResults/
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#informations for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/
+#GridPath=E:/temp/GridSphereScaling/
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+#TimeEnd=10
+#TimeOut=10 
+#TimeStartOut=0
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevWeakOS.txt b/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevWeakOS.txt
new file mode 100644
index 0000000000000000000000000000000000000000..63b1e745aebe158e8573ca045fc228a6f9b7dd34
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevWeakOS.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#informations for Writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/8GPU/2LevWeakOS/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#informations for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling8GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000 
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = false
+useReducedCommunicationInInterpolation = false
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevWeakStream.txt b/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevWeakStream.txt
new file mode 100644
index 0000000000000000000000000000000000000000..714298741be1f37911c25efede7b8186323dd78c
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling1/configPhoenix8GPU_2LevWeakStream.txt
@@ -0,0 +1,42 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3 4 5 6 7"
+NumberOfDevices=8
+
+##################################################
+#informations for Writing
+##################################################
+Path=/work/y0078217/Results/SphereScalingResults/8GPU/1LevWeakStream/
+#Path="F:/Work/Computations/out/SphereScaling/"
+#Prefix="SphereScaling" 
+#WriteGrid=true
+##################################################
+#informations for reading
+##################################################
+GridPath=/work/y0078217/Grids/GridSphereScaling/SphereScaling8GPU/
+#GridPath="C:"
+
+##################################################
+#number of grid levels
+##################################################
+#NOGL=1
+
+##################################################
+#LBM Version
+##################################################
+#D3Qxx=27
+#MainKernelName=CumulantK17CompChim
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=100000
+TimeOut=100000 
+#TimeStartOut=0
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/gpu.cmake b/gpu.cmake
index a358baeb386158cae4f7183991e925b825d6acba..27a93b5d5618caefb615d1526cde2c298c1715ae 100644
--- a/gpu.cmake
+++ b/gpu.cmake
@@ -45,9 +45,10 @@ IF (BUILD_VF_GPU)
     #add_subdirectory(apps/gpu/LBM/TGV_3D)
     #add_subdirectory(apps/gpu/LBM/TGV_3D_MultiGPU)
 	add_subdirectory(apps/gpu/LBM/SphereScaling)
+    add_subdirectory(apps/gpu/LBM/SphereScaling1)
 	add_subdirectory(apps/gpu/LBM/MusselOyster)
-	add_subdirectory(apps/gpu/LBM/MusselOyster2x)
-	add_subdirectory(apps/gpu/LBM/MusselOyster3z)
+	#add_subdirectory(apps/gpu/LBM/MusselOyster2x)
+	#add_subdirectory(apps/gpu/LBM/MusselOyster3z)
 ELSE()
     MESSAGE( STATUS "exclude Virtual Fluids GPU." )
 ENDIF()