diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
old mode 100644
new mode 100755
index e171e2e7fbe1984588355f5a833a21160024da32..1cff49512e8325e7dad10f60d5b88c3a90f30420
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -235,7 +235,7 @@ gcc_9_python_hpc_test:
     - hpc-rocket launch --watch Python/SlurmTests/poiseuille/rocket.yml
 
 ###############################################################################
-multigpu_hpc_test:
+regression_test_4gpu:
   image: python:latest
   stage: test
 
@@ -252,12 +252,39 @@ multigpu_hpc_test:
     - pip install "fieldcompare[all]"
 
   script:
-    - hpc-rocket launch --watch regression-tests/multigpu_test/rocket.yml
+    - hpc-rocket launch --watch regression-tests/multigpu_test/rocket4GPU.yml
     - git clone --depth 1 --filter=blob:none --sparse https://github.com/irmb/test_data
     - cd test_data
-    - git sparse-checkout set regression_tests/gpu/DrivenCavity_4GPU_2Levels
+    - git sparse-checkout set regression_tests/gpu/DrivenCavity_4GPU_2Levels regression_tests/gpu/SphereScaling_4GPU_2Levels
     - cd ..
-    - fieldcompare dir output/results test_data/regression_tests/gpu/DrivenCavity_4GPU_2Levels --include-files "*.vtu"
+    - fieldcompare dir output/4GPU test_data/regression_tests/gpu/DrivenCavity_4GPU_2Levels --include-files "DrivenCavityMultiGPU*.vtu"
+    - fieldcompare dir output/4GPU test_data/regression_tests/gpu/SphereScaling_4GPU_2Levels --include-files "SphereScaling*.vtu"
+
+###############################################################################
+regression_test_8gpu:  # launches rocket8GPU.yml through hpc-rocket, then compares output/8GPU with reference data
+  image: python:latest
+  stage: test
+
+  rules:  # scheduled pipeline with all three remote variables set: always; merge request: never; otherwise: manual
+    - if: $REMOTE_USER && $REMOTE_HOST && $PRIVATE_KEY && $CI_PIPELINE_SOURCE == "schedule"
+      when: always
+    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
+      when: never
+    - when: manual
+      allow_failure: true
+
+  before_script:  # tools used by the script section: hpc-rocket (launch) and fieldcompare (result comparison)
+    - pip install hpc-rocket
+    - pip install "fieldcompare[all]"
+
+  script:  # 1) launch and watch the job, 2) sparse-clone the reference data, 3) compare per application
+    - hpc-rocket launch --watch regression-tests/multigpu_test/rocket8GPU.yml
+    - git clone --depth 1 --filter=blob:none --sparse https://github.com/irmb/test_data  # directories are selected below
+    - cd test_data
+    - git sparse-checkout set regression_tests/gpu/DrivenCavity_8GPU_2Levels regression_tests/gpu/SphereScaling_8GPU_2Levels
+    - cd ..
+    - fieldcompare dir output/8GPU test_data/regression_tests/gpu/DrivenCavity_8GPU_2Levels --include-files "DrivenCavityMultiGPU*.vtu"  # both apps share output/8GPU; the pattern picks one app's files
+    - fieldcompare dir output/8GPU test_data/regression_tests/gpu/SphereScaling_8GPU_2Levels --include-files "SphereScaling*.vtu"
 
 ###############################################################################
 ##                            Benchmark                                      ##
diff --git a/CMake/cmake_config_files/HUSSEIN.config.cmake b/CMake/cmake_config_files/HUSSEIN.config.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..9878edaaffb28ab04646493faa58eb18c05f7df9
--- /dev/null
+++ b/CMake/cmake_config_files/HUSSEIN.config.cmake
@@ -0,0 +1,14 @@
+#################################################################################
+# VirtualFluids MACHINE FILE
+# Responsible: Hussein Alihussein
+# OS:          Windows 10
+#################################################################################
+
+#################################################################################
+#  BOOST: version and locations of the local Boost tree, all derived from BOOST_ROOT
+#################################################################################
+SET(BOOST_VERSION "1.76.0")
+SET(BOOST_ROOT "C:/Users/hussein/Documents/VirtualFluids/boost_1_76_0")
+SET(BOOST_DIR "${BOOST_ROOT}")
+SET(BOOST_LIBRARYDIR "${BOOST_ROOT}/stageMSVC64/lib")  # quote the whole path: quotes placed after ${...} would become part of the value
+#################################################################################
diff --git a/CMake/cmake_config_files/PHOENIX.config.cmake b/CMake/cmake_config_files/PHOENIX.config.cmake
index 5ca4d9821d918f66745fc27363975811dc278440..d31d8684a53a769e48408ad5febe7d2c6b22c623 100644
--- a/CMake/cmake_config_files/PHOENIX.config.cmake
+++ b/CMake/cmake_config_files/PHOENIX.config.cmake
@@ -28,7 +28,7 @@ set(CMAKE_CUDA_ARCHITECTURES 60) # NVIDIA Tesla P100
 
 set(GPU_APP "apps/gpu/LBM/")
 list(APPEND USER_APPS 
-    "${GPU_APP}DrivenCavityMultiGPU"
+    # "${GPU_APP}DrivenCavityMultiGPU"
     # "${GPU_APP}SphereScaling"
     # "${GPU_APP}MusselOyster"
     )
diff --git a/apps/cpu/Applications.cmake b/apps/cpu/Applications.cmake
index 68dfeb3ed7687da74d49a35337b0bae92798e80d..3c71d51344030980071addc6f9831a74d0daa53c 100644
--- a/apps/cpu/Applications.cmake
+++ b/apps/cpu/Applications.cmake
@@ -83,4 +83,6 @@ ENDIF()
 #add_subdirectory(Applications/OrganPipe)
 #add_subdirectory(Applications/LidDrivenCavity)
 
-
+if(BUILD_USE_BOOST)
+    add_subdirectory(${APPS_ROOT_CPU}/TPMSRow)
+endif()
diff --git a/apps/cpu/FlowAroundCylinder/cylinder.cpp1 b/apps/cpu/FlowAroundCylinder/cylinder.cpp1
index 5321a23d1c03fe85270cf8e382b8d50dc5df4351..f4001248da3e7fec9921da00d6932c376ea6dc66 100644
--- a/apps/cpu/FlowAroundCylinder/cylinder.cpp1
+++ b/apps/cpu/FlowAroundCylinder/cylinder.cpp1
@@ -248,7 +248,7 @@ void run(const char *cstr)
          unsigned long nod = nob * (blocknx1+gl) * (blocknx2+gl) * (blocknx3+gl);
 
          double needMemAll  = double(nod*(27*sizeof(double) + sizeof(int) + sizeof(float)*4));
-         double needMem  = needMemAll / double(comm->getNummberOfProcesses());
+         double needMem  = needMemAll / double(comm->getNumberOfProcesses());
 
          if(myid == 0)
          {
diff --git a/apps/cpu/FlowAroundCylinder/cylinder.cpp2 b/apps/cpu/FlowAroundCylinder/cylinder.cpp2
index 4dc7285b37131250607166cca3de70db53935156..107f4882f38dbada406a106ce6bfa2a8122f7379 100644
--- a/apps/cpu/FlowAroundCylinder/cylinder.cpp2
+++ b/apps/cpu/FlowAroundCylinder/cylinder.cpp2
@@ -262,7 +262,7 @@ void run(const char *cstr)
          unsigned long nod = nob * (blocknx1+gl) * (blocknx2+gl) * (blocknx3+gl);
 
          double needMemAll  = double(nod*(27*sizeof(double) + sizeof(int) + sizeof(float)*4));
-         double needMem  = needMemAll / double(comm->getNummberOfProcesses());
+         double needMem  = needMemAll / double(comm->getNumberOfProcesses());
 
          if(myid == 0)
          {
diff --git a/apps/cpu/HerschelBulkleyModel/hbflow.cpp b/apps/cpu/HerschelBulkleyModel/hbflow.cpp
index 567fd661cd2e131e3f4f311285bd636f471dccb6..dcb82533ec1da20664babe4ee1f16df0bc10cbc0 100644
--- a/apps/cpu/HerschelBulkleyModel/hbflow.cpp
+++ b/apps/cpu/HerschelBulkleyModel/hbflow.cpp
@@ -137,21 +137,33 @@ void bflow(string configname)
       //noSlipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new RheologyPowellEyringModelNoSlipBCAlgorithm()));
       //noSlipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new RheologyBinghamModelNoSlipBCAlgorithm()));
 
+      mu::Parser fctVx;
+      fctVx.SetExpr("u");
+      fctVx.DefineConst("u", 0.001);
+ 
+
+      SPtr<BCAdapter> velocityBCAdapter(new VelocityBCAdapter(true, false, false, fctVx, 0, BCFunction::INFCONST));
+      //velocityBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityBCAlgorithm()));
+      velocityBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new RheologyBinghamModelVelocityBCAlgorithm()));
+
       //BS visitor
       BoundaryConditionsBlockVisitor bcVisitor;
       //bcVisitor.addBC(noSlipBCAdapter);
+      //bcVisitor.addBC(velocityBCAdapter);
 
       SPtr<BCProcessor> bcProc;
       bcProc = SPtr<BCProcessor>(new BCProcessor());
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new PowellEyringModelLBMKernel());
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new HerschelBulkleyModelLBMKernel());
-      SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new RheologyK17LBMKernel());
+      //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new RheologyK17LBMKernel());
+      SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new RheologyBinghamModelLBMKernel());
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new BinghamModelLBMKernel());
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new CompressibleCumulant4thOrderViscosityLBMKernel());
       
       //double forcingXY = forcing / sqrt(2.0);
       //kernel->setForcingX1(forcingXY);
       //kernel->setForcingX2(forcingXY);
+      
       kernel->setForcingX1(forcing);
       kernel->setWithForcing(true);
       kernel->setBCProcessor(bcProc);
@@ -293,6 +305,14 @@ void bflow(string configname)
       SPtr<UbScheduler> nupsSch(new UbScheduler(10, 30, 100));
       SPtr<CoProcessor> npr(new NUPSCounterCoProcessor(grid, nupsSch, numOfThreads, comm));
 
+      SPtr<UbScheduler> forceSch(new UbScheduler(1000));
+      //real dummy = 1;
+      SPtr<CalculateTorqueCoProcessor> fp = std::make_shared<CalculateTorqueCoProcessor>(grid, forceSch, pathname + "/forces/forces.csv", comm);
+      fp->addInteractor(addWallYminInt);
+
+      SPtr<CalculateTorqueCoProcessor> fp2 = std::make_shared<CalculateTorqueCoProcessor>(grid, forceSch, pathname + "/forces/forces2.csv", comm);
+      fp2->addInteractor(addWallYmaxInt);
+
       //write data for visualization of macroscopic quantities
       SPtr<UbScheduler> visSch(new UbScheduler(outTime));
       SPtr<WriteMacroscopicQuantitiesCoProcessor> writeMQCoProcessor(new WriteMacroscopicQuantitiesCoProcessor(grid, visSch, pathname,
@@ -305,6 +325,8 @@ void bflow(string configname)
       calculator->addCoProcessor(npr);
       calculator->addCoProcessor(writeMQCoProcessor);
       calculator->addCoProcessor(writeThixotropicMQCoProcessor);
+      calculator->addCoProcessor(fp);
+      calculator->addCoProcessor(fp2);
       //calculator->addCoProcessor(migCoProcessor);
       //calculator->addCoProcessor(restartCoProcessor);
 
diff --git a/apps/cpu/LaminarTubeFlow/ltf.cpp b/apps/cpu/LaminarTubeFlow/ltf.cpp
index 4fb96833162c896eda7229cef913c21d3a6b7c78..cbafef30c489a26b5f8df9610ec3e6ad7aa1da79 100644
--- a/apps/cpu/LaminarTubeFlow/ltf.cpp
+++ b/apps/cpu/LaminarTubeFlow/ltf.cpp
@@ -357,7 +357,7 @@ void run(string configname)
 int main(int argc, char *argv[])
 {
     try {
-        vf::logging::Logger::initalizeLogger();
+        vf::logging::Logger::initializeLogger();
 
         VF_LOG_INFO("Starting VirtualFluids...");
 
diff --git a/apps/cpu/TPMSRow/CMakeLists.txt b/apps/cpu/TPMSRow/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6ea54915ed2eb339e0e6e71c9d40c638120e58fa
--- /dev/null
+++ b/apps/cpu/TPMSRow/CMakeLists.txt
@@ -0,0 +1,10 @@
+PROJECT(TPMSRow)
+# Build TPMSRow.cpp with BUILDTYPE binary; the names after PRIVATE_LINK are the libraries handed to vf_add_library.
+vf_add_library(BUILDTYPE binary PRIVATE_LINK VirtualFluidsCore muparser basics ${MPI_CXX_LIBRARIES} FILES TPMSRow.cpp )
+
+vf_get_library_name (library_name)
+# Add the CPU apps root (${APPS_ROOT_CPU}) to this target's private include directories.
+# NOTE(review): presumably vf_get_library_name stores the target name in library_name - confirm in the VF CMake macros.
+target_include_directories(${library_name} PRIVATE ${APPS_ROOT_CPU})
+#target_include_directories(${library_name} PRIVATE "/cluster/lib/boost/1.63.0/gcc/include/")
+
diff --git a/apps/cpu/TPMSRow/TPMSRow.cfg b/apps/cpu/TPMSRow/TPMSRow.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..095b0bb32836969bca0ff86b4a31b20e70c72f2a
--- /dev/null
+++ b/apps/cpu/TPMSRow/TPMSRow.cfg
@@ -0,0 +1,34 @@
+pathname = E:\SimulationsResults\TPMSRow
+#pathname = C:\temp\TPMSRow
+#pathname = /mnt/c/temp/TPMSRow
+numOfThreads = 1
+availMem = 15e10
+refineLevel = 0
+
+#Grid
+length =0.025 0.01 0.01
+blocknx = 25 50 50
+#dx =0.000050000000000000000000000000000000
+dx=0.0001
+#Geometry
+UnitEdgeLength=0.01
+TPMSL = 0.01 0.01 0.01
+TPMSOrigin = 0 0 0
+gridCubeOrigin = -0.005 0 0
+
+#Simulation
+nu = 0.0001523579766536965
+Re = 6563.489631218715
+Re0 = 6563.489631218715
+
+timeAvStart = 300
+timeAvStop = 1500
+
+beginTime = 0
+outTime = 100
+endTime = 200
+logToFile = false
+newStart = true
+restartStep =  1200000
+cpStart =  1200000
+cpStep =  1200000
diff --git a/apps/cpu/TPMSRow/TPMSRow.cpp b/apps/cpu/TPMSRow/TPMSRow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6369340273cc21c666c1d049ad063a7ae0b8015e
--- /dev/null
+++ b/apps/cpu/TPMSRow/TPMSRow.cpp
@@ -0,0 +1,513 @@
+#include <iostream>
+#include <string>
+
+//#include <boost/pointer_cast.hpp>
+
+#include "VirtualFluids.h"
+
+using namespace std;
+using namespace vf::lbm::dir;
+using namespace vf::lbm::constant;
+
+void run(string configname)
+{
+    try {
+        vf::basics::ConfigurationFile config;
+        config.load(configname);
+
+        string pathname             = config.getValue<string>("pathname");
+        int numOfThreads            = config.getValue<int>("numOfThreads");
+        vector<int> blocknx         = config.getVector<int>("blocknx");
+        double beginTime            = config.getValue<double>("beginTime");
+        double endTime              = config.getValue<double>("endTime");
+        double outTime              = config.getValue<double>("outTime");
+        double availMem             = config.getValue<double>("availMem");
+        double nu                   = config.getValue<double>("nu");
+        double dx                   = config.getValue<double>("dx");
+        double UnitEdgeLength       = config.getValue<double>("UnitEdgeLength");
+        double Re                   = config.getValue<double>("Re");
+        double Re0                  = config.getValue<double>("Re0");
+        //double rhoIn                = config.getValue<double>("rhoIn");
+        //string geometry             = config.getValue<string>("geometry");
+        vector<double> length       = config.getVector<double>("length");
+        //vector<double> FunnelL      = config.getVector<double>("FunnelL");
+        //vector<double> FunnelOrigin = config.getVector<double>("FunnelOrigin");
+        
+        double          timeAvStart       = config.getValue<double>("timeAvStart");
+        double          timeAvStop        = config.getValue<double>("timeAvStop");
+
+        vector<double> TPMSL        = config.getVector<double>("TPMSL");
+        vector<double> TPMSOrigin   = config.getVector<double>("TPMSOrigin");
+        vector<double> gridCubeOrigin = config.getVector<double>("gridCubeOrigin");
+        int refineLevel             = config.getValue<int>("refineLevel");
+        bool logToFile              = config.getValue<bool>("logToFile");
+        double restartStep          = config.getValue<double>("restartStep");
+        double cpStart              = config.getValue<double>("cpStart");
+        double cpStep               = config.getValue<double>("cpStep");
+        bool newStart               = config.getValue<bool>("newStart");
+
+        //SPtr<Communicator> comm = MPICommunicator::getInstance();
+        SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+        int myid                = comm->getProcessID();
+        //int numOfProcesses      = comm->getNumberOfProcesses();
+
+        if (logToFile) {
+#if defined(__unix__)
+            if (myid == 0) {
+                const char *str = pathname.c_str();
+                mkdir(str, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
+            }
+#endif
+
+            if (myid == 0) {
+                stringstream logFilename;
+                logFilename << pathname + "/logfile" + UbSystem::toString(UbSystem::getTimeStamp()) + ".txt";
+                UbLog::output_policy::setStream(logFilename.str());
+            }
+        }
+        //dx = 1. / 100. / 112.;
+        double vx = Re * nu / (UnitEdgeLength / dx);
+
+        SPtr<LBMUnitConverter> conv = SPtr<LBMUnitConverter>(new LBMUnitConverter());
+
+        //UbSystem::makeDirectory(pathname);
+         //UbSystem::makeDirectory(pathname+ "/mig");
+         //UbSystem::makeDirectory(pathname+ "/geo");
+         //UbSystem::makeDirectory(pathname+ "/blocks/blocks_");
+      
+
+        ////////////////////////////////////////////////////////////////////////
+        // BC Adapter
+        // BCAdapterPtr gradientAdapter(new VelocityBCAdapter(true, true, true, pdxC, pdyC, pdzC, 0.0,
+        // BCFunction::INFCONST));
+        // gradientAdapter->setBcAlgorithm(BCAlgorithmPtr(new FluxBCAlgorithm()));
+        // BCAdapterPtr cubeNoslipAdapter(new NoSlipBCAdapter(1));
+        SPtr<BCAdapter> tpmsNoslipAdapter(new NoSlipBCAdapter());
+        //SPtr<BCAdapter> funnelNoslipAdapter(new NoSlipBCAdapter(1));
+
+           // SPtr<BCAdapter> xMinApr(new DensityBCAdapter(0.0000001));
+         SPtr<BCAdapter> xMinApr(new DensityBCAdapter());
+        //  SPtr<BCAdapter> xMinApr(new VelocityBCAdapter(vx, 0., BCFunction::INFCONST, 0., 0., BCFunction::INFCONST,
+         //  0.,0., BCFunction::INFCONST));
+
+        SPtr<BCAdapter> xMaxApr(new DensityBCAdapter(0.));
+        //SPtr<BCAdapter> yMinApr(new NoSlipBCAdapter(1));
+        //SPtr<BCAdapter> yMaxApr(new NoSlipBCAdapter(1));
+        SPtr<BCAdapter> zMinApr(new NoSlipBCAdapter());
+        SPtr<BCAdapter> zMaxApr(new NoSlipBCAdapter());
+
+        //SPtr<BCAdapter> zMinFunnelApr(new NoSlipBCAdapter(1));
+        //SPtr<BCAdapter> zMaxFunnelApr(new NoSlipBCAdapter(1));
+
+         //tpmsNoslipAdapter->setBcAlgorithm(BCAlgorithmPtr(new NoSlipBCAlgorithm()));
+         //tpmsNoslipAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new ThinWallNoSlipBCAlgorithm()));
+
+        tpmsNoslipAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+        //funnelNoslipAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+
+         //xMinApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NonEqDensityBCAlgorithm()));
+        // xMinApr->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityBCAlgorithm()));
+         xMinApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NonReflectingInflowBCAlgorithm())); 
+        // xMinApr->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityWithDensityBCAlgorithm()));
+         //xMaxApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NonEqDensityBCAlgorithm()));
+         xMaxApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NonReflectingOutflowBCAlgorithmWithRelaxation()));
+        //yMinApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+        //yMaxApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+        zMinApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+        zMaxApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+
+        //zMinFunnelApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+        //zMaxFunnelApr->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+
+        ////////////////////////////////////////////////////////////////////////
+        // BC visitor
+        BoundaryConditionsBlockVisitor bcVisitor;
+        // bcVisitor.addBC(cubeNoslipAdapter);
+        bcVisitor.addBC(tpmsNoslipAdapter);
+        //bcVisitor.addBC(funnelNoslipAdapter);
+        bcVisitor.addBC(xMinApr);
+        bcVisitor.addBC(xMaxApr);
+        //bcVisitor.addBC(yMinApr);
+        //bcVisitor.addBC(yMaxApr);
+        bcVisitor.addBC(zMinApr);
+        bcVisitor.addBC(zMaxApr);
+        //bcVisitor.addBC(zMinFunnelApr);
+        //bcVisitor.addBC(zMaxFunnelApr);
+
+        ////////////////////////////////////////////////////////////////////////    
+        //sponge layer
+        //mu::Parser spongeLayer;
+        //spongeLayer.SetExpr("x1>=(sizeX-sizeSP)/dx ? (sizeX/dx-(x1+1))/sizeSP/dx/2.0 + 0.5 : 1.0");
+        //spongeLayer.DefineConst("sizeX", length[0]);
+        //spongeLayer.DefineConst("sizeSP", 0.005);
+        //spongeLayer.DefineConst("dx", dx);
+
+        ////////////////////////////////////////////////////////////////////////
+        // grid, kernel and BCProcessor
+        SPtr<Grid3D> grid(new Grid3D(comm));
+        SPtr<LBMKernel> kernel;
+        //kernel = SPtr<LBMKernel>(new IncompressibleCumulantLBMKernel());
+         kernel = SPtr<LBMKernel>(new CompressibleCumulantLBMKernel());
+        //kernel = SPtr<LBMKernel>(new IncompressibleCumulantWithSpongeLayerLBMKernel());       
+        //kernel->setWithSpongeLayer(true);
+        //kernel->setSpongeLayer(spongeLayer);
+        // kernel = ;
+         // kernel = SPtr<LBMKernel>(new CumulantK17LBMKernel());
+        // 		 mu::Parser fctForcingX1;
+        // 		 fctForcingX1.SetExpr("Fx2");
+        // 		 fctForcingX1.DefineConst("Fx2", 5e-4);
+        // 		 kernel->setForcingX1(fctForcingX1);
+        // 		 kernel->setWithForcing(true);
+        //
+        // SPtr<ThinWallBCProcessor> bcProc(new ThinWallBCProcessor());
+        SPtr<BCProcessor> bcProc(new BCProcessor());
+        kernel->setBCProcessor(bcProc);
+
+
+            SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(
+                comm, MetisPartitioningGridVisitor::LevelIntersected, DIR_00M, MetisPartitioner::RECURSIVE));
+
+        //////////////////////////////////////////////////////////////////////////
+        // restart
+        SPtr<UbScheduler> mSch(new UbScheduler(cpStep, cpStart));
+        SPtr<MPIIOMigrationCoProcessor> migCoProcessor(
+            new MPIIOMigrationCoProcessor(grid, mSch,metisVisitor, pathname + "/mig", comm));
+        migCoProcessor->setLBMKernel(kernel);
+        migCoProcessor->setBCProcessor(bcProc);
+        //////////////////////////////////////////////////////////////////////////
+
+        if (newStart) {
+            GbImplicitSurfacePtr tpms;
+            // tpms = GbImplicitSurfacePtr(new GbImplicitSurface(0, 0, 0, TPMSL[0], TPMSL[1], TPMSL[2], UnitEdgeLength,
+            // dx));
+            tpms = GbImplicitSurfacePtr(new GbImplicitSurface(TPMSOrigin[0], TPMSOrigin[1], TPMSOrigin[2],
+                                                              TPMSOrigin[0] + TPMSL[0],
+                                                              TPMSOrigin[1] + TPMSL[1],
+                                                              TPMSOrigin[2] + TPMSL[2],
+                                                              UnitEdgeLength, dx, 2.5e-4));
+
+            // 	for (int i = 0; i < 12; i++)
+            // 	{
+            // 	  cout << tpms->evaluateImplicitFunction(0.002, 0.002, i/1000., 1.)<<endl;
+            // 	}
+
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(tpms.get(), pathname + "/geo/tpms", WbWriterVtkXmlBinary::getInstance());
+
+
+            //SPtr<GbTriFaceMesh3D> funnel;
+            //SPtr<GbTriFaceMesh3D> funnel(new GbTriFaceMesh3D());
+            //funnel->readMeshFromSTLFileBinary(geometry, true);
+
+          
+
+            //funnel = SPtr<GbTriFaceMesh3D>(GbTriFaceMesh3DCreator::getInstance()->readMeshFromSTLFile2(geometry, "tpmsMeshBody", GbTriFaceMesh3D::KDTREE_SAHPLIT, false));
+            // funnel->rotate(0.,180,0.);
+
+            //funnel->translate(-funnel->getX1Minimum() - funnel->getLengthX1(),
+                              //tpms->getX2Centroid() - funnel->getX2Centroid(),
+                              //tpms->getX3Centroid() - funnel->getX3Centroid());
+            //if (myid == 0)
+                //GbSystem3D::writeGeoObject(funnel.get(), pathname + "/geo/funnel", WbWriterVtkXmlBinary::getInstance());
+
+            double g_minX1 = gridCubeOrigin[0];
+            double g_minX2 = gridCubeOrigin[1];
+            double g_minX3 = gridCubeOrigin[2];
+
+            double g_maxX1 = gridCubeOrigin[0] + length[0];
+            double g_maxX2 = gridCubeOrigin[1] + length[1];
+            double g_maxX3 = gridCubeOrigin[2] + length[2];
+
+            SPtr<GbObject3D> gridCube(new GbCuboid3D(g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3));
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(gridCube.get(), pathname + "/geo/gridCube",
+                                           WbWriterVtkXmlBinary::getInstance());
+
+            
+            SPtr<GbCuboid3D> spongecube(new GbCuboid3D(TPMSOrigin[0] + TPMSL[0], g_minX2 - dx, g_minX3 - dx,
+                                                       g_maxX1 + dx, g_maxX2 + dx, g_maxX3 + dx));
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(spongecube.get(), pathname + "/geo/spongecube",
+                                           WbWriterVtkXmlBinary::getInstance());
+            if (myid == 0) {
+                // UBLOG(logINFO,"rho = " << rhoLB );
+                UBLOG(logINFO, "nu = " << nu);
+                UBLOG(logINFO, "Re = " << Re);
+                UBLOG(logINFO, "vx = " << vx);
+                UBLOG(logINFO, "dx = " << dx);
+                UBLOG(logINFO, "Preprocess - start");
+            }
+
+            grid->setDeltaX(dx);
+            grid->setBlockNX(blocknx[0], blocknx[1], blocknx[2]);
+            grid->setPeriodicX1(false);
+            grid->setPeriodicX2(true);
+            grid->setPeriodicX3(false);
+
+            GenBlocksGridVisitor genBlocks(gridCube);
+            grid->accept(genBlocks);
+
+            SPtr<CoProcessor> ppblocks(new WriteBlocksCoProcessor(grid, SPtr<UbScheduler>(new UbScheduler(1)), pathname,
+                                                                  WbWriterVtkXmlBinary::getInstance(), comm));
+
+            ppblocks->process(0);
+
+            // GbObject3DPtr solidcube(new GbCuboid3D(0, g_minX2, g_minX3, TPMSL[0], g_maxX2, g_maxX3));
+            // if (myid == 0) GbSystem3D::writeGeoObject(solidcube.get(), pathname + "/geo/solidcube",
+            // WbWriterVtkXmlBinary::getInstance());
+
+            GbCuboid3DPtr xMin(
+                new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_minX3 - dx, g_minX1, g_maxX2 + dx, g_maxX3 + dx));
+
+            /*GbCuboid3DPtr yMin(
+                new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_minX3 - dx, g_maxX1, g_minX2, g_maxX3 + dx));
+            GbCuboid3DPtr yMax(
+                new GbCuboid3D(g_minX1 - dx, g_maxX2, g_minX3 - dx, g_maxX1 + dx, g_maxX2 + dx, g_maxX3 + dx));*/
+
+           /* GbCuboid3DPtr zMinFunnel(
+                new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_minX3 - dx, g_maxX1, g_maxX2 + dx, g_minX3));
+            GbCuboid3DPtr zMaxFunnel(
+                new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_maxX3, g_maxX1 + dx, g_maxX2 + dx, g_maxX3 + dx));*/
+
+            //g_minX1 = 0.;
+            // g_minX2 = -length[1] / 2.0;
+            // g_minX3 = -length[2] / 2.0;
+
+            //g_maxX1 = TPMSL[0];
+            // g_maxX2 = length[1] / 2.0;
+            // g_maxX3 -= TPMSL[2] / 2.0;
+
+            GbCuboid3DPtr xMax(new GbCuboid3D(g_maxX1 , g_minX2 - dx, g_minX3 - dx, g_maxX1 + dx, g_maxX2 + dx,
+                                              g_maxX3 + dx));
+
+            //GbCuboid3DPtr zMin(new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_minX3 - dx, 1.1 * g_maxX1, g_maxX2 + dx,
+            //                                  g_minX3 + 0.5 * (length[2] - TPMSL[2])));
+            //GbCuboid3DPtr zMax(new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_maxX3 - 0.5 * (length[2] - TPMSL[2]),
+            //                                  1.1 * g_maxX1, g_maxX2 + dx, g_maxX3));
+
+            GbCuboid3DPtr zMin(new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_minX3 - dx, g_maxX1 + dx, g_maxX2 + dx, g_minX3));
+            GbCuboid3DPtr zMax(new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_maxX3, g_maxX1 + dx, g_maxX2 + dx, g_maxX3 + dx));
+
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(xMin.get(), pathname + "/geo/xMin", WbWriterVtkXmlBinary::getInstance());
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(xMax.get(), pathname + "/geo/xMax", WbWriterVtkXmlBinary::getInstance());
+           /* if (myid == 0)
+                GbSystem3D::writeGeoObject(yMin.get(), pathname + "/geo/yMin", WbWriterVtkXmlBinary::getInstance());
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(yMax.get(), pathname + "/geo/yMax", WbWriterVtkXmlBinary::getInstance());*/
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(zMin.get(), pathname + "/geo/zMin", WbWriterVtkXmlBinary::getInstance());
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(zMax.get(), pathname + "/geo/zMax", WbWriterVtkXmlBinary::getInstance());
+
+ /*           if (myid == 0)
+                GbSystem3D::writeGeoObject(zMinFunnel.get(), pathname + "/geo/zMinFunnel",
+                                           WbWriterVtkXmlBinary::getInstance());
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(zMaxFunnel.get(), pathname + "/geo/zMaxFunnel",
+                                           WbWriterVtkXmlBinary::getInstance());*/
+
+            // D3Q27InteractorPtr cubeInt = D3Q27InteractorPtr(new D3Q27Interactor(solidcube, grid, cubeNoslipAdapter,
+            // Interactor3D::SOLID));
+            SPtr<D3Q27Interactor> tpmsInt = SPtr<D3Q27Interactor>(
+                new D3Q27Interactor(tpms, grid, tpmsNoslipAdapter, Interactor3D::SOLID, Interactor3D::POINTS));
+            //SPtr<Interactor3D> funnelInt = SPtr<D3Q27TriFaceMeshInteractor>(
+                //new D3Q27TriFaceMeshInteractor(funnel, grid, funnelNoslipAdapter, Interactor3D::SOLID));
+            // D3Q27TriFaceMeshInteractorPtr tpmsInt = D3Q27TriFaceMeshInteractorPtr(new
+            // D3Q27TriFaceMeshInteractor(tpms, grid, tpmsNoslipAdapter, Interactor3D::SOLID));
+            //  tpmsInt->setQs2(0);
+
+            SPtr<D3Q27Interactor> xMinInt = SPtr<D3Q27Interactor>(
+                new D3Q27Interactor(xMin, grid, xMinApr, Interactor3D::SOLID, Interactor3D::POINTS));
+            SPtr<D3Q27Interactor> xMaxInt = SPtr<D3Q27Interactor>(
+                new D3Q27Interactor(xMax, grid, xMaxApr, Interactor3D::SOLID, Interactor3D::POINTS));
+          /*  SPtr<D3Q27Interactor> yMinInt =
+                SPtr<D3Q27Interactor>(new D3Q27Interactor(yMin, grid, yMinApr, Interactor3D::SOLID));
+            SPtr<D3Q27Interactor> yMaxInt =
+                SPtr<D3Q27Interactor>(new D3Q27Interactor(yMax, grid, yMaxApr, Interactor3D::SOLID));*/
+            SPtr<D3Q27Interactor> zMinInt = SPtr<D3Q27Interactor>(
+                new D3Q27Interactor(zMin, grid, zMinApr, Interactor3D::SOLID, Interactor3D::POINTS));
+            SPtr<D3Q27Interactor> zMaxInt = SPtr<D3Q27Interactor>(
+                new D3Q27Interactor(zMax, grid, zMaxApr, Interactor3D::SOLID, Interactor3D::POINTS));
+
+            /*SPtr<D3Q27Interactor> zMinFunnelInt =
+                SPtr<D3Q27Interactor>(new D3Q27Interactor(zMinFunnel, grid, zMinFunnelApr, Interactor3D::SOLID));
+            SPtr<D3Q27Interactor> zMaxFunnelInt =
+                SPtr<D3Q27Interactor>(new D3Q27Interactor(zMaxFunnel, grid, zMaxFunnelApr, Interactor3D::SOLID));*/
+
+            // return;
+
+            InteractorsHelper intHelper(grid, metisVisitor,false);
+
+            //intHelper.addInteractor(cubeInt);
+            //intHelper.addInteractor(zMinFunnelInt);
+            //intHelper.addInteractor(zMaxFunnelInt);
+            //intHelper.addInteractor(funnelInt);
+
+            intHelper.addInteractor(tpmsInt);
+            intHelper.addInteractor(zMinInt);
+            intHelper.addInteractor(zMaxInt);
+
+            intHelper.addInteractor(xMinInt);
+            intHelper.addInteractor(xMaxInt);
+            //intHelper.addInteractor(yMinInt);
+            //intHelper.addInteractor(yMaxInt);
+
+
+            intHelper.selectBlocks();
+            // intHelper.selectBlocks2();
+
+            
+            // domain decomposition for threads
+            PQueuePartitioningGridVisitor pqPartVisitor(numOfThreads);
+            grid->accept(pqPartVisitor);
+
+            ppblocks->process(0);
+            ppblocks.reset();
+
+            //////////////////////////////////////////////////////////////////////////
+            unsigned long long numberOfBlocks = (unsigned long long)grid->getNumberOfBlocks();
+            int ghostLayer                    = 3;
+            unsigned long long numberOfNodesPerBlock =
+                (unsigned long long)(blocknx[0]) * (unsigned long long)(blocknx[1]) * (unsigned long long)(blocknx[2]);
+            unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
+            unsigned long long numberOfNodesPerBlockWithGhostLayer =
+                numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
+            double needMemAll =
+                double(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
+            double needMem = needMemAll / double(comm->getNumberOfProcesses());
+
+            if (myid == 0) {
+                UBLOG(logINFO, "Number of blocks = " << numberOfBlocks);
+                UBLOG(logINFO, "Number of nodes  = " << numberOfNodes);
+                int minInitLevel = grid->getCoarsestInitializedLevel();
+                int maxInitLevel = grid->getFinestInitializedLevel();
+                for (int level = minInitLevel; level <= maxInitLevel; level++) {
+                    int nobl = grid->getNumberOfBlocks(level);
+                    UBLOG(logINFO, "Number of blocks for level " << level << " = " << nobl);
+                    UBLOG(logINFO, "Number of nodes for level " << level << " = " << nobl * numberOfNodesPerBlock);
+                }
+                UBLOG(logINFO, "Necessary memory  = " << needMemAll << " bytes");
+                UBLOG(logINFO, "Necessary memory per process = " << needMem << " bytes");
+                UBLOG(logINFO, "Available memory per process = " << availMem << " bytes");
+            }
+            //////////////////////////////////////////////////////////////////////////
+
+            SetKernelBlockVisitor kernelVisitor(kernel, nu, availMem, needMem);
+            grid->accept(kernelVisitor);
+
+            //          if (refineLevel > 0)
+            //          {
+            // 			 SetUndefinedNodesBlockVisitor undefNodesVisitor;
+            //             grid->accept(undefNodesVisitor);
+            //          }
+
+            intHelper.setBC();
+
+            SpongeLayerBlockVisitor spongeLayerVisitor(spongecube, kernel, nu, DIR_P00);
+            grid->accept(spongeLayerVisitor);
+
+            grid->accept(bcVisitor);
+
+            // initialization of distributions
+            InitDistributionsBlockVisitor initVisitor;
+             //initVisitor.setVx1(0.001);
+            // initVisitor.setVx1(uLB);
+            grid->accept(initVisitor);
+
+            // boundary conditions grid
+            {
+                SPtr<UbScheduler> geoSch(new UbScheduler(1));
+                SPtr<CoProcessor> ppgeo(new WriteBoundaryConditionsCoProcessor(grid, geoSch, pathname, WbWriterVtkXmlBinary::getInstance(), comm));
+                ppgeo->process(0);
+                ppgeo.reset();
+            }
+            if (myid == 0)
+                UBLOG(logINFO, "Preprocess - end");
+        } 
+        else 
+        {
+            if (myid == 0) {
+                UBLOG(logINFO, "Parameters:");
+                //UBLOG(logINFO, "uLb = " << uLB);
+                //UBLOG(logINFO, "rho = " << rhoLB);
+                //UBLOG(logINFO, "nuLb = " << nuLB);
+                UBLOG(logINFO, "Re = " << Re);
+                UBLOG(logINFO, "dx = " << dx);
+                UBLOG(logINFO, "number of levels = " << refineLevel + 1);
+                UBLOG(logINFO, "numOfThreads = " << numOfThreads);
+                UBLOG(logINFO, "path = " << pathname);
+            }
+
+            migCoProcessor->restart((int)restartStep);
+            grid->setTimeStep(restartStep);
+
+            if (myid == 0)
+                UBLOG(logINFO, "Restart - end");
+        }
+        // set connectors
+        SPtr<InterpolationProcessor> iProcessor(new CompressibleOffsetInterpolationProcessor());
+        //SetConnectorsBlockVisitor setConnsVisitor(comm, true, D3Q27System::ENDDIR, nu, iProcessor);
+        OneDistributionSetConnectorsBlockVisitor setConnsVisitor(comm);
+        grid->accept(setConnsVisitor);
+
+
+        
+
+        SPtr<UbScheduler> visSch(new UbScheduler(outTime/*,beginTime,endTime*/));
+        SPtr<CoProcessor> pp(new WriteMacroscopicQuantitiesCoProcessor(grid, visSch, pathname, WbWriterVtkXmlBinary::getInstance(), conv, comm));
+        
+        SPtr<UbScheduler> tavSch(new UbScheduler(100, timeAvStart, timeAvStop));
+        SPtr<TimeAveragedValuesCoProcessor> tav(new TimeAveragedValuesCoProcessor(grid, pathname, WbWriterVtkXmlBinary::getInstance(), tavSch, comm,
+        TimeAveragedValuesCoProcessor::Density | TimeAveragedValuesCoProcessor::Velocity | TimeAveragedValuesCoProcessor::Fluctuations));
+        tav->setWithGhostLayer(true);        
+        
+        SPtr<UbScheduler> nuSch(new UbScheduler(100, 0, endTime / 2));
+        mu::Parser fnu;
+        fnu.SetExpr("(L*u/T)*(((T-2*t)/Re0)+(2*t/Re))");
+        fnu.DefineConst("Re0", Re0);
+        fnu.DefineConst("Re", Re);
+        fnu.DefineConst("T", endTime);
+        fnu.DefineConst("L", (UnitEdgeLength / dx));
+        fnu.DefineConst("u", vx);
+        SPtr<CoProcessor> nupr(new DecreaseViscosityCoProcessor(grid, nuSch, &fnu, comm));
+
+        SPtr<UbScheduler> nupsSch(new UbScheduler(100, 100, 100000000));
+        SPtr<CoProcessor> npr(new NUPSCounterCoProcessor(grid, nupsSch, numOfThreads, comm));
+
+        //omp_set_num_threads(numOfThreads);
+        numOfThreads = 1;
+        SPtr<UbScheduler> stepGhostLayer(visSch);
+        SPtr<Calculator> calculator(new BasicCalculator(grid, stepGhostLayer, int(endTime)));
+
+        //calculator->addCoProcessor(nupr);
+        calculator->addCoProcessor(npr);
+        calculator->addCoProcessor(pp);
+        calculator->addCoProcessor(migCoProcessor);
+        calculator->addCoProcessor(tav);
+
+        if (myid == 0)
+            UBLOG(logINFO, "Simulation-start");
+        calculator->calculate();
+        if (myid == 0)
+            UBLOG(logINFO, "Simulation-end");
+    } catch (std::exception &e) {
+        cerr << e.what() << endl << flush;
+    } catch (std::string &s) {
+        cerr << s << endl;
+    } catch (...) {
+        cerr << "unknown exception" << endl;
+    }
+}
+int main(int argc, char *argv[])
+{
+    // Entry point: expects the path to the configuration file as the first argument.
+    // Checking argc (rather than dereferencing argv[1]) stays in bounds even when argc == 0.
+    if (argc < 2 || argv == NULL || argv[1] == NULL) {
+        cout << "Configuration file is missing!" << endl;
+        return 1; // signal the failure to the calling shell / job script
+    }
+    run(string(argv[1]));
+    return 0;
+}
diff --git a/apps/cpu/rheometer/rheometer.cpp b/apps/cpu/rheometer/rheometer.cpp
index 1506bde80f21bce580c6aa781d1075fdc86bcd62..224d170696b90eeea573969b0d17d34021944eac 100644
--- a/apps/cpu/rheometer/rheometer.cpp
+++ b/apps/cpu/rheometer/rheometer.cpp
@@ -142,9 +142,9 @@ void bflow(string configname)
       //thix->setOmegaMin(omegaMin);
 
       SPtr<BCAdapter> noSlipBCAdapter(new NoSlipBCAdapter());
-      noSlipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+      //noSlipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
       //noSlipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new RheologyHerschelBulkleyModelNoSlipBCAlgorithm()));
-      //noSlipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new RheologyBinghamModelNoSlipBCAlgorithm()));
+      noSlipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new RheologyBinghamModelNoSlipBCAlgorithm()));
 
       //SPtr<BCAdapter> slipBCAdapter(new SlipBCAdapter());
       //slipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new SimpleSlipBCAlgorithm()));
@@ -181,10 +181,10 @@ void bflow(string configname)
       //fctVx.SetExpr("0.0");
 
       SPtr<BCAdapter> velocityBCAdapter(new VelocityBCAdapter(true, true, true, fctVx, fctVy, fctVz, 0, BCFunction::INFCONST));
-      velocityBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityBCAlgorithm()));
+      //velocityBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityBCAlgorithm()));
       //velocityBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new SimpleVelocityBCAlgorithm()));
       //velocityBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityWithDensityBCAlgorithm()));
-      //velocityBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new RheologyBinghamModelVelocityBCAlgorithm()));
+      velocityBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new RheologyBinghamModelVelocityBCAlgorithm()));
 
       //SPtr<BCAdapter> densityBCAdapter(new DensityBCAdapter());
       //densityBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new NonEqDensityBCAlgorithm()));
@@ -202,11 +202,11 @@ void bflow(string configname)
       bcProc = SPtr<BCProcessor>(new BCProcessor());
 
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new BGKLBMKernel());
-      SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new IncompressibleCumulantLBMKernel());
+      //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new IncompressibleCumulantLBMKernel());
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new CumulantLBMKernel());
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new CompressibleCumulant4thOrderViscosityLBMKernel());
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new CumulantK17LBMKernel()); 
-      //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new RheologyBinghamModelLBMKernel());
+      SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new RheologyBinghamModelLBMKernel());
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new HerschelBulkleyModelLBMKernel());
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new BinghamModelLBMKernel());
       kernel->setBCProcessor(bcProc);
@@ -243,7 +243,7 @@ void bflow(string configname)
       //around x
       //SPtr<GbObject3D> stator(new GbCylinder3D(g_minX1 - 3.0 * deltax, 0.5 * g_maxX2, 0.5 * g_maxX3,                                               g_maxX1 + 3.0 * deltax, 0.5 * g_maxX2, 0.5 * g_maxX3, 0.5 * g_maxX3));
 
-      GbSystem3D::writeGeoObject(rotor.get(), outputPath + "/geo/stator", WbWriterVtkXmlBinary::getInstance());
+      GbSystem3D::writeGeoObject(rotor.get(), outputPath + "/geo/rotor", WbWriterVtkXmlBinary::getInstance());
 
       SPtr<D3Q27Interactor> rotorInt =
           SPtr<D3Q27Interactor>(new D3Q27Interactor(rotor, grid, velocityBCAdapter, Interactor3D::INVERSESOLID));
@@ -254,7 +254,7 @@ void bflow(string configname)
       //around x
       //SPtr<GbObject3D> rotor(new GbCylinder3D(g_minX1 - 3.0 * deltax, 0.5 * g_maxX2, 0.5 * g_maxX3,                                           g_maxX1 + 3.0 * deltax, 0.5 * g_maxX2, 0.5 * g_maxX3, 0.25 * g_maxX3));
 
-      GbSystem3D::writeGeoObject(stator.get(), outputPath + "/geo/rotor", WbWriterVtkXmlBinary::getInstance());
+      GbSystem3D::writeGeoObject(stator.get(), outputPath + "/geo/stator", WbWriterVtkXmlBinary::getInstance());
 
       SPtr<D3Q27Interactor> statorInt = SPtr<D3Q27Interactor>(new D3Q27Interactor(stator, grid, noSlipBCAdapter, Interactor3D::SOLID));
 
@@ -427,7 +427,7 @@ void bflow(string configname)
       calculator->addCoProcessor(fp);
       calculator->addCoProcessor(fp2);
       calculator->addCoProcessor(writeMQCoProcessor);
-      //calculator->addCoProcessor(writeThixotropicMQCoProcessor);
+      calculator->addCoProcessor(writeThixotropicMQCoProcessor);
       //calculator->addCoProcessor(restartCoProcessor);
 
       if (myid == 0) UBLOG(logINFO, "Simulation-start");
diff --git a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
index 9d982ebac0059b4512041194100f6e1fdfa61924..aa6e52d1f2c20782ed34e3e4bba97ad873cc412e 100644
--- a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
+++ b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
@@ -83,6 +83,7 @@
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
 #include "VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h"
 #include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
 
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 
@@ -146,7 +147,7 @@ void multipleLevel(const std::string& configPath)
     const float tStartOutProbe      =  config.getValue<real>("tStartOutProbe");
     const float tOutProbe           =  config.getValue<real>("tOutProbe");
         
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
     GridScalingFactory scalingFactory  = GridScalingFactory();
 
@@ -192,7 +193,7 @@ void multipleLevel(const std::string& configPath)
     para->setViscosityLB(viscosityLB);
     para->setVelocityRatio( dx / dt );
     para->setViscosityRatio( dx*dx/dt );
-    para->setMainKernel("CumulantK17");
+    para->setMainKernel(vf::CollisionKernel::Compressible::CumulantK17);
 
     para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
         rho = (real)0.0;
@@ -274,7 +275,7 @@ int main( int argc, char* argv[])
     {
         try
         {
-            vf::logging::Logger::initalizeLogger();
+            vf::logging::Logger::initializeLogger();
 
             if( argc > 1){ path = argv[1]; }
 
diff --git a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
index 5fc31904433bfe2df0722ab1c63f574d3fcb9a35..3921c85244ad27456e98c750fd64638453546ff5 100644
--- a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
+++ b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
@@ -89,24 +89,19 @@
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
 #include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
 #include "VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h"
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
 
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 
 #include "utilities/communication.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
 std::string path(".");
 
 std::string simulationName("BoundaryLayer");
 
 using namespace vf::lbm::constant;
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
 
 void multipleLevel(const std::string& configPath)
 {
@@ -126,12 +121,12 @@ void multipleLevel(const std::string& configPath)
     vf::basics::ConfigurationFile config;
     config.load(configPath);
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////^
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
     GridScalingFactory scalingFactory  = GridScalingFactory();
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     
-    const int  nProcs = communicator.getNummberOfProcess();
+    const int  nProcs = communicator.getNumberOfProcess();
     const uint procID = vf::gpu::Communicator::getInstance().getPID();
     std::vector<uint> devices(10);
     std::iota(devices.begin(), devices.end(), 0);
@@ -236,7 +231,7 @@ void multipleLevel(const std::string& configPath)
     bool useStreams = (nProcs > 1 ? true: false);
     // useStreams=false;
     para->setUseStreams(useStreams);
-    para->setMainKernel("CumulantK17");
+    para->setMainKernel(vf::CollisionKernel::Compressible::CumulantK17);
     para->setIsBodyForce( config.getValue<bool>("bodyForce") );
 
     para->setTimestepStartOut(uint(tStartOut/dt) );
@@ -284,7 +279,7 @@ void multipleLevel(const std::string& configPath)
 
     gridBuilder->addCoarseGrid( xGridMin,  0.0,  0.0,
                                 xGridMax,  L_y,  L_z, dx);
-    if(true)// Add refinement
+    if(false)// Add refinement
     {
         gridBuilder->setNumberOfLayers(4,0);
         real xMaxRefinement = readPrecursor? xGridMax-H: xGridMax;   //Stop refinement some distance before outlet if domain ist not periodic
@@ -350,13 +345,14 @@ void multipleLevel(const std::string& configPath)
 
     gridBuilder->setStressBoundaryCondition(SideType::MZ,
                                             0.0, 0.0, 1.0,              // wall normals
-                                            samplingOffset, z0, dx);     // wall model settinng
+                                            samplingOffset, z0, dx);    // wall model settings
+
     para->setHasWallModelMonitor(true);   
     gridBuilder->setSlipBoundaryCondition(SideType::PZ,  0.0f,  0.0f, -1.0f); 
 
     bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityCompressible);
     bcFactory.setStressBoundaryCondition(BoundaryConditionFactory::StressBC::StressPressureBounceBack);
-    bcFactory.setSlipBoundaryCondition(BoundaryConditionFactory::SlipBC::SlipBounceBack); 
+    bcFactory.setSlipBoundaryCondition(BoundaryConditionFactory::SlipBC::SlipCompressibleTurbulentViscosity); 
     bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::OutflowNonReflective);
     bcFactory.setPrecursorBoundaryCondition(useDistributions ? BoundaryConditionFactory::PrecursorBC::DistributionsPrecursor : BoundaryConditionFactory::PrecursorBC::VelocityPrecursor);
     para->setOutflowPressureCorrectionFactor(0.0); 
@@ -454,7 +450,7 @@ int main( int argc, char* argv[])
     {
         try
         {
-            vf::logging::Logger::initalizeLogger();
+            vf::logging::Logger::initializeLogger();
 
             if( argc > 1){ path = argv[1]; }
 
diff --git a/apps/gpu/LBM/ChannelFlow/ChannelFlow.cpp b/apps/gpu/LBM/ChannelFlow/ChannelFlow.cpp
index 98982f6d9da4c95ca12319f56ffcc4a1e83d33af..a05f3243040a2fbd0617daa65ac29322f45f7025 100644
--- a/apps/gpu/LBM/ChannelFlow/ChannelFlow.cpp
+++ b/apps/gpu/LBM/ChannelFlow/ChannelFlow.cpp
@@ -74,6 +74,7 @@
 #include "VirtualFluids_GPU/LBM/Simulation.h"
 #include "VirtualFluids_GPU/Output/FileWriter.h"
 #include "VirtualFluids_GPU/Parameter/Parameter.h"
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -97,12 +98,12 @@ int main(int argc, char *argv[])
         //////////////////////////////////////////////////////////////////////////
 
         vf::gpu::Communicator &communicator = vf::gpu::Communicator::getInstance();
-        const int numberOfProcesses = communicator.getNummberOfProcess();
+        const int numberOfProcesses = communicator.getNumberOfProcess();
         SPtr<Parameter> para = std::make_shared<Parameter>(numberOfProcesses, communicator.getPID());
         std::vector<uint> devices(10);
         std::iota(devices.begin(), devices.end(), 0);
         para->setDevices(devices);
-        para->setMaxDev(communicator.getNummberOfProcess());
+        para->setMaxDev(communicator.getNumberOfProcess());
         BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
         //////////////////////////////////////////////////////////////////////////
@@ -119,7 +120,7 @@ int main(int argc, char *argv[])
 
         vf::logging::Logger::changeLogPath("output/vflog_process" +
                                            std::to_string(vf::gpu::Communicator::getInstance().getPID()) + ".txt");
-        vf::logging::Logger::initalizeLogger();
+        vf::logging::Logger::initializeLogger();
 
         //////////////////////////////////////////////////////////////////////////
         // setup gridGenerator
@@ -166,7 +167,7 @@ int main(int argc, char *argv[])
         para->setTimestepEnd(timeStepEnd);
 
         para->setOutputPrefix("ChannelFlow");
-        para->setMainKernel("CumulantK17CompChimStream");
+        para->setMainKernel(vf::CollisionKernel::Compressible::CumulantK17);
 
         const uint generatePart = vf::gpu::Communicator::getInstance().getPID();
         real overlap = (real)8.0 * dx;
diff --git a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
index 5e1cab7f48f7fb672c85f0decee4bcc2d4ac158f..a57191a4dd54c9b7ecb06048377acfe59d883277 100644
--- a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
+++ b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
@@ -69,13 +69,14 @@
 #include "VirtualFluids_GPU/Output/FileWriter.h"
 #include "VirtualFluids_GPU/Parameter/Parameter.h"
 #include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
 
 //////////////////////////////////////////////////////////////////////////
 
 int main()
 {
     try {
-         vf::logging::Logger::initalizeLogger();
+         vf::logging::Logger::initializeLogger();
         //////////////////////////////////////////////////////////////////////////
         // Simulation parameters
         //////////////////////////////////////////////////////////////////////////
@@ -153,7 +154,7 @@ int main()
         para->setTimestepOut(timeStepOut);
         para->setTimestepEnd(timeStepEnd);
 
-        para->setMainKernel("CumulantK17");
+        para->setMainKernel(vf::CollisionKernel::Compressible::CumulantK17);
 
         //////////////////////////////////////////////////////////////////////////
         // set boundary conditions
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp b/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
old mode 100644
new mode 100755
index ed6b4da7a3218e4d89ac90b053d9c054e4dd8205..1bbb35310e3dcc0a1b56be7d486acfb7370a00f8
--- a/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
+++ b/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
@@ -51,6 +51,7 @@
 #include "VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h"
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
 #include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
 
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 
@@ -78,7 +79,7 @@ void multipleLevel(std::filesystem::path& configPath)
     vf::basics::ConfigurationFile config;
     std::cout << configPath << std::endl;
     config.load(configPath.string());
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
     GridScalingFactory scalingFactory = GridScalingFactory();
 
@@ -93,7 +94,7 @@ void multipleLevel(std::filesystem::path& configPath)
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    const std::string outPath("output/");
+    const std::string outPath("output/" + std::to_string(para->getNumprocs()) + "GPU/");
     const std::string gridPath = "output/";
     std::string simulationName("DrivenCavityMultiGPU");
 
@@ -125,7 +126,7 @@ void multipleLevel(std::filesystem::path& configPath)
     para->setPrintFiles(true);
     std::cout << "Write result files to " << para->getFName() << std::endl;
 
-    para->setMainKernel("CumulantK17");
+    para->setMainKernel(vf::CollisionKernel::Compressible::CumulantK17);
     scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -163,7 +164,7 @@ void multipleLevel(std::filesystem::path& configPath)
             const real ySplit = 0.0;
             const real zSplit = 0.0;
 
-            if (communicator.getNummberOfProcess() == 2) {
+            if (communicator.getNumberOfProcess() == 2) {
 
                 if (generatePart == 0) {
                     gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xGridMax, yGridMax, zSplit + overlap,
@@ -210,7 +211,7 @@ void multipleLevel(std::filesystem::path& configPath)
                 gridBuilder->setVelocityBoundaryCondition(SideType::PX, 0.0, 0.0, 0.0);
                 gridBuilder->setVelocityBoundaryCondition(SideType::PY, 0.0, 0.0, 0.0);
                 //////////////////////////////////////////////////////////////////////////
-            } else if (communicator.getNummberOfProcess() == 4) {
+            } else if (communicator.getNumberOfProcess() == 4) {
 
                 if (generatePart == 0) {
                     gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xSplit + overlap, yGridMax,
@@ -294,7 +295,7 @@ void multipleLevel(std::filesystem::path& configPath)
                     gridBuilder->setVelocityBoundaryCondition(SideType::PX, 0.0, 0.0, 0.0);
                 }
                 //////////////////////////////////////////////////////////////////////////
-            } else if (communicator.getNummberOfProcess() == 8) {
+            } else if (communicator.getNumberOfProcess() == 8) {
 
                 if (generatePart == 0) {
                     gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xSplit + overlap, ySplit + overlap,
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU.txt b/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU_regressionTest.txt
similarity index 100%
rename from apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU.txt
rename to apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU_regressionTest.txt
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix8GPU_regressionTest.txt b/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix8GPU_regressionTest.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c5789cdf96049b7c0a31ce693c29cd2db4952a58
--- /dev/null
+++ b/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix8GPU_regressionTest.txt
@@ -0,0 +1,17 @@
+##################################################
+#GPU Mapping (NOTE(review): only 4 device IDs for the 8-GPU test - presumably IDs are per node; confirm)
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=10000
+TimeOut=10000
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/DrivenCavityUniform/DrivenCavity.cpp b/apps/gpu/LBM/DrivenCavityUniform/DrivenCavity.cpp
index 958ef4714118aac34b8cfb0bec3aab97b108b01d..83e5f41d2b7c9569744167ee1d1f674ab46c8439 100644
--- a/apps/gpu/LBM/DrivenCavityUniform/DrivenCavity.cpp
+++ b/apps/gpu/LBM/DrivenCavityUniform/DrivenCavity.cpp
@@ -69,13 +69,14 @@
 #include "VirtualFluids_GPU/Output/FileWriter.h"
 #include "VirtualFluids_GPU/Parameter/Parameter.h"
 #include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
 
 //////////////////////////////////////////////////////////////////////////
 
 int main()
 {
     try {
-         vf::logging::Logger::initalizeLogger();
+         vf::logging::Logger::initializeLogger();
         //////////////////////////////////////////////////////////////////////////
         // Simulation parameters
         //////////////////////////////////////////////////////////////////////////
@@ -154,7 +155,7 @@ int main()
         para->setTimestepOut(timeStepOut);
         para->setTimestepEnd(timeStepEnd);
 
-        para->setMainKernel("CumulantK17");
+        para->setMainKernel(vf::CollisionKernel::Compressible::CumulantK17);
 
         //////////////////////////////////////////////////////////////////////////
         // set boundary conditions
diff --git a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
index dc5eaf58aff9b4a1b87d70c187b81461330ee3da..2e43e20f33c061c3d25da0ea2ff53e2351ad3cad 100644
--- a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
+++ b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
@@ -49,6 +49,7 @@
 #include "VirtualFluids_GPU/Output/FileWriter.h"
 #include "VirtualFluids_GPU/Parameter/Parameter.h"
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -102,7 +103,7 @@ void multipleLevel(std::filesystem::path &configPath)
     vf::basics::ConfigurationFile config;
     config.load(configPath.string());
     SPtr<Parameter> para =
-        std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+        std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -160,8 +161,7 @@ void multipleLevel(std::filesystem::path &configPath)
     std::cout << "Write result files to " << para->getFName() << std::endl;
 
     para->setUseStreams(useStreams);
-    // para->setMainKernel("CumulantK17CompChim");
-    para->setMainKernel("CumulantK17CompChimStream");
+    para->setMainKernel(vf::CollisionKernel::Compressible::CumulantK17);
     
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     
@@ -206,7 +206,7 @@ void multipleLevel(std::filesystem::path &configPath)
             real overlap = (real)8.0 * dxGrid;
             gridBuilder->setNumberOfLayers(10, 8);
 
-            if (communicator.getNummberOfProcess() == 2) {
+            if (communicator.getNumberOfProcess() == 2) {
                 const real zSplit = 0.0; // round(((double)bbzp + bbzm) * 0.5);
 
                 if (generatePart == 0) {
@@ -257,7 +257,7 @@ void multipleLevel(std::filesystem::path &configPath)
                 gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
                 gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0); // set pressure BC after velocity BCs
                 //////////////////////////////////////////////////////////////////////////
-            } else if (communicator.getNummberOfProcess() == 4) {
+            } else if (communicator.getNumberOfProcess() == 4) {
 
                 const real xSplit = 100.0;
                 const real zSplit = 0.0;
@@ -347,7 +347,7 @@ void multipleLevel(std::filesystem::path &configPath)
                     gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0); // set pressure BC after velocity BCs
                 }
                 //////////////////////////////////////////////////////////////////////////
-            } else if (communicator.getNummberOfProcess() == 8) {
+            } else if (communicator.getNumberOfProcess() == 8) {
                 real xSplit = 140.0; // 100.0 // mit groesserem Level 1 140.0
                 real ySplit = 32.0;  // 32.0
                 real zSplit = 0.0;
diff --git a/apps/gpu/LBM/SphereScaling/SphereScaling.cpp b/apps/gpu/LBM/SphereScaling/SphereScaling.cpp
old mode 100644
new mode 100755
index 1d31ae10a8ae0678505141ebdede43df977dbf5e..c632e9649f7461e8af4a6e9e73d740406283edbf
--- a/apps/gpu/LBM/SphereScaling/SphereScaling.cpp
+++ b/apps/gpu/LBM/SphereScaling/SphereScaling.cpp
@@ -55,6 +55,7 @@
 #include "VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h"
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
 #include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
+#include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
 
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 
@@ -62,28 +63,6 @@
 
 #include "utilities/communication.h"
 
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//          U s e r    s e t t i n g s
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-// Phoenix
-// const std::string outPath("/work/y0078217/Results/SphereScalingResults/");
-// const std::string gridPathParent = "/work/y0078217/Grids/GridSphereScaling/";
-// const std::string simulationName("SphereScaling");
-// const std::string stlPath("/home/y0078217/STL/Sphere/");
-
-// Relative Paths
-const std::string outPath("./output/SphereScalingResults/");
-const std::string gridPathParent = "./output/grids/SphereScalingResults/";
-const std::string simulationName("SphereScaling");
-const std::string stlPath("./stl/SphereScaling/");
-
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -103,7 +82,7 @@ void multipleLevel(std::filesystem::path& configPath)
 
     vf::basics::ConfigurationFile config;
     config.load(configPath.string());
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
     GridScalingFactory scalingFactory = GridScalingFactory();
 
@@ -119,10 +98,13 @@ void multipleLevel(std::filesystem::path& configPath)
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
     bool useGridGenerator   = true;
-    bool useLevels          = true;
+    bool useLevels = true;
     std::string scalingType = "strong"; // "strong" // "weak"
-    // para->setUseStreams(true);                        // set in config
-    // para->useReducedCommunicationAfterFtoC = true;    // set in config
+
+    const std::string outPath("output/" + std::to_string(para->getNumprocs()) + "GPU/");
+    const std::string simulationName("SphereScaling");
+    const std::string gridPath = "./output/grids/";
+    const std::string stlPath("./stl/SphereScaling/");
 
     if (para->getNumprocs() == 1) {
         para->useReducedCommunicationAfterFtoC = false;
@@ -130,10 +112,9 @@ void multipleLevel(std::filesystem::path& configPath)
     if (scalingType != "weak" && scalingType != "strong")
         std::cerr << "unknown scaling type" << std::endl;
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    std::string gridPath(gridPathParent); // only for GridGenerator, for GridReader the gridPath needs to be set in the config file
 
     real dxGrid      = (real)1.0;
-    real vxLB        = (real)0.0005; // LB units
+    real vxLB        = (real)0.005;  // LB units
     real viscosityLB = 0.001;        //(vxLB * dxGrid) / Re;
 
     para->setVelocityLB(vxLB);
@@ -142,14 +123,9 @@ void multipleLevel(std::filesystem::path& configPath)
     para->setViscosityRatio((real)0.058823529);
     para->setDensityRatio((real)998.0);
 
-
-    // para->setTimestepOut(10);
-    // para->setTimestepEnd(10);
-
     para->setCalcDragLift(false);
     para->setUseWale(false);
 
-
     para->setOutputPrefix(simulationName);
     if (para->getOutputPath() == "output/") {para->setOutputPath(outPath);}
     para->setPrintFiles(true);
@@ -159,12 +135,8 @@ void multipleLevel(std::filesystem::path& configPath)
     else
         para->setMaxLevel(1);
 
-    // para->setMainKernel("CumulantK17CompChim");
-    para->setMainKernel("CumulantK17CompChimStream");
-    //para->setMainKernel("CumulantK17CompChimRedesigned");
-    scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleRhoSq);
-
-
+    para->setMainKernel(vf::CollisionKernel::Compressible::CumulantK17);
+    scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -214,7 +186,7 @@ void multipleLevel(std::filesystem::path& configPath)
             real overlap = (real)8.0 * dxGrid;
             gridBuilder->setNumberOfLayers(10, 8);
 
-            if (communicator.getNummberOfProcess() == 2) {
+            if (communicator.getNumberOfProcess() == 2) {
                 real zSplit = 0.5 * sideLengthCube;
 
                 if (scalingType == "weak") {
@@ -288,7 +260,7 @@ void multipleLevel(std::filesystem::path& configPath)
                 // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
                 //////////////////////////////////////////////////////////////////////////
 
-            } else if (communicator.getNummberOfProcess() == 4) {
+            } else if (communicator.getNumberOfProcess() == 4) {
                 real ySplit = 0.5 * sideLengthCube;
                 real zSplit = 0.5 * sideLengthCube;
 
@@ -404,7 +376,7 @@ void multipleLevel(std::filesystem::path& configPath)
                 gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0); // set pressure BC after velocity BCs
                 // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
                 //////////////////////////////////////////////////////////////////////////
-            } else if (communicator.getNummberOfProcess() == 8) {
+            } else if (communicator.getNumberOfProcess() == 8) {
                 real xSplit = 0.5 * sideLengthCube;
                 real ySplit = 0.5 * sideLengthCube;
                 real zSplit = 0.5 * sideLengthCube;
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_regressionTest.txt b/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_regressionTest.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c5789cdf96049b7c0a31ce693c29cd2db4952a58
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_regressionTest.txt
@@ -0,0 +1,17 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=10000
+TimeOut=10000
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_regressionTest.txt b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_regressionTest.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c5789cdf96049b7c0a31ce693c29cd2db4952a58
--- /dev/null
+++ b/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_regressionTest.txt
@@ -0,0 +1,17 @@
+##################################################
+#GPU Mapping
+##################################################
+Devices="0 1 2 3"
+NumberOfDevices=4
+
+##################################################
+#simulation parameter
+##################################################
+TimeEnd=10000
+TimeOut=10000
+
+##################################################
+# CUDA Streams and optimized communication (only used for multiple GPUs)
+##################################################
+useStreams = true
+useReducedCommunicationInInterpolation = true
\ No newline at end of file
diff --git a/apps/gpu/LBM/TGV_3D/TGV_3D.cpp b/apps/gpu/LBM/TGV_3D/TGV_3D.cpp
index 7514c2b273bf60d6e2523f132911dde8839d296a..cf092ddf00325caa5ca8f2f280d1de18d30eee16 100644
--- a/apps/gpu/LBM/TGV_3D/TGV_3D.cpp
+++ b/apps/gpu/LBM/TGV_3D/TGV_3D.cpp
@@ -124,7 +124,7 @@ uint gpuIndex = 0;
 bool useLimiter = false;
 bool useWale = false;
 
-std::string kernel( "CumulantK17Comp" );
+std::string kernel( "CumulantK17" );
 
 //std::string path("F:/Work/Computations/out/TaylorGreen3DNew/"); //LEGOLAS
 std::string path("D:/out/TGV_3D/"); //TESLA03
@@ -156,7 +156,7 @@ void multipleLevel(const std::string& configPath)
 
     vf::basics::ConfigurationFile config;
     config.load(configPath);
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/apps/gpu/LBM/TGV_3D_GridRef/TGV_3D_GridRef.cpp b/apps/gpu/LBM/TGV_3D_GridRef/TGV_3D_GridRef.cpp
index a88fee2e583a7cb227702ff19ada7daced1b1708..a6c31c7039d027ad62d5b46ae84517c55b171624 100644
--- a/apps/gpu/LBM/TGV_3D_GridRef/TGV_3D_GridRef.cpp
+++ b/apps/gpu/LBM/TGV_3D_GridRef/TGV_3D_GridRef.cpp
@@ -150,7 +150,7 @@ void multipleLevel(const std::string& configPath)
 
     vf::basics::ConfigurationFile config;
     config.load(configPath);
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
     GridScalingFactory scalingFactory = GridScalingFactory();
 
diff --git a/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp b/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp
index 045c208274bc6bc216d25e8c2fa905916a52f87b..c6d78e2634e9728958b6f933d6f517553a7fae42 100644
--- a/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp
+++ b/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp
@@ -181,7 +181,7 @@ void multipleLevel(const std::string& configPath)
 
     vf::basics::ConfigurationFile config;
     config.load(configPath);
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
     *logging::out << logging::Logger::INFO_HIGH << "SideLength = " << sideLengthX << " " << sideLengthY << " " << sideLengthZ << "\n";
diff --git a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp b/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
index 3e083afd690632dbaabdde5d00f2ab454d86032b..168954898b060c70885c8f160e9aebf0a39ec8d3 100644
--- a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
+++ b/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
@@ -227,7 +227,7 @@ void multipleLevel(const std::string& configPath)
     vf::basics::ConfigurationFile config;
     config.load(configPath);
 
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -778,7 +778,7 @@ int main( int argc, char* argv[])
 {
     try
     {
-        vf::logging::Logger::initalizeLogger();
+        vf::logging::Logger::initializeLogger();
 
         // assuming that the config files is stored parallel to this file.
         std::filesystem::path filePath = __FILE__;
diff --git a/apps/gpu/LBM/gridGeneratorTest/gridGenerator.cpp b/apps/gpu/LBM/gridGeneratorTest/gridGenerator.cpp
index 40a9aeb6212baf4e6b4354b0bd9c13dea95e0e37..41ef914f2dae32d0ce5e5e6df1df7ff0879cf4d8 100644
--- a/apps/gpu/LBM/gridGeneratorTest/gridGenerator.cpp
+++ b/apps/gpu/LBM/gridGeneratorTest/gridGenerator.cpp
@@ -80,7 +80,7 @@ void multipleLevel(const std::string& configPath)
     vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
     vf::basics::ConfigurationFile config;
     config.load(configPath);
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
 
diff --git a/apps/gpu/LBM/lbmTest/main.cpp b/apps/gpu/LBM/lbmTest/main.cpp
index 79d767cca40d710a41c7d2d6d1c512f74270023c..295d85ec99e556e27ee95121cea599b74eb96c4e 100644
--- a/apps/gpu/LBM/lbmTest/main.cpp
+++ b/apps/gpu/LBM/lbmTest/main.cpp
@@ -66,7 +66,7 @@ void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input
 	Communicator* comm = Communicator::getInstanz();
 
 	para->setMaxDev(StringUtil::toInt(input->getValue("NumberOfDevices")));
-	para->setNumprocs(comm->getNummberOfProcess());
+	para->setNumprocs(comm->getNumberOfProcess());
 	para->setDevices(StringUtil::toUintVector(input->getValue("Devices")));
 	para->setMyID(comm->getPID());
 	
diff --git a/apps/gpu/LBM/metisTest/main.cpp b/apps/gpu/LBM/metisTest/main.cpp
index 6a62ff72c7b71211610ba4e40f81e9a1f527eb7f..3cbb6838119625d13759662dda4a020126c896af 100644
--- a/apps/gpu/LBM/metisTest/main.cpp
+++ b/apps/gpu/LBM/metisTest/main.cpp
@@ -66,7 +66,7 @@ void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input
 	Communicator* comm = Communicator::getInstanz();
 
 	para->setMaxDev(StringUtil::toInt(input->getValue("NumberOfDevices")));
-	para->setNumprocs(comm->getNummberOfProcess());
+	para->setNumprocs(comm->getNumberOfProcess());
 	para->setDevices(StringUtil::toUintVector(input->getValue("Devices")));
 	para->setMyID(comm->getPID());
 	
diff --git a/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi b/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi
index 8d715e4b4cd49e6dbf92da3aedddbc4b869067c4..514dc5053e9574b452d80f61eb3d4e1ebb0f4396 100644
--- a/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi
+++ b/pythonbindings/pyfluids-stubs/bindings/gpu/grid_generator.pyi
@@ -67,7 +67,7 @@ class LevelGridBuilder(GridBuilder):
     def set_precursor_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, file_collection: pyfluids.bindings.gpu.VelocityFileCollection, n_t_read: int, velocity_x: float = ..., velocity_y: float = ..., velocity_z: float = ..., file_level_to_grid_level_map: List[int] = ...) -> None: ...
     def set_pressure_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, rho: float) -> None: ...
     def set_slip_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, normal_x: float, normal_y: float, normal_z: float) -> None: ...
-    def set_stress_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, normal_x: float, normal_y: float, normal_z: float, sampling_offset: int, z0: float, dx: float) -> None: ...
+    def set_stress_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, normal_x: float, normal_y: float, normal_z: float, sampling_offset: int, z0: float, dx: float, q: float) -> None: ...
     def set_velocity_boundary_condition(self, side_type: pyfluids.bindings.gpu.SideType, vx: float, vy: float, vz: float) -> None: ...
 
 class MultipleGridBuilder(LevelGridBuilder):
diff --git a/pythonbindings/src/gpu/submodules/communicator.cpp b/pythonbindings/src/gpu/submodules/communicator.cpp
index 26a57061933fbdbfe3447ec89eeb07116a9b974b..0230caf197c04c2f2cd411288e9ea24ee314c4a8 100644
--- a/pythonbindings/src/gpu/submodules/communicator.cpp
+++ b/pythonbindings/src/gpu/submodules/communicator.cpp
@@ -41,7 +41,7 @@ namespace communicator
     {
         py::class_<vf::gpu::Communicator, std::unique_ptr<vf::gpu::Communicator, py::nodelete>>(parentModule, "Communicator")
         .def_static("get_instance", &vf::gpu::Communicator::getInstance, py::return_value_policy::reference)
-        .def("get_number_of_process", &vf::gpu::Communicator::getNummberOfProcess)
+        .def("get_number_of_process", &vf::gpu::Communicator::getNumberOfProcess)
         .def("get_pid", &vf::gpu::Communicator::getPID);
     }
 }
\ No newline at end of file
diff --git a/pythonbindings/src/logger/logger.cpp b/pythonbindings/src/logger/logger.cpp
index 555b502fa9a56299895de0fa6dd6cfeb66c15024..c4c99c0a5077303b398e0726eaba0420ddb0dceb 100644
--- a/pythonbindings/src/logger/logger.cpp
+++ b/pythonbindings/src/logger/logger.cpp
@@ -42,7 +42,7 @@ namespace logging
         py::module loggerModule = parentModule.def_submodule("logger");
 
         py::class_<vf::logging::Logger>(loggerModule, "Logger")
-        .def_static("initialize_logger", &vf::logging::Logger::initalizeLogger)
+        .def_static("initialize_logger", &vf::logging::Logger::initializeLogger)
         .def_static("change_log_path", &vf::logging::Logger::changeLogPath, py::arg("path"));
 
         // use f-strings (f"text {float}") in python for compounded messages
diff --git a/regression-tests/multigpu_test/rocket.yml b/regression-tests/multigpu_test/rocket4GPU.yml
similarity index 72%
rename from regression-tests/multigpu_test/rocket.yml
rename to regression-tests/multigpu_test/rocket4GPU.yml
index f621b1349c042e02f2e834e697147da0822ffe1f..a05ffea6ad04e0d5cfb8d7749111726dfceb4609 100755
--- a/regression-tests/multigpu_test/rocket.yml
+++ b/regression-tests/multigpu_test/rocket4GPU.yml
@@ -3,8 +3,8 @@ user: $PHOENIX_REMOTE_USER
 private_keyfile: $PHOENIX_PRIVATE_KEY
 
 copy:
-  - from: regression-tests/multigpu_test/slurm.job
-    to: multigpu_test/slurm.job
+  - from: regression-tests/multigpu_test/slurm4GPU.job
+    to: multigpu_test/slurm4GPU.job
     overwrite: true
 
   - from: "CMake/"
@@ -36,13 +36,16 @@ copy:
     overwrite: true
 
 collect:
-  - from: multigpu_test/output/
-    to: output/results/
+  - from: multigpu_test/output/4GPU/
+    to: output/4GPU
     overwrite: true
 
-  - from: multigpu_test/slurmMultiGPU.out
-    to: output/slurmMultiGPU.out
+  - from: multigpu_test/slurm4GPU.out
+    to: output/4GPU/slurm4GPU.out
     overwrite: true
 
-sbatch: multigpu_test/slurm.job
+clean:
+  - multigpu_test/output/*
+
+sbatch: multigpu_test/slurm4GPU.job
 continue_if_job_fails: true
diff --git a/regression-tests/multigpu_test/rocket8GPU.yml b/regression-tests/multigpu_test/rocket8GPU.yml
new file mode 100755
index 0000000000000000000000000000000000000000..e8cc08a9fa39425686a16d193dba1743533994bc
--- /dev/null
+++ b/regression-tests/multigpu_test/rocket8GPU.yml
@@ -0,0 +1,51 @@
+host: $PHOENIX_REMOTE_HOST
+user: $PHOENIX_REMOTE_USER
+private_keyfile: $PHOENIX_PRIVATE_KEY
+
+copy:
+  - from: regression-tests/multigpu_test/slurm8GPU.job
+    to: multigpu_test/slurm8GPU.job
+    overwrite: true
+
+  - from: "CMake/"
+    to: "multigpu_test/CMake/"
+    overwrite: true
+
+  - from: "3rdParty/"
+    to: "multigpu_test/3rdParty/"
+    overwrite: true
+
+  - from: "CMakeLists.txt"
+    to: "multigpu_test/CMakeLists.txt"
+    overwrite: true
+
+  - from: "gpu.cmake"
+    to: "multigpu_test/gpu.cmake"
+    overwrite: true
+
+  - from: "src/"
+    to: "multigpu_test/src/"
+    overwrite: true
+
+  - from: "CMakePresets.json"
+    to: "multigpu_test/CMakePresets.json"
+    overwrite: true
+
+  - from: "apps/gpu/LBM/"
+    to: "multigpu_test/apps/gpu/LBM/"
+    overwrite: true
+
+collect:
+  - from: multigpu_test/output/8GPU/
+    to: output/8GPU
+    overwrite: true
+
+  - from: multigpu_test/slurm8GPU.out
+    to: output/8GPU/slurm8GPU.out
+    overwrite: true
+
+clean:
+  - multigpu_test/output/*
+
+sbatch: multigpu_test/slurm8GPU.job
+continue_if_job_fails: true
diff --git a/regression-tests/multigpu_test/slurm.job b/regression-tests/multigpu_test/slurm4GPU.job
similarity index 61%
rename from regression-tests/multigpu_test/slurm.job
rename to regression-tests/multigpu_test/slurm4GPU.job
index 0ee0df46ab64bab6520f9f46fc939d5b3186fae7..886bfaf7479e01cfef285e9a2dae189258ce0b7e 100755
--- a/regression-tests/multigpu_test/slurm.job
+++ b/regression-tests/multigpu_test/slurm4GPU.job
@@ -3,10 +3,10 @@
 #SBATCH --partition=gpu01_queue
 #SBATCH --nodes=1
 #SBATCH --time=10:00:00
-#SBATCH --job-name=Cavity4GPU
+#SBATCH --job-name=Regr4GPU
 #SBATCH --ntasks-per-node=4
 #SBATCH --gres=gpu:4
-#SBATCH --output=multigpu_test/slurmMultiGPU.out
+#SBATCH --output=multigpu_test/slurm4GPU.out
 ##SBATCH --exclusive
 
 module purge 
@@ -21,9 +21,13 @@ module list
 cd multigpu_test
 mkdir -p build
 cd build
-cmake .. -DBUILD_VF_GPU=ON -DCMAKE_CUDA_ARCHITECTURES=60 -DUSER_APPS="apps/gpu/LBM/DrivenCavityMultiGPU"
+cmake .. -DBUILD_VF_GPU=ON -DCMAKE_CUDA_ARCHITECTURES=60 -DUSER_APPS=apps/gpu/LBM/DrivenCavityMultiGPU\;apps/gpu/LBM/SphereScaling
 make -j 16
 cd ..
 mkdir -p output
 
-mpirun -np 4 "./build/bin/DrivenCavityMultiGPU" "configPhoenix4GPU.txt"
\ No newline at end of file
+echo $'\n\n\n\n---First test: DrivenCavityMultiGPU on 4 GPUs\n\n'
+mpirun -np 4 "./build/bin/DrivenCavityMultiGPU" "configPhoenix4GPU_regressionTest.txt"
+
+echo $'\n\n\n\n---Second test: SphereScaling on 4 GPUs\n\n'
+mpirun -np 4 "./build/bin/SphereScaling"        "configPhoenix4GPU_regressionTest.txt"
\ No newline at end of file
diff --git a/regression-tests/multigpu_test/slurm8GPU.job b/regression-tests/multigpu_test/slurm8GPU.job
new file mode 100755
index 0000000000000000000000000000000000000000..333d5c77b176329947fb5d0452a0187208f323d4
--- /dev/null
+++ b/regression-tests/multigpu_test/slurm8GPU.job
@@ -0,0 +1,33 @@
+#!/bin/bash -l
+# Regression job: builds DrivenCavityMultiGPU and SphereScaling, then runs each with 8 MPI ranks.
+#SBATCH --partition=gpu01_queue
+#SBATCH --nodes=2
+#SBATCH --time=10:00:00
+#SBATCH --job-name=Regr8GPU
+#SBATCH --ntasks-per-node=4
+#SBATCH --gres=gpu:4
+#SBATCH --output=multigpu_test/slurm8GPU.out
+##SBATCH --exclusive
+
+module purge
+module load comp/ccache/4.1 # loads comp/gcc/9.3.0
+module load mpi/openmpi/4.0.5_gcc_9.3/openmpi
+module load cuda/11.3
+module load comp/git/2.27.0
+PATH=/home/irmb/tools/cmake-3.20.3-linux-x86_64/bin:$PATH
+
+module list
+# Abort on any failed build step: build/ is reused between runs, so continuing could run a stale binary.
+cd multigpu_test || exit 1
+mkdir -p build
+cd build || exit 1
+cmake .. -DBUILD_VF_GPU=ON -DCMAKE_CUDA_ARCHITECTURES=60 -DUSER_APPS=apps/gpu/LBM/DrivenCavityMultiGPU\;apps/gpu/LBM/SphereScaling || exit 1
+make -j 16 || exit 1
+cd ..
+mkdir -p output
+# 2 nodes x 4 tasks per node = 8 ranks; the second test still runs if the first one fails.
+echo $'\n\n\n\n---First test: DrivenCavityMultiGPU on 8 GPUs\n\n'
+mpirun -np 8 "./build/bin/DrivenCavityMultiGPU" "configPhoenix8GPU_regressionTest.txt"
+
+echo $'\n\n\n\n---Second test: SphereScaling on 8 GPUs\n\n'
+mpirun -np 8 "./build/bin/SphereScaling"        "configPhoenix8GPU_regressionTest.txt"
\ No newline at end of file
diff --git a/regression-tests/reference_data b/regression-tests/reference_data
index a5787a2312095d7374a8cfe6225d4d1409024081..794c3f838577d2b1f4db22c19861cbf0fed4fee6 160000
--- a/regression-tests/reference_data
+++ b/regression-tests/reference_data
@@ -1 +1 @@
-Subproject commit a5787a2312095d7374a8cfe6225d4d1409024081
+Subproject commit 794c3f838577d2b1f4db22c19861cbf0fed4fee6
diff --git a/src/basics/CMakeLists.txt b/src/basics/CMakeLists.txt
index 7f871424b2c6849d2c0f6e8d277b17214fa5cd9c..14e1760fe3cb6f9b1e122f4adc5adbfb3e30b5c7 100644
--- a/src/basics/CMakeLists.txt
+++ b/src/basics/CMakeLists.txt
@@ -21,4 +21,8 @@ IF(MSVC)
     target_compile_definitions(${library_name} PUBLIC NOMINMAX) # Disable Min/Max-Macros
 ENDIF(MSVC)
 
+if(BUILD_USE_BOOST)
+    target_link_libraries(${library_name} PRIVATE Boost::boost)
+endif()
+
 vf_add_tests()
diff --git a/src/basics/geometry3d/GbImplicitSurface.cpp b/src/basics/geometry3d/GbImplicitSurface.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6dec5717341d0f55e2b34b3e87d4f15e2b077f8c
--- /dev/null
+++ b/src/basics/geometry3d/GbImplicitSurface.cpp
@@ -0,0 +1,446 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file GbImplicitSurface.cpp
+//! \ingroup geometry3d
+//! \author Hussein Alihussein
+//=======================================================================================
+
+#include <GbImplicitSurface.h>
+
+#ifdef BUILD_USE_BOOST
+
+#include <basics/utilities/UbMath.h>
+
+#include <geometry3d/GbSystem3D.h>
+#include <geometry3d/GbTriangle3D.h>
+
+#include <boost/math/tools/roots.hpp>
+
+
+using namespace std;
+using boost::math::tools::bisect;
+
+/*=======================================================*/
+// ObObjectCreator* GbImplicitSurface::getCreator()
+// {
+// 	 GbObject3DCreator instance;
+// 	return &instance;
+// }
+/*=======================================================*/
+// Default constructor: creates a surface without corner points (p1..p4 are null).
+GbImplicitSurface::GbImplicitSurface() //: GbObject3D()
+{
+    this->p1 = nullptr; this->p2 = nullptr; this->p3 = nullptr; this->p4 = nullptr; // the destructor null-checks these before dereferencing
+}
+/*=======================================================*/
+// Constructor: stores edgeLength, dx and thickness and creates the points p1..p4 from the corners (x1a, x2a, x3a) and (x1b, x2b, x3b).
+GbImplicitSurface::GbImplicitSurface(const double& x1a, const double& x2a, const double& x3a, const double& x1b, const double& x2b, const double& x3b, const double& edgeLength, const double& dx, const double& thickness) :GbObject3D()
+{
+    // Scalar parameters; this-> is required because the arguments shadow the members.
+    this->thickness  = thickness;
+    this->dx         = dx;
+    this->edgeLength = edgeLength;
+    // p1/p2 and p3/p4 start at the same two corners; this object registers itself as observer of every point it creates.
+    this->p1 = new GbPoint3D(x1a, x2a, x3a);
+    this->p1->addObserver(this);
+    this->p2 = new GbPoint3D(x1b, x2b, x3b);
+    this->p2->addObserver(this);
+    this->p3 = new GbPoint3D(x1a, x2a, x3a);
+    this->p3->addObserver(this);
+    this->p4 = new GbPoint3D(x1b, x2b, x3b);
+    this->p4->addObserver(this);
+}
+/*=======================================================*/
+// Constructor (disabled variant that takes four explicit corner points)
+//GbImplicitSurface::GbImplicitSurface(
+//	const double& x1a, const double& x2a, const double& x3a,
+//	const double& x1b, const double& x2b, const double& x3b,
+//
+//	const double& x1c, const double& x2c, const double& x3c,
+//	const double& x1d, const double& x2d, const double& x3d,
+//
+//	const double& edgeLength, const double& dx) :GbObject3D()
+//{
+//	this->p1 = new GbPoint3D(x1a, x2a, x3a);
+//	this->p2 = new GbPoint3D(x1b, x2b, x3b);
+//
+//	this->p3 = new GbPoint3D(x1c, x2c, x3c);
+//	this->p4 = new GbPoint3D(x1d, x2d, x3d);
+//
+//	this->edgeLength = edgeLength;
+//	this->dx = dx;
+//}
+GbImplicitSurface::GbImplicitSurface(GbImplicitSurface * imp) // NOTE(review): imp is ignored and nothing is copied; p1..p4 stay unset here unless the header initializes them - confirm before relying on this overload
+{
+}
+/*=======================================================*/
+// Destructor: unregisters this object as observer of its corner points.
+// The points themselves are not deleted here.
+GbImplicitSurface::~GbImplicitSurface()
+{
+    // Snapshot the four corner points so they can be handled uniformly, in the order p1..p4.
+    GbPoint3D* const corners[] = { this->p1, this->p2, this->p3, this->p4 };
+    for (GbPoint3D* corner : corners) {
+        // A null entry means there is nothing to unregister from.
+        if (corner)
+            corner->removeObserver(this);
+    }
+}
+/*=======================================================*/
+struct TerminationCondition { // stop predicate: true once the bracket [min, max] is narrow enough
+	bool operator() (double min, double max) const {
+		return fabs(min - max) <= 10e-10; // 10e-10 == 1e-9; fabs is double-only, so the integer ::abs overload can never be selected
+	}
+};
+/*=======================================================*/
+struct FunctionToApproximate { // gyroid function sin*cos sum, sampled at (x, y, z) + q*(dir1, dir2, dir3); periodic with period L
+	double x, y, z, dir1, dir2, dir3, L;
+	double operator() (double q) {
+		const double k = 2.*M_PI / L, ax = k*(x + q*dir1), ay = k*(y + q*dir2), az = k*(z + q*dir3); // scaled sample coordinates
+		return sin(ax)*cos(ay) + sin(ay)*cos(az) + sin(az)*cos(ax);
+	}
+};
+/*=======================================================*/
+//! Functor that evaluates a thickness-corrected gyroid function along a line.
+//! For a parameter q the point (x, y, z) + q*(dir1, dir2, dir3) is used; the result
+//! is the plain gyroid value f000 plus terms that depend on h and on the first,
+//! second and third derivatives of the gyroid at that point.
+//! NOTE(review): judging by the name this is a third-order approximation of the
+//! gyroid surface shifted by the distance h - confirm against the derivation.
+//! All intermediate results are kept in data members, so every call of
+//! operator() overwrites them.
+struct FunctionGyroidThirdOrder {
+	// point, direction and period L of the gyroid; set by the caller before use
+	double x, y, z;
+	double dir1, dir2, dir3, L;
+	// offset entering the correction terms (callers assign +/- thickness)
+	double h;
+	
+	// scratch members: sine/cosine products (t*) and partial derivatives (f*)
+	double t17, t3, t2, t18, t20, t8, t13, t5, t9, t6, t11, t14;
+	double f300, f210, f201, f120, f102, f030, f021, f012, f003, f200, f110, f101, f020, f011, f002, f100, f010, f001, f000;
+
+	// scratch members: sum of squared first derivatives, its root, and combined terms
+	double repeatedTerm, repeatedTermRoot;
+	double T1, T2, T3, T4, T5, T6, T7, T8, T9, Gyroidh;
+
+	//! \param q parameter along the direction (dir1, dir2, dir3)
+	//! \return value of the h-corrected gyroid function at the shifted point
+	double operator() (double q) {
+	// products of sines and cosines of the phases 2*pi*(coordinate + q*direction)/L
+	 t2  = sin((2. * M_PI*(x+q*dir1)) / L)*sin((2. * M_PI*(y+q*dir2)) / L);
+	 t3  = sin((2. * M_PI*(x+q*dir1)) / L)*sin((2. * M_PI*(z+q*dir3)) / L);
+	 t5  = cos((2. * M_PI*(y+q*dir2)) / L)*sin((2. * M_PI*(x+q*dir1)) / L);
+	 t6  = cos((2. * M_PI*(z+q*dir3)) / L)*sin((2. * M_PI*(x+q*dir1)) / L);
+	 t8  = sin((2. * M_PI*(y+q*dir2)) / L)*sin((2. * M_PI*(z+q*dir3)) / L);
+	 t9  = cos((2. * M_PI*(x+q*dir1)) / L)*sin((2. * M_PI*(y+q*dir2)) / L);
+	 t11 = cos((2. * M_PI*(z+q*dir3)) / L)*sin((2. * M_PI*(y+q*dir2)) / L);
+	 t13 = cos((2. * M_PI*(x+q*dir1)) / L)*sin((2. * M_PI*(z+q*dir3)) / L);
+	 t14 = cos((2. * M_PI*(y+q*dir2)) / L)*sin((2. * M_PI*(z+q*dir3)) / L);
+	 t17 = cos((2. * M_PI*(x+q*dir1)) / L)*cos((2. * M_PI*(y+q*dir2)) / L);
+	 t18 = cos((2. * M_PI*(x+q*dir1)) / L)*cos((2. * M_PI*(z+q*dir3)) / L);
+	 t20 = cos((2. * M_PI*(y+q*dir2)) / L)*cos((2. * M_PI*(z+q*dir3)) / L);
+
+	// Gyroid third order derivatives (the three digits give the derivative order in x, y, z)
+	 f300 = (8. * pow(M_PI, 3.)*(-t17 + t3)) / pow(L, 3.);
+	 f210 = (8. * pow(M_PI, 3.)*t2) / pow(L, 3.);
+	 f201 = (-8. * pow(M_PI, 3.)*t18) / pow(L, 3.);
+	 f120 = (-8. * pow(M_PI, 3.)*t17) / pow(L, 3.);
+	 f102 = (8. * pow(M_PI, 3.)*t3) / pow(L, 3.);
+	 f030 = (8. * pow(M_PI, 3.)*(t2 - t20)) / pow(L, 3.);
+	 f021 = (8. * pow(M_PI, 3.)*t8) / pow(L, 3.);
+	 f012 = (-8. * pow(M_PI, 3.)*t20) / pow(L, 3.);
+	 f003 = (8. * pow(M_PI, 3.)*(-t18 + t8)) / pow(L, 3.);
+
+	// Gyroid second order derivatives
+	 f200 = (-4. * pow(M_PI, 2.)*(t13 + t5)) / pow(L, 2.);
+	 f110 = (-4. * pow(M_PI, 2.)*t9) / pow(L, 2.);
+	 f101 = (-4. * pow(M_PI, 2.)*t6) / pow(L, 2.);
+	 f020 = (-4. * pow(M_PI, 2.)*(t11 + t5)) / pow(L, 2.);
+	 f011 = (-4. * pow(M_PI, 2.)*t14) / pow(L, 2.);
+	 f002 = (-4. * pow(M_PI, 2.)*(t11 + t13)) / pow(L, 2.);
+
+	// Gyroid first order derivatives
+	 f100 = (2. * M_PI*(t17 - t3)) / L;
+	 f010 = (2. * M_PI*(-t2 + t20)) / L;
+	 f001 = (2. * M_PI*(t18 - t8)) / L;
+
+	// Gyroid value itself: sin(X)cos(Y) + sin(Y)cos(Z) + sin(Z)cos(X)
+	 f000 = t11 + t13 + t5;
+
+	// sum of the squared first derivatives and its square root
+	// NOTE(review): repeatedTermRoot is used as a divisor below; where all three
+	// first derivatives are zero this divides by zero - confirm callers tolerate it.
+	 repeatedTerm = f100*f100 + f010*f010 + f001*f001;
+	 repeatedTermRoot = sqrt(repeatedTerm);
+
+	// T1..T3: second derivatives combined with the first derivatives
+	// T4..T6: mixed products of second derivatives plus first times third derivatives
+	// T7..T9: the same sums as T1..T3 with every product multiplied by h
+	 T1 = f001*f002 + f010*f011 + f100*f101;
+	 T2 = f001*f011 + f010*f020 + f100*f110;
+	 T3 = f001*f101 + f010*f110 + f100*f200;
+	 T4 = f002*f011 + f001*f012 + f011*f020 + f010*f021 + f101*f110;
+	 T5 = f002*f101 + f001*f102 + f011*f110 + f101*f200 + f100*f201;
+	 T6 = f011*f101 + f020*f110 + f010*f120 + f110*f200 + f100*f210;
+	 T7 = f001*f002*h + f010*f011*h + f100*f101*h;
+	 T8 = f001*f011*h + f010*f020*h + f100*f110*h;
+	 T9 = f001*f101*h + f010*f110*h + f100*f200*h;
+
+	// Final value: three h-weighted square-root terms added to f000.
+	// NOTE(review): the expression looks machine generated; do not reformat by hand.
+	 Gyroidh = 2 * h*sqrt(pow(f001 - (T1*h) / (2.*repeatedTermRoot), 2) + pow(f010 - (T2*h) / (2.*repeatedTermRoot), 2) + pow(f100 - (T3*h) / (2.*repeatedTermRoot), 2))
+		- (3 * h*sqrt(pow(f001 - (T1*h) / (3.*repeatedTermRoot), 2) + pow(f010 - (T2*h) / (3.*repeatedTermRoot), 2) + pow(f100 - (T3*h) / (3.*repeatedTermRoot), 2))) / 2.
+		- (3 * h*sqrt(pow(f001 - (T1*h) / (3.*repeatedTermRoot) + (h*((T7 - 3 * f001*repeatedTermRoot)*
+		(4 * pow(T1, 2)*h - 4 * (pow(f002, 2) + f001*f003 + pow(f011, 2) + f010*f012 + pow(f101, 2) + f100*f102)*h*repeatedTerm + 12 * f002*pow(repeatedTerm, 1.5)) +
+			(T8 - 3 * f010*repeatedTermRoot)*(4 * T1*T2*h - 4 * (T4)*h*repeatedTerm + 12 * f011*pow(repeatedTerm, 1.5)) +
+			(T9 - 3 * f100*repeatedTermRoot)*(4 * T1*T3*h - 4 * (T5)*h*repeatedTerm + 12 * f101*pow(repeatedTerm, 1.5)))) /
+			(108.*sqrt(pow(f001 - (T1*h) / (3.*repeatedTermRoot), 2) + pow(f010 - (T2*h) / (3.*repeatedTermRoot), 2) + pow(f100 - (T3*h) / (3.*repeatedTermRoot), 2))*
+				pow(repeatedTerm, 2)), 2) + pow(f010 - (T2*h) / (3.*repeatedTermRoot) +
+				(h*((T7 - 3 * f001*repeatedTermRoot)*(4 * T1*T2*h - 4 * (T4)*h*repeatedTerm + 12 * f011*pow(repeatedTerm, 1.5)) +
+					(T8 - 3 * f010*repeatedTermRoot)*(4 * pow(T2, 2)*h - 4 * (pow(f011, 2) + pow(f020, 2) + f001*f021 + f010*f030 + pow(f110, 2) + f100*f120)*h*repeatedTerm + 12 * f020*pow(repeatedTerm, 1.5)) +
+					(T9 - 3 * f100*repeatedTermRoot)*(4 * T2*T3*h - 4 * (T6)*h*repeatedTerm + 12 * f110*pow(repeatedTerm, 1.5)))) /
+					(108.*sqrt(pow(f001 - (T1*h) / (3.*repeatedTermRoot), 2) + pow(f010 - (T2*h) / (3.*repeatedTermRoot), 2) + pow(f100 - (T3*h) / (3.*repeatedTermRoot), 2))*
+						pow(repeatedTerm, 2)), 2) + pow(f100 - (T3*h) / (3.*repeatedTermRoot) +
+						(h*((T7 - 3 * f001*repeatedTermRoot)*(4 * T1*T3*h - 4 * (T5)*h*repeatedTerm + 12 * f101*pow(repeatedTerm, 1.5)) +
+							(T8 - 3 * f010*repeatedTermRoot)*(4 * T2*T3*h - 4 * (T6)*h*repeatedTerm + 12 * f110*pow(repeatedTerm, 1.5)) +
+							(T9 - 3 * f100*repeatedTermRoot)*(4 * pow(T3, 2)*h - 4 * (pow(f101, 2) + pow(f110, 2) + pow(f200, 2) + f001*f201 + f010*f210 + f100*f300)*h*repeatedTerm + 12 * f200*pow(repeatedTerm, 1.5)))) /
+							(108.*sqrt(pow(f001 - (T1*h) / (3.*repeatedTermRoot), 2) + pow(f010 - (T2*h) / (3.*repeatedTermRoot), 2) + pow(f100 - (T3*h) / (3.*repeatedTermRoot), 2))*
+								pow(repeatedTerm, 2)), 2))) / 2. + f000;
+	
+		return Gyroidh;
+	}
+};
+/*==========================================================*/
+//! Tests whether the point (x1, x2, x3) belongs to the object.
+//! The implicit function is evaluated twice, once with the thickness applied
+//! with factor +1 and once with factor -1. The point counts as inside when the
+//! first value is not positive and the second value is not negative.
+//! \return true if the point lies inside the object, false otherwise
+bool GbImplicitSurface::isPointInGbObject3D(const double& x1, const double& x2, const double& x3)
+{
+	const double valuePlus  = evaluateImplicitFunction(x1, x2, x3, 1.);
+	const double valueMinus = evaluateImplicitFunction(x1, x2, x3, -1.);
+	return valuePlus <= 0. && valueMinus >= 0.;
+}
+
+/*==========================================================*/
+//! Finds the ray parameter at which the thickness-shifted gyroid function changes sign.
+//!
+//! The function is examined along the ray that starts in (x1, x2, x3) and runs in
+//! direction (rx1, rx2, rx3), on the parameter interval [0, dx*|r|]. The offset
+//! +thickness is tried first, then -thickness. For the first offset whose function
+//! values at the two interval ends have opposite signs, the root is bracketed by
+//! bisection and the midpoint of the final bracket is returned.
+//! \return ray parameter of the sign change, or 999 when neither offset shows a
+//!         sign change between the interval ends
+double GbImplicitSurface::getIntersectionRaytraceFactor(const double& x1, const double& x2, const double& x3, const double& rx1, const double& rx2, const double& rx3)
+{
+	// parameter interval that is searched for a root
+	double lower = 0;
+	double upper = dx*sqrt(rx1*rx1 + rx2*rx2 + rx3*rx3);
+
+	FunctionGyroidThirdOrder surface;
+	surface.x    = x1;
+	surface.y    = x2;
+	surface.z    = x3;
+	surface.dir1 = rx1;
+	surface.dir2 = rx2;
+	surface.dir3 = rx3;
+	surface.L    = edgeLength;
+
+	// both faces of the sheet, in the order in which they are probed
+	const double offsets[2] = { thickness, -thickness };
+	for (int i = 0; i < 2; ++i)
+	{
+		surface.h = offsets[i];
+		if (surface(lower)*surface(upper) < 0)
+		{
+			std::pair<double, double> bracket = bisect(surface, lower, upper, TerminationCondition());
+			return (bracket.first + bracket.second) / 2;
+		}
+	}
+
+	// no sign change for either offset
+	return 999;
+}
+/*=======================================================*/
+//! Evaluates the thickness-shifted gyroid function at a single point.
+//! \param x1,x2,x3 coordinates of the point
+//! \param position factor applied to the thickness (isPointInGbObject3D passes +1 and -1)
+//! \return value of FunctionGyroidThirdOrder at the point for h = position*thickness
+double GbImplicitSurface::evaluateImplicitFunction(const double& x1, const double& x2, const double& x3, const double& position)
+{
+	FunctionGyroidThirdOrder gyroid;
+	gyroid.L = edgeLength;
+	gyroid.h = position*thickness;
+
+	// evaluation point
+	gyroid.x = x1;
+	gyroid.y = x2;
+	gyroid.z = x3;
+
+	// no direction: the parameter passed to the functor has no influence on the point
+	gyroid.dir1 = 0.;
+	gyroid.dir2 = 0.;
+	gyroid.dir3 = 0.;
+
+	return gyroid(0.);
+}
+/*=======================================================*/
+//! \return x1 coordinate midway between the points p1 and p2
+double GbImplicitSurface::getX1Centroid()
+{
+	const double sum = p1->x1 + p2->x1;
+	return 0.5*sum;
+}
+/*=======================================================*/
+//! \return the smaller of the x1 coordinates of p1 and p2
+double GbImplicitSurface::getX1Minimum()
+{
+	const double first  = this->p1->x1;
+	const double second = this->p2->x1;
+	if (first < second)
+		return first;
+	return second;
+}
+/*=======================================================*/
+//! \return the larger of the x1 coordinates of p1 and p2
+double GbImplicitSurface::getX1Maximum()
+{
+	const double first  = this->p1->x1;
+	const double second = this->p2->x1;
+	if (first > second)
+		return first;
+	return second;
+}
+/*=======================================================*/
+//! \return x2 coordinate midway between the points p1 and p2
+double GbImplicitSurface::getX2Centroid()
+{
+	const double sum = p1->x2 + p2->x2;
+	return 0.5*sum;
+}
+/*=======================================================*/
+//! \return the smaller of the x2 coordinates of p1 and p2
+double GbImplicitSurface::getX2Minimum()
+{
+	const double first  = this->p1->x2;
+	const double second = this->p2->x2;
+	if (first < second)
+		return first;
+	return second;
+}
+/*=======================================================*/
+//! \return the larger of the x2 coordinates of p1 and p2
+double GbImplicitSurface::getX2Maximum()
+{
+	const double first  = this->p1->x2;
+	const double second = this->p2->x2;
+	if (first > second)
+		return first;
+	return second;
+}
+/*=======================================================*/
+//! \return x3 coordinate midway between the points p1 and p2
+double GbImplicitSurface::getX3Centroid()
+{
+	const double sum = p1->x3 + p2->x3;
+	return 0.5*sum;
+}
+/*=======================================================*/
+//! \return the smaller of the x3 coordinates of p1 and p2
+double GbImplicitSurface::getX3Minimum()
+{
+	const double first  = this->p1->x3;
+	const double second = this->p2->x3;
+	if (first < second)
+		return first;
+	return second;
+}
+/*=======================================================*/
+//! \return the larger of the x3 coordinates of p1 and p2
+double GbImplicitSurface::getX3Maximum()
+{
+	const double first  = this->p1->x3;
+	const double second = this->p2->x3;
+	if (first > second)
+		return first;
+	return second;
+}
+/*=======================================================*/
+//! Tests whether all eight corners of the axis-aligned cell spanned by
+//! (x1a, x2a, x3a) and (x1b, x2b, x3b) lie inside the object.
+//! Only the corner points are tested; the check stops at the first corner outside.
+//! \return true if every corner is inside, false otherwise
+bool GbImplicitSurface::isCellInsideGbObject3D(const double& x1a, const double& x2a, const double& x3a, const double& x1b, const double& x2b, const double& x3b)
+{
+	const double x1[2] = { x1a, x1b };
+	const double x2[2] = { x2a, x2b };
+	const double x3[2] = { x3a, x3b };
+
+	// corner selectors per axis (x1, x2, x3): 0 takes the "a" value, 1 the "b" value
+	static const int corner[8][3] = {
+		{ 0, 0, 0 }, { 1, 0, 0 }, { 1, 1, 0 }, { 0, 1, 0 },
+		{ 0, 0, 1 }, { 1, 0, 1 }, { 1, 1, 1 }, { 0, 1, 1 }
+	};
+
+	for (int c = 0; c < 8; ++c)
+	{
+		if (!this->isPointInGbObject3D(x1[corner[c][0]], x2[corner[c][1]], x3[corner[c][2]]))
+			return false;
+	}
+	return true;
+}
+/*=======================================================*/
+//! Tests whether at least one of the eight corners of the axis-aligned cell
+//! spanned by (x1a, x2a, x3a) and (x1b, x2b, x3b) lies inside the object.
+//! NOTE(review): only the corner points are tested, so a part of the object that
+//! passes through the cell without containing a corner is not detected - confirm
+//! that this is acceptable for the grid spacings in use.
+//! \return true if any corner is inside, false if all corners are outside
+bool GbImplicitSurface::isCellInsideOrCuttingGbObject3D(const double& x1a, const double& x2a, const double& x3a, const double& x1b, const double& x2b, const double& x3b)
+{
+	// evaluation stops at the first corner that is inside
+	return this->isPointInGbObject3D(x1a, x2a, x3a)
+		|| this->isPointInGbObject3D(x1b, x2a, x3a)
+		|| this->isPointInGbObject3D(x1b, x2b, x3a)
+		|| this->isPointInGbObject3D(x1a, x2b, x3a)
+		|| this->isPointInGbObject3D(x1a, x2a, x3b)
+		|| this->isPointInGbObject3D(x1b, x2a, x3b)
+		|| this->isPointInGbObject3D(x1b, x2b, x3b)
+		|| this->isPointInGbObject3D(x1a, x2b, x3b);
+}
+/*=======================================================*/
+//! Tests whether the cell is cut by the object: some, but not all, of its
+//! corners are inside (as decided by the two cell tests it delegates to).
+//! \return true if the cell is neither completely inside nor completely outside
+bool GbImplicitSurface::isCellCuttingGbObject3D(const double& x1a, const double& x2a, const double& x3a, const double& x1b, const double& x2b, const double& x3b)
+{
+	// a cell that is completely inside is not "cutting"; the second test is skipped then
+	const bool completelyInside = this->isCellInsideGbObject3D(x1a, x2a, x3a, x1b, x2b, x3b);
+	return !completelyInside
+		&& this->isCellInsideOrCuttingGbObject3D(x1a, x2a, x3a, x1b, x2b, x3b);
+}
+/*=======================================================*/
+//! Appends the box spanned by the minimum and maximum coordinates of the object
+//! as eight corner nodes and twelve triangles (two per face).
+//! NOTE(review): the triangle indices are always 0..7, independent of how many
+//! entries \p nodes already holds - presumably callers pass empty vectors; confirm.
+//! \param nodes     receives the eight corner points
+//! \param triangles receives the node index triples of the twelve triangles
+void GbImplicitSurface::addSurfaceTriangleSet(vector<UbTupleFloat3>& nodes, vector<UbTupleInt3>& triangles)
+{
+	const float xMin = (float)getX1Minimum();
+	const float xMax = (float)getX1Maximum();
+	const float yMin = (float)getX2Minimum();
+	const float yMax = (float)getX2Maximum();
+	const float zMin = (float)getX3Minimum();
+	const float zMax = (float)getX3Maximum();
+
+	// corners 0-3: face at minimum x3, corners 4-7: face at maximum x3
+	nodes.push_back(makeUbTuple(xMin, yMin, zMin)); // 0
+	nodes.push_back(makeUbTuple(xMax, yMin, zMin)); // 1
+	nodes.push_back(makeUbTuple(xMax, yMax, zMin)); // 2
+	nodes.push_back(makeUbTuple(xMin, yMax, zMin)); // 3
+	nodes.push_back(makeUbTuple(xMin, yMin, zMax)); // 4
+	nodes.push_back(makeUbTuple(xMax, yMin, zMax)); // 5
+	nodes.push_back(makeUbTuple(xMax, yMax, zMax)); // 6
+	nodes.push_back(makeUbTuple(xMin, yMax, zMax)); // 7
+
+	// two triangles per face, in the order bottom, top, left, right, back, front
+	static const int faceTriangles[12][3] = {
+		{ 0, 1, 2 }, { 0, 2, 3 }, // bottom
+		{ 4, 5, 6 }, { 4, 6, 7 }, // top
+		{ 0, 3, 7 }, { 0, 7, 4 }, // left
+		{ 1, 2, 6 }, { 1, 6, 5 }, // right
+		{ 3, 2, 7 }, { 2, 7, 6 }, // back
+		{ 0, 1, 5 }, { 0, 5, 4 }  // front
+	};
+	for (int t = 0; t < 12; ++t)
+		triangles.push_back(makeUbTuple(faceTriangles[t][0], faceTriangles[t][1], faceTriangles[t][2]));
+}
+/*==========================================================*/
+//! Observer callback: forwards a change notification to the observers of this
+//! object when the changed observable is one of the four points p1..p4.
+//! Anything that is not a GbPoint3D, or is a foreign point, is ignored.
+void GbImplicitSurface::objectChanged(UbObservable *changedObject)
+{
+    GbPoint3D *changedPoint = dynamic_cast<GbPoint3D *>(changedObject);
+    const bool isOwnPoint = changedPoint
+                            && (this->p1 == changedPoint || this->p2 == changedPoint
+                                || this->p3 == changedPoint || this->p4 == changedPoint);
+    if (isOwnPoint)
+        this->notifyObserversObjectChanged();
+}
+/*==========================================================*/
+//! Observer callback: an observed object is about to be deleted. Each of the
+//! four point members p1..p4 that refers to that object is reset to NULL;
+//! members that are already NULL are skipped.
+void GbImplicitSurface::objectWillBeDeleted(UbObservable *objectForDeletion)
+{
+    GbPoint3D **pointSlots[4] = { &this->p1, &this->p2, &this->p3, &this->p4 };
+
+    for (int i = 0; i < 4; ++i) {
+        GbPoint3D *&point = *pointSlots[i];
+        if (!point)
+            continue;
+
+        // compare through the UbObservable base, the type the notification arrives as
+        UbObservable *observedObj = dynamic_cast<UbObservable *>(point);
+        if (objectForDeletion == observedObj)
+            point = NULL;
+    }
+    // CAUTION: strictly speaking, all methods of GbLine would need checks for NULL pointers... TODO
+}
+
+#endif
diff --git a/src/basics/geometry3d/GbImplicitSurface.h b/src/basics/geometry3d/GbImplicitSurface.h
new file mode 100644
index 0000000000000000000000000000000000000000..a72c9442eca57d3bdd99887c8e5692afec939e1d
--- /dev/null
+++ b/src/basics/geometry3d/GbImplicitSurface.h
@@ -0,0 +1,161 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file GbImplicitSurface.h
+//! \ingroup geometry3d
+//! \author Hussein Alihussein
+//=======================================================================================
+#ifndef GbImplicitSurface_H
+#define GbImplicitSurface_H
+
+#ifdef BUILD_USE_BOOST
+
+#include <vector>
+
+#include <GbPoint3D.h>
+#include <basics/utilities/UbObserver.h>
+#include <basics/utilities/UbMath.h>
+
+#define _USE_MATH_DEFINES
+#include <cmath>
+#include <math.h> 
+
+class GbLine3D;
+class GbObject3DCreator;
+
+#include <PointerDefinitions.h>
+class GbImplicitSurface;
+using GbImplicitSurfacePtr = SPtr<GbImplicitSurface>;
+
+
+//! Geometry object described by an implicit function. The implementation file
+//! evaluates a gyroid with period edgeLength whose two faces are offset by
+//! +/- thickness; the bounding-box queries are answered from the points p1 and p2.
+//! The object observes its points (UbObserver) and passes their changes on.
+//! Operations that are not supported throw a UbException naming the operation.
+class GbImplicitSurface : public GbObject3D, public UbObserver
+{
+public:
+	GbImplicitSurface();
+	GbImplicitSurface(const double& x1a, const double& x2a, const double& x3a, const double& x1b, const double& x2b, const double& x3b, const double& edgeLength, const double& dx, const double& thickness=0);
+
+	GbImplicitSurface(const double & x1a, const double & x2a, const double & x3a, const double & x1b, const double & x2b, const double & x3b, const double & x1c, const double & x2c, const double & x3c, const double & x1d, const double & x2d, const double & x3d, const double & edgeLength, const double & dx);
+	//GbImplicitSurface(const double& minX1, const double& minX2, const double& minX3, const double& maxX1, const double& maxX2, const double& maxX3);
+	GbImplicitSurface(GbImplicitSurface *imp);
+	~GbImplicitSurface();
+
+	GbImplicitSurface* clone() override { return new GbImplicitSurface(this); }
+	void finalize() override { throw UbException(UB_EXARGS, "finalize() - not implemented"); }
+
+	// bounding box of the object, derived from the points p1 and p2
+	double getX1Centroid() override;
+	double getX1Minimum() override;
+	double getX1Maximum() override;
+	double getX2Centroid()override;
+	double getX2Minimum() override;
+	double getX2Maximum() override;
+	double getX3Centroid()override;
+	double getX3Minimum() override;
+	double getX3Maximum() override;
+
+	// geometric transformations are not supported; each call throws
+	void setCenterCoordinates(const double &x1, const double &x2, const double &x3) override { throw UbException(UB_EXARGS, "setCenterCoordinates() - not implemented"); }
+	void translate(const double& x1, const double& x2, const double& x3) override { throw UbException(UB_EXARGS, "translate() - not implemented"); }
+	void rotate(const double& rx1, const double& rx2, const double& rx3) override{ throw UbException(UB_EXARGS, "rotate() - not implemented"); }
+	void scale(const double& sx1, const double& sx2, const double& sx3) override { throw UbException(UB_EXARGS, "scale() - not implemented"); }
+
+	double getLengthX1();
+	double getLengthX2();
+	double getLengthX3();
+	
+	// point and cell classification; the variant reporting "pointinboundary" is not supported
+	bool isPointInGbObject3D(const double &x1p, const double &x2p, const double &x3p, bool &pointinboundary) override { throw UbException(UB_EXARGS, "isPointInGbObject3D(x1p, x2p, x3p, pointinboundary) - not implemented"); }
+    bool isPointInGbObject3D(const double &x1p, const double &x2p, const double &x3p) override;
+    bool isCellInsideGbObject3D(const double &x1a, const double &x2a, const double &x3a, const double &x1b,
+                                const double &x2b, const double &x3b) override;
+    bool isCellCuttingGbObject3D(const double &x1a, const double &x2a, const double &x3a, const double &x1b,
+                                 const double &x2b, const double &x3b) override;
+    bool isCellInsideOrCuttingGbObject3D(const double &x1a, const double &x2a, const double &x3a, const double &x1b,
+                                         const double &x2b, const double &x3b) override;
+    double getCellVolumeInsideGbObject3D(const double &x1a, const double &x2a, const double &x3a, const double &x1b,
+                                         const double &x2b, const double &x3b) override { throw UbException(UB_EXARGS, "getCellVolumeInsideGbObject3D() - not implemented"); }
+
+	GbPoint3D *calculateInterSectionPoint3D(GbPoint3D &point1, GbPoint3D &point2);
+	//GbImplicitSurface* createClippedRectangle3D(const double& x1a,const double& x2a,const double& x3a,const double& x1b,const double& x2b,const double& x3b);
+    GbLine3D *createClippedLine3D (GbPoint3D &point1, GbPoint3D &point2) override { throw UbException(UB_EXARGS, "createClippedLine3D() - not implemented"); }
+
+	std::vector<GbTriangle3D *> getSurfaceTriangleSet() override { throw UbException(UB_EXARGS, "getSurfaceTriangleSet() - not implemented"); }
+
+	//! Appends the bounding box of the object as nodes and triangles.
+	 void addSurfaceTriangleSet(std::vector<UbTupleFloat3> &nodes, std::vector<UbTupleInt3> &triangles) override;
+
+	bool hasRaytracing() override { return true;  }
+
+	//! Ray parameter of the intersection of a ray with the surface.
+	//! The direction (rx1, rx2, rx3) must be a unit vector, |r| = 1.
+	double getIntersectionRaytraceFactor (const double& x1, const double& x2, const double& x3, const double& rx1, const double& rx2, const double& rx3) override;
+
+	//! Value of the implicit function at (x1, x2, x3); \p position scales the thickness offset.
+	double evaluateImplicitFunction(const double & x1, const double & x2, const double & x3, const double & position);
+
+	//! Distance of a point to the object - not implemented, always throws.
+	//! The unreachable draft that followed the throw (minimum distance of an inner
+	//! point to the bounding-box faces, nothing for outer points) has been removed.
+	double getDistance(const double& x1p, const double& x2p, const double& x3p)
+	{
+		throw UbException(UB_EXARGS, "not implemented");
+	}
+
+	std::string toString() override { throw UbException(UB_EXARGS, "toString() - not implemented"); }
+
+
+ // virtual methods of UbObserver
+    void objectChanged(UbObservable *changedObject) override;
+    void objectWillBeDeleted(UbObservable *objectForDeletion) override;
+
+	using GbObject3D::isPointInGbObject3D; // reason: without this, isPointInGbObject3D(GbPoint3D*) of the base class would be hidden here and would have to be re-implemented
+
+
+protected:
+	// Points defining the object. They default to null because the observer
+	// callbacks inspect all four of them, also when a constructor sets only some.
+	GbPoint3D* p1 = nullptr;
+	GbPoint3D* p2 = nullptr;
+	GbPoint3D* p3 = nullptr;
+	GbPoint3D* p4 = nullptr;
+	double edgeLength; // period of the implicit function
+	double dx;         // scales the length of the ray-tracing search interval
+	double thickness;  // offset applied with both signs to the implicit function
+private:
+};
+
+
+
+#endif   
+#endif
diff --git a/src/cpu/VirtualFluids.h b/src/cpu/VirtualFluids.h
index 8aed1556b058c8420d79eab32646ae10112ec288..1ee4c7e78aded0e11ea723769d4515f0f1ec846d 100644
--- a/src/cpu/VirtualFluids.h
+++ b/src/cpu/VirtualFluids.h
@@ -121,6 +121,8 @@
 #include <BoundaryConditions/NoSlipBCAlgorithm.h>
 #include <BoundaryConditions/NonEqDensityBCAlgorithm.h>
 #include <BoundaryConditions/NonReflectingOutflowBCAlgorithm.h>
+#include <BoundaryConditions/NonReflectingOutflowBCAlgorithmWithRelaxation.h>
+#include <BoundaryConditions/NonReflectingInflowBCAlgorithm.h>
 #include <BoundaryConditions/SlipBCAdapter.h>
 #include <BoundaryConditions/SlipBCAlgorithm.h>
 #include <BoundaryConditions/ThinWallBCProcessor.h>
@@ -204,6 +206,8 @@
 #include <CoProcessors/ShearStressCoProcessor.h>
 #include <CoProcessors/TimeseriesCoProcessor.h>
 #include <CoProcessors/TurbulenceIntensityCoProcessor.h>
+#include <CoProcessors/TimeAveragedValuesCoProcessor.h>
+
 //#include <CoProcessors/MeanValuesCoProcessor.h>
 #include <CoProcessors/InSituCatalystCoProcessor.h>
 #include <CoProcessors/LineTimeSeriesCoProcessor.h>
@@ -265,6 +269,7 @@
 #include <geometry3d/GbCylinder3D.h>
 #include <geometry3d/GbHalfSpace3D.h>
 #include <geometry3d/GbHalfSpaceKrischan3D.h>
+#include <geometry3d/GbImplicitSurface.h>
 #include <geometry3d/GbLine3D.h>
 #include <geometry3d/GbMeshTools3D.h>
 #include <geometry3d/GbObject3D.h>
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h
index fa964f96b4f05801b0dc4afc48d19a68c5b1c133..f182546b0740cbff6b66b3849e2c67e42de1a98d 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h
@@ -69,9 +69,9 @@ public:
     static const char RheologyPowellEyringModelNoSlipBCAlgorithm           = 18;
     static const char RheologyBinghamModelVelocityBCAlgorithm              = 19;
     static const char MultiphaseNoSlipBCAlgorithm                  = 20;
-    static const char MultiphaseVelocityBCAlgorithm = 21;
-
-
+    static const char MultiphaseVelocityBCAlgorithm                      = 21;
+    static const char NonReflectingInflowBCAlgorithm = 22;
+    static const char NonReflectingOutflowBCAlgorithmWithRelaxation = 23;
 
 public:
     BCAlgorithm() = default;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingInflowBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingInflowBCAlgorithm.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..078e8bfb4c574b9a6fe90a5bd569d2969237eb66
--- /dev/null
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingInflowBCAlgorithm.cpp
@@ -0,0 +1,341 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NonReflectingInflowBCAlgorithm.cpp
+//! \ingroup BoundarConditions
+//! \author Hussein Alihussein
+//=======================================================================================
+#include "NonReflectingInflowBCAlgorithm.h"
+
+#include "BoundaryConditions.h"
+#include "D3Q27System.h"
+#include "DistributionArray3D.h"
+
+//! Registers the algorithm under its own type id and marks it as a boundary
+//! condition that is flagged pre-collision.
+NonReflectingInflowBCAlgorithm::NonReflectingInflowBCAlgorithm()
+{
+    BCAlgorithm::preCollision = true;
+    BCAlgorithm::type         = BCAlgorithm::NonReflectingInflowBCAlgorithm;
+}
+//////////////////////////////////////////////////////////////////////////
+// Compiler-generated destructor; defined here, out of line, in the implementation file.
+NonReflectingInflowBCAlgorithm::~NonReflectingInflowBCAlgorithm() = default;
+//////////////////////////////////////////////////////////////////////////
+//! Creates a new, default-constructed algorithm of the same type.
+//! \return shared pointer to the new instance (state of this object is not copied)
+SPtr<BCAlgorithm> NonReflectingInflowBCAlgorithm::clone()
+{
+    return SPtr<BCAlgorithm>(new NonReflectingInflowBCAlgorithm());
+}
+//////////////////////////////////////////////////////////////////////////
+//! Stores the distribution array that applyBC() reads from and writes to.
+//! \param distributions shared pointer to the distribution array to operate on
+void NonReflectingInflowBCAlgorithm::addDistributions(SPtr<DistributionArray3D> distributions)
+{
+    // the parameter shadows the member of the same name, hence the explicit this->
+    this->distributions = distributions;
+}
+//////////////////////////////////////////////////////////////////////////
+void NonReflectingInflowBCAlgorithm::applyBC()
+{
+    using namespace vf::lbm::dir;
+    using namespace D3Q27System;
+ //   using namespace UbMath;
+    using namespace vf::lbm::constant;
+
+    LBMReal f[ENDF + 1];
+    LBMReal ftemp[ENDF + 1];
+
+    int nx1       = x1;
+    int nx2       = x2;
+    int nx3       = x3;
+    int direction = -1;
+
+    // flag points in direction of fluid
+    if (bcPtr->hasDensityBoundaryFlag(DIR_P00)) {
+        nx1 += 1;
+        direction = DIR_P00;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_M00)) {
+        nx1 -= 1;
+        direction = DIR_M00;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0P0)) {
+        nx2 += 1;
+        direction = DIR_0P0;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0M0)) {
+        nx2 -= 1;
+        direction = DIR_0M0;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00P)) {
+        nx3 += 1;
+        direction = DIR_00P;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00M)) {
+        nx3 -= 1;
+        direction = DIR_00M;
+    } else
+        UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on density boundary..."));
+
+    distributions->getDistribution(f, x1, x2, x3);
+    distributions->getDistribution(ftemp, nx1, nx2, nx3);
+
+    LBMReal rho, vx1, vx2, vx3;
+    calcMacrosFct(f, rho, vx1, vx2, vx3);
+    //vx1                  = 0.;
+    LBMReal BCVeloWeight =  0.5;
+    // LBMReal velocity     = 0.004814077025232405; 
+     // LBMReal velocity     = 0.00057735;
+    //LBMReal velocity = 0.04; 
+      // LBMReal velocity = 0.01; 
+     // LBMReal velocity = 1./112.; 
+    // LBMReal velocity = 1./126.; 
+     LBMReal velocity = 1./200.; 
+     // LBMReal velocity = 0.005; 
+    //LBMReal delf         =(-velocity+vx1)*0.5 ;
+    LBMReal delf; 
+
+    switch (direction) {
+        case DIR_P00:
+            delf = (-velocity + vx1) * BCVeloWeight; 
+            // delf = (-velocity ) * BCVeloWeight;
+            f[DIR_P00]   = ftemp[DIR_P00] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P00] - delf* WEIGTH[DIR_P00];
+            f[DIR_PP0]  = ftemp[DIR_PP0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PP0]- delf* WEIGTH[DIR_PP0];
+            f[DIR_PM0]  = ftemp[DIR_PM0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PM0]- delf* WEIGTH[DIR_PM0];
+            f[DIR_P0P]  = ftemp[DIR_P0P] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P0P]- delf* WEIGTH[DIR_P0P];
+            f[DIR_P0M]  = ftemp[DIR_P0M] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P0M]- delf* WEIGTH[DIR_P0M];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PPP]- delf* WEIGTH[DIR_PPP];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PMP]- delf* WEIGTH[DIR_PMP];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PPM]- delf* WEIGTH[DIR_PPM];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PMM]- delf* WEIGTH[DIR_PMM];
+            //f[DIR_P00] = (ftemp[DIR_P00] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P00]) *
+            //           (1 - BCVeloWeight) +
+            //       (ftemp[DIR_M00] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_M00] +
+            //       velocity*(6)*WEIGTH[DIR_P00]/* bcPtr->getBoundaryVelocity(INVDIR[DIR_M00])*/) *
+            //           (BCVeloWeight)  ;
+            //f[DIR_PP0] = (ftemp[DIR_PP0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PP0]) *
+            //            (1 - BCVeloWeight) +
+            //        (ftemp[DIR_MM0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_MM0] +
+            //         velocity * (6) * WEIGTH[DIR_PP0] /*bcPtr->getBoundaryVelocity(INVDIR[DIR_MM0])*/) *
+            //            (BCVeloWeight); 
+            //f[DIR_PM0] = (ftemp[DIR_PM0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PM0]) *
+            //            (1 - BCVeloWeight) +
+            //        (ftemp[DIR_MP0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_MP0] +
+            //        velocity*(6)*WEIGTH[DIR_PP0]/* bcPtr->getBoundaryVelocity(INVDIR[DIR_MP0])*/) *
+            //            (BCVeloWeight); 
+            //f[DIR_P0P] = (ftemp[DIR_P0P] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P0P]) *
+            //            (1 - BCVeloWeight) +
+            //        (ftemp[DIR_M0M] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_M0M] +
+            //        velocity*(6)*WEIGTH[DIR_P0P]/* bcPtr->getBoundaryVelocity(INVDIR[DIR_M0M])*/) *
+            //            (BCVeloWeight); 
+            //f[DIR_P0M] = (ftemp[DIR_P0M] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P0M])*
+            //            (1 - BCVeloWeight) +
+            //        (ftemp[DIR_M0P] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_M0P] +
+            //        velocity*(6)*WEIGTH[DIR_P0M]/* bcPtr->getBoundaryVelocity(INVDIR[DIR_M0P])*/) *
+            //            (BCVeloWeight); 
+            //f[DIR_PPP] = (ftemp[DIR_PPP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PPP])*
+            //            (1 - BCVeloWeight) +
+            //        (ftemp[DIR_MMM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_MMM] +
+            //     velocity * (6) * WEIGTH[DIR_PPP] /* bcPtr->getBoundaryVelocity(INVDIR[DIR_MMM])*/) *
+            //            (BCVeloWeight); 
+            //f[DIR_PMP] = (ftemp[DIR_PMP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PMP]) *
+            //             (1 - BCVeloWeight) +
+            //         (ftemp[DIR_MPM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_MPM] +
+            //     velocity * (6) * WEIGTH[DIR_PPP] /*bcPtr->getBoundaryVelocity(INVDIR[DIR_MPM])*/) *
+            //             (BCVeloWeight); 
+            //f[DIR_PPM] = (ftemp[DIR_PPM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PPM]) *
+            //             (1 - BCVeloWeight) +
+            //         (ftemp[DIR_MMP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_MMP] +
+            //     velocity * (6) * WEIGTH[DIR_PPP] /* bcPtr->getBoundaryVelocity(INVDIR[DIR_MMP])*/) *
+            //             (BCVeloWeight); 
+            //f[DIR_PMM] = (ftemp[DIR_PMM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PMM]) *
+            //             (1 - BCVeloWeight) +
+            //         (ftemp[DIR_MPP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_MPP] +
+            //     velocity * (6) * WEIGTH[DIR_PPP] /* bcPtr->getBoundaryVelocity(INVDIR[DIR_MPP])*/) *
+            //             (BCVeloWeight); 
+
+            distributions->setDistributionInvForDirection(f[DIR_P00], x1 + DX1[DIR_M00], x2 + DX2[DIR_M00], x3 + DX3[DIR_M00], DIR_M00);
+            distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
+            distributions->setDistributionInvForDirection(f[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
+            distributions->setDistributionInvForDirection(f[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
+            distributions->setDistributionInvForDirection(f[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
+            distributions->setDistributionInvForDirection(f[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributions->setDistributionInvForDirection(f[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributions->setDistributionInvForDirection(f[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            break;
+        case DIR_M00:
+            delf = (-velocity - vx1) * BCVeloWeight;
+            f[DIR_M00] = ftemp[DIR_M00] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M00] -
+                   delf * WEIGTH[DIR_M00];
+            f[DIR_MP0] = ftemp[DIR_MP0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MP0] -
+                    delf * WEIGTH[DIR_MP0];
+            f[DIR_MM0] = ftemp[DIR_MM0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MM0] -
+                    delf * WEIGTH[DIR_MM0];
+            f[DIR_M0P] = ftemp[DIR_M0P] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M0P] -
+                    delf * WEIGTH[DIR_M0P];
+            f[DIR_M0M] = ftemp[DIR_M0M] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M0M] -
+                    delf * WEIGTH[DIR_M0M];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MPP] -
+                     delf * WEIGTH[DIR_MPP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MMP] -
+                     delf * WEIGTH[DIR_MMP];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MPM] -
+                     delf * WEIGTH[DIR_MPM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MMM] -
+                     delf * WEIGTH[DIR_MMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
+            distributions->setDistributionInvForDirection(f[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
+            distributions->setDistributionInvForDirection(f[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
+            distributions->setDistributionInvForDirection(f[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
+            distributions->setDistributionInvForDirection(f[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
+            distributions->setDistributionInvForDirection(f[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+            break;
+        case DIR_0P0:
+            delf = (-velocity + vx2) * BCVeloWeight;
+            f[DIR_0P0] = ftemp[DIR_0P0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_0P0] -
+                   delf * WEIGTH[DIR_0P0];
+            f[DIR_PP0] = ftemp[DIR_PP0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_PP0] -
+                    delf * WEIGTH[DIR_PP0];
+            f[DIR_MP0] = ftemp[DIR_MP0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_MP0] -
+                    delf * WEIGTH[DIR_MP0];
+            f[DIR_0PP] = ftemp[DIR_0PP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_0PP] -
+                    delf * WEIGTH[DIR_0PP];
+            f[DIR_0PM] = ftemp[DIR_0PM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_0PM] -
+                    delf * WEIGTH[DIR_0PM];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_PPP] -
+                     delf * WEIGTH[DIR_PPP];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_MPP] -
+                     delf * WEIGTH[DIR_MPP];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_PPM] -
+                     delf * WEIGTH[DIR_PPM];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_MPM] -
+                     delf * WEIGTH[DIR_MPM];
+
+            distributions->setDistributionInvForDirection(f[DIR_0P0], x1 + DX1[DIR_0M0], x2 + DX2[DIR_0M0], x3 + DX3[DIR_0M0], DIR_0M0);
+            distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
+            distributions->setDistributionInvForDirection(f[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
+            distributions->setDistributionInvForDirection(f[DIR_0PP], x1 + DX1[DIR_0MM], x2 + DX2[DIR_0MM], x3 + DX3[DIR_0MM], DIR_0MM);
+            distributions->setDistributionInvForDirection(f[DIR_0PM], x1 + DX1[DIR_0MP], x2 + DX2[DIR_0MP], x3 + DX3[DIR_0MP], DIR_0MP);
+            distributions->setDistributionInvForDirection(f[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributions->setDistributionInvForDirection(f[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributions->setDistributionInvForDirection(f[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            break;
+        case DIR_0M0:
+            delf = (-velocity - vx2) * BCVeloWeight;
+            f[DIR_0M0] = ftemp[DIR_0M0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_0M0] -
+                   delf * WEIGTH[DIR_0M0];
+            f[DIR_PM0] = ftemp[DIR_PM0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_PM0] -
+                    delf * WEIGTH[DIR_PM0];
+            f[DIR_MM0] = ftemp[DIR_MM0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_MM0] -
+                    delf * WEIGTH[DIR_MM0];
+            f[DIR_0MP] = ftemp[DIR_0MP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_0MP] -
+                    delf * WEIGTH[DIR_0MP];
+            f[DIR_0MM] = ftemp[DIR_0MM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_0MM] -
+                    delf * WEIGTH[DIR_0MM];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_PMP] -
+                     delf * WEIGTH[DIR_PMP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_MMP] -
+                     delf * WEIGTH[DIR_MMP];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_PMM] -
+                     delf * WEIGTH[DIR_PMM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_MMM] -
+                     delf * WEIGTH[DIR_MMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_0M0], x1 + DX1[DIR_0P0], x2 + DX2[DIR_0P0], x3 + DX3[DIR_0P0], DIR_0P0);
+            distributions->setDistributionInvForDirection(f[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
+            distributions->setDistributionInvForDirection(f[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
+            distributions->setDistributionInvForDirection(f[DIR_0MP], x1 + DX1[DIR_0PM], x2 + DX2[DIR_0PM], x3 + DX3[DIR_0PM], DIR_0PM);
+            distributions->setDistributionInvForDirection(f[DIR_0MM], x1 + DX1[DIR_0PP], x2 + DX2[DIR_0PP], x3 + DX3[DIR_0PP], DIR_0PP);
+            distributions->setDistributionInvForDirection(f[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+            break;
+        case DIR_00P:
+            delf = (-velocity + vx3) * BCVeloWeight;
+            f[DIR_00P] = ftemp[DIR_00P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_00P] -
+                   delf * WEIGTH[DIR_00P];
+            f[DIR_P0P] = ftemp[DIR_P0P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_P0P] -
+                    delf * WEIGTH[DIR_P0P];
+            f[DIR_M0P] = ftemp[DIR_M0P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_M0P] -
+                    delf * WEIGTH[DIR_M0P];
+            f[DIR_0PP] = ftemp[DIR_0PP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_0PP] -
+                    delf * WEIGTH[DIR_0PP];
+            f[DIR_0MP] = ftemp[DIR_0MP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_0MP] -
+                    delf * WEIGTH[DIR_0MP];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_PPP] -
+                     delf * WEIGTH[DIR_PPP];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_MPP] -
+                     delf * WEIGTH[DIR_MPP];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_PMP] -
+                     delf * WEIGTH[DIR_PMP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_MMP] -
+                     delf * WEIGTH[DIR_MMP];
+
+            distributions->setDistributionInvForDirection(f[DIR_00P], x1 + DX1[DIR_00M], x2 + DX2[DIR_00M], x3 + DX3[DIR_00M], DIR_00M);
+            distributions->setDistributionInvForDirection(f[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
+            distributions->setDistributionInvForDirection(f[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
+            distributions->setDistributionInvForDirection(f[DIR_0PP], x1 + DX1[DIR_0MM], x2 + DX2[DIR_0MM], x3 + DX3[DIR_0MM], DIR_0MM);
+            distributions->setDistributionInvForDirection(f[DIR_0MP], x1 + DX1[DIR_0PM], x2 + DX2[DIR_0PM], x3 + DX3[DIR_0PM], DIR_0PM);
+            distributions->setDistributionInvForDirection(f[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributions->setDistributionInvForDirection(f[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributions->setDistributionInvForDirection(f[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            break;
+        case DIR_00M:
+            delf = (-velocity - vx3) * BCVeloWeight;
+            f[DIR_00M] = ftemp[DIR_00M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_00M] -
+                   delf * WEIGTH[DIR_00M];
+            f[DIR_P0M] = ftemp[DIR_P0M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_P0M] -
+                    delf * WEIGTH[DIR_P0M];
+            f[DIR_M0M] = ftemp[DIR_M0M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_M0M] -
+                    delf * WEIGTH[DIR_M0M];
+            f[DIR_0PM] = ftemp[DIR_0PM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_0PM] -
+                    delf * WEIGTH[DIR_0PM];
+            f[DIR_0MM] = ftemp[DIR_0MM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_0MM] -
+                    delf * WEIGTH[DIR_0MM];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_PPM] -
+                     delf * WEIGTH[DIR_PPM];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_MPM] -
+                     delf * WEIGTH[DIR_MPM];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_PMM] -
+                     delf * WEIGTH[DIR_PMM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_MMM] -
+                     delf * WEIGTH[DIR_MMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_00M], x1 + DX1[DIR_00P], x2 + DX2[DIR_00P], x3 + DX3[DIR_00P], DIR_00P);
+            distributions->setDistributionInvForDirection(f[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
+            distributions->setDistributionInvForDirection(f[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
+            distributions->setDistributionInvForDirection(f[DIR_0PM], x1 + DX1[DIR_0MP], x2 + DX2[DIR_0MP], x3 + DX3[DIR_0MP], DIR_0MP);
+            distributions->setDistributionInvForDirection(f[DIR_0MM], x1 + DX1[DIR_0PP], x2 + DX2[DIR_0PP], x3 + DX3[DIR_0PP], DIR_0PP);
+            distributions->setDistributionInvForDirection(f[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+            break;
+        default:
+            UB_THROW(
+                UbException(UB_EXARGS, "It isn't implemented non reflecting density boundary for this direction!"));
+    }
+}
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingInflowBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingInflowBCAlgorithm.h
new file mode 100644
index 0000000000000000000000000000000000000000..1f3e87ce3fff371fbec30dbbe90721bd5ff975cc
--- /dev/null
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingInflowBCAlgorithm.h
@@ -0,0 +1,50 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NonReflectingInflowBCAlgorithm.h
+//! \ingroup BoundarConditions
+//! \author Hussein Alihussein
+//=======================================================================================
+#ifndef NonReflectingInflowBCAlgorithm_h__
+#define NonReflectingInflowBCAlgorithm_h__
+
+#include "BCAlgorithm.h"
+#include <PointerDefinitions.h>
+
+class DistributionArray3D;
+
+class NonReflectingInflowBCAlgorithm : public BCAlgorithm // boundary-condition algorithm exposed through the BCAlgorithm interface
+{
+public:
+    NonReflectingInflowBCAlgorithm();
+    ~NonReflectingInflowBCAlgorithm() override;
+    SPtr<BCAlgorithm> clone() override; //!< returns an algorithm object as BCAlgorithm pointer (presumably a fresh instance - confirm in the .cpp)
+    void addDistributions(SPtr<DistributionArray3D> distributions) override; //!< supplies the distribution array; applyBC() in the .cpp writes through it
+    void applyBC() override; //!< the .cpp switches over the axis-aligned boundary directions and throws UbException for any other direction
+};
+#endif // NonReflectingInflowBCAlgorithm_h__
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithmWithRelaxation.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithmWithRelaxation.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ce2c5a6268fd8d1a69d4c56a6ddbebe2df587b6c
--- /dev/null
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithmWithRelaxation.cpp
@@ -0,0 +1,233 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NonReflectingOutflowBCAlgorithmWithRelaxation.cpp
+//! \ingroup BoundarConditions
+//! \author Konstantin Kutscher, Hussein Alihussein
+//=======================================================================================
+#include "NonReflectingOutflowBCAlgorithmWithRelaxation.h"
+
+#include "BoundaryConditions.h"
+#include "D3Q27System.h"
+#include "DistributionArray3D.h"
+
+NonReflectingOutflowBCAlgorithmWithRelaxation::NonReflectingOutflowBCAlgorithmWithRelaxation()
+{ // Raise the preCollision flag and store this algorithm's type id, both members of the BCAlgorithm base.
+    BCAlgorithm::preCollision = true;
+    BCAlgorithm::type = BCAlgorithm::NonReflectingOutflowBCAlgorithmWithRelaxation;
+}
+//////////////////////////////////////////////////////////////////////////
+NonReflectingOutflowBCAlgorithmWithRelaxation::~NonReflectingOutflowBCAlgorithmWithRelaxation() = default; // compiler-generated; no explicit cleanup here
+//////////////////////////////////////////////////////////////////////////
+SPtr<BCAlgorithm> NonReflectingOutflowBCAlgorithmWithRelaxation::clone()
+{
+    // The result is a default-constructed instance; no member of *this is copied into it.
+    return SPtr<BCAlgorithm>(new NonReflectingOutflowBCAlgorithmWithRelaxation());
+}
+//////////////////////////////////////////////////////////////////////////
+void NonReflectingOutflowBCAlgorithmWithRelaxation::addDistributions(SPtr<DistributionArray3D> distributions)
+{ // Stores the passed distribution array; applyBC() later reads from it and writes back into it.
+    this->distributions = distributions; // "this->" selects the member, which the parameter of the same name shadows
+}
+//////////////////////////////////////////////////////////////////////////
+void NonReflectingOutflowBCAlgorithmWithRelaxation::applyBC()
+{
+    using namespace vf::lbm::dir;
+
+    using namespace D3Q27System;
+ //   using namespace UbMath;
+    using namespace vf::lbm::constant;
+
+    LBMReal f[ENDF + 1];
+    LBMReal ftemp[ENDF + 1];
+
+    int nx1       = x1;
+    int nx2       = x2;
+    int nx3       = x3;
+    int direction = -1;
+
+    // flag points in direction of fluid
+    if (bcPtr->hasDensityBoundaryFlag(DIR_P00)) {
+        nx1 += 1;
+        direction = DIR_P00;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_M00)) {
+        nx1 -= 1;
+        direction = DIR_M00;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0P0)) {
+        nx2 += 1;
+        direction = DIR_0P0;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_0M0)) {
+        nx2 -= 1;
+        direction = DIR_0M0;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00P)) {
+        nx3 += 1;
+        direction = DIR_00P;
+    } else if (bcPtr->hasDensityBoundaryFlag(DIR_00M)) {
+        nx3 -= 1;
+        direction = DIR_00M;
+    } else
+        UB_THROW(UbException(UB_EXARGS, "Danger...no orthogonal BC-Flag on density boundary..."));
+
+    distributions->getDistribution(f, x1, x2, x3);
+    distributions->getDistribution(ftemp, nx1, nx2, nx3);
+
+    LBMReal rho, vx1, vx2, vx3;
+    calcMacrosFct(f, rho, vx1, vx2, vx3);
+    LBMReal delf = rho*0.01;
+    switch (direction) {
+        case DIR_P00:
+            f[DIR_P00]   = ftemp[DIR_P00] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P00] - delf* WEIGTH[DIR_P00];
+            f[DIR_PP0]  = ftemp[DIR_PP0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PP0]- delf* WEIGTH[DIR_PP0];
+            f[DIR_PM0]  = ftemp[DIR_PM0] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PM0]- delf* WEIGTH[DIR_PM0];
+            f[DIR_P0P]  = ftemp[DIR_P0P] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P0P]- delf* WEIGTH[DIR_P0P];
+            f[DIR_P0M]  = ftemp[DIR_P0M] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_P0M]- delf* WEIGTH[DIR_P0M];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PPP]- delf* WEIGTH[DIR_PPP];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PMP]- delf* WEIGTH[DIR_PMP];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PPM]- delf* WEIGTH[DIR_PPM];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 + vx1) + (1.0 - one_over_sqrt3 - vx1) * f[DIR_PMM]- delf* WEIGTH[DIR_PMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_P00], x1 + DX1[DIR_M00], x2 + DX2[DIR_M00], x3 + DX3[DIR_M00], DIR_M00);
+            distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
+            distributions->setDistributionInvForDirection(f[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
+            distributions->setDistributionInvForDirection(f[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
+            distributions->setDistributionInvForDirection(f[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
+            distributions->setDistributionInvForDirection(f[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributions->setDistributionInvForDirection(f[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributions->setDistributionInvForDirection(f[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            break;
+        case DIR_M00:
+            f[DIR_M00]   = ftemp[DIR_M00] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M00]- delf* WEIGTH[DIR_M00];
+            f[DIR_MP0]  = ftemp[DIR_MP0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MP0]- delf* WEIGTH[DIR_MP0];
+            f[DIR_MM0]  = ftemp[DIR_MM0] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MM0]- delf* WEIGTH[DIR_MM0];
+            f[DIR_M0P]  = ftemp[DIR_M0P] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M0P]- delf* WEIGTH[DIR_M0P];
+            f[DIR_M0M]  = ftemp[DIR_M0M] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_M0M]- delf* WEIGTH[DIR_M0M];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MPP]- delf* WEIGTH[DIR_MPP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MMP]- delf* WEIGTH[DIR_MMP];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MPM]- delf* WEIGTH[DIR_MPM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx1) + (1.0 - one_over_sqrt3 + vx1) * f[DIR_MMM]- delf* WEIGTH[DIR_MMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_M00], x1 + DX1[DIR_P00], x2 + DX2[DIR_P00], x3 + DX3[DIR_P00], DIR_P00);
+            distributions->setDistributionInvForDirection(f[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
+            distributions->setDistributionInvForDirection(f[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
+            distributions->setDistributionInvForDirection(f[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
+            distributions->setDistributionInvForDirection(f[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
+            distributions->setDistributionInvForDirection(f[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+            break;
+        case DIR_0P0:
+            f[DIR_0P0]   = ftemp[DIR_0P0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_0P0]- delf* WEIGTH[DIR_0P0];
+            f[DIR_PP0]  = ftemp[DIR_PP0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_PP0]- delf* WEIGTH[DIR_PP0];
+            f[DIR_MP0]  = ftemp[DIR_MP0] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_MP0]- delf* WEIGTH[DIR_MP0];
+            f[DIR_0PP]  = ftemp[DIR_0PP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_0PP]- delf* WEIGTH[DIR_0PP];
+            f[DIR_0PM]  = ftemp[DIR_0PM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_0PM]- delf* WEIGTH[DIR_0PM];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_PPP]- delf* WEIGTH[DIR_PPP];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_MPP]- delf* WEIGTH[DIR_MPP];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_PPM]- delf* WEIGTH[DIR_PPM];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 + vx2) + (1.0 - one_over_sqrt3 - vx2) * f[DIR_MPM]- delf* WEIGTH[DIR_MPM];
+
+            distributions->setDistributionInvForDirection(f[DIR_0P0], x1 + DX1[DIR_0M0], x2 + DX2[DIR_0M0], x3 + DX3[DIR_0M0], DIR_0M0);
+            distributions->setDistributionInvForDirection(f[DIR_PP0], x1 + DX1[DIR_MM0], x2 + DX2[DIR_MM0], x3 + DX3[DIR_MM0], DIR_MM0);
+            distributions->setDistributionInvForDirection(f[DIR_MP0], x1 + DX1[DIR_PM0], x2 + DX2[DIR_PM0], x3 + DX3[DIR_PM0], DIR_PM0);
+            distributions->setDistributionInvForDirection(f[DIR_0PP], x1 + DX1[DIR_0MM], x2 + DX2[DIR_0MM], x3 + DX3[DIR_0MM], DIR_0MM);
+            distributions->setDistributionInvForDirection(f[DIR_0PM], x1 + DX1[DIR_0MP], x2 + DX2[DIR_0MP], x3 + DX3[DIR_0MP], DIR_0MP);
+            distributions->setDistributionInvForDirection(f[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributions->setDistributionInvForDirection(f[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributions->setDistributionInvForDirection(f[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            break;
+        case DIR_0M0:
+            f[DIR_0M0]   = ftemp[DIR_0M0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_0M0]- delf* WEIGTH[DIR_0M0];
+            f[DIR_PM0]  = ftemp[DIR_PM0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_PM0]- delf* WEIGTH[DIR_PM0];
+            f[DIR_MM0]  = ftemp[DIR_MM0] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_MM0]- delf* WEIGTH[DIR_MM0];
+            f[DIR_0MP]  = ftemp[DIR_0MP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_0MP]- delf* WEIGTH[DIR_0MP];
+            f[DIR_0MM]  = ftemp[DIR_0MM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_0MM]- delf* WEIGTH[DIR_0MM];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_PMP]- delf* WEIGTH[DIR_PMP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_MMP]- delf* WEIGTH[DIR_MMP];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_PMM]- delf* WEIGTH[DIR_PMM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx2) + (1.0 - one_over_sqrt3 + vx2) * f[DIR_MMM]- delf* WEIGTH[DIR_MMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_0M0], x1 + DX1[DIR_0P0], x2 + DX2[DIR_0P0], x3 + DX3[DIR_0P0], DIR_0P0);
+            distributions->setDistributionInvForDirection(f[DIR_PM0], x1 + DX1[DIR_MP0], x2 + DX2[DIR_MP0], x3 + DX3[DIR_MP0], DIR_MP0);
+            distributions->setDistributionInvForDirection(f[DIR_MM0], x1 + DX1[DIR_PP0], x2 + DX2[DIR_PP0], x3 + DX3[DIR_PP0], DIR_PP0);
+            distributions->setDistributionInvForDirection(f[DIR_0MP], x1 + DX1[DIR_0PM], x2 + DX2[DIR_0PM], x3 + DX3[DIR_0PM], DIR_0PM);
+            distributions->setDistributionInvForDirection(f[DIR_0MM], x1 + DX1[DIR_0PP], x2 + DX2[DIR_0PP], x3 + DX3[DIR_0PP], DIR_0PP);
+            distributions->setDistributionInvForDirection(f[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+            break;
+        case DIR_00P:
+            f[DIR_00P]   = ftemp[DIR_00P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_00P]- delf* WEIGTH[DIR_00P];
+            f[DIR_P0P]  = ftemp[DIR_P0P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_P0P]- delf* WEIGTH[DIR_P0P];
+            f[DIR_M0P]  = ftemp[DIR_M0P] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_M0P]- delf* WEIGTH[DIR_M0P];
+            f[DIR_0PP]  = ftemp[DIR_0PP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_0PP]- delf* WEIGTH[DIR_0PP];
+            f[DIR_0MP]  = ftemp[DIR_0MP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_0MP]- delf* WEIGTH[DIR_0MP];
+            f[DIR_PPP] = ftemp[DIR_PPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_PPP]- delf* WEIGTH[DIR_PPP];
+            f[DIR_MPP] = ftemp[DIR_MPP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_MPP]- delf* WEIGTH[DIR_MPP];
+            f[DIR_PMP] = ftemp[DIR_PMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_PMP]- delf* WEIGTH[DIR_PMP];
+            f[DIR_MMP] = ftemp[DIR_MMP] * (one_over_sqrt3 + vx3) + (1.0 - one_over_sqrt3 - vx3) * f[DIR_MMP]- delf* WEIGTH[DIR_MMP];
+
+            distributions->setDistributionInvForDirection(f[DIR_00P], x1 + DX1[DIR_00M], x2 + DX2[DIR_00M], x3 + DX3[DIR_00M], DIR_00M);
+            distributions->setDistributionInvForDirection(f[DIR_P0P], x1 + DX1[DIR_M0M], x2 + DX2[DIR_M0M], x3 + DX3[DIR_M0M], DIR_M0M);
+            distributions->setDistributionInvForDirection(f[DIR_M0P], x1 + DX1[DIR_P0M], x2 + DX2[DIR_P0M], x3 + DX3[DIR_P0M], DIR_P0M);
+            distributions->setDistributionInvForDirection(f[DIR_0PP], x1 + DX1[DIR_0MM], x2 + DX2[DIR_0MM], x3 + DX3[DIR_0MM], DIR_0MM);
+            distributions->setDistributionInvForDirection(f[DIR_0MP], x1 + DX1[DIR_0PM], x2 + DX2[DIR_0PM], x3 + DX3[DIR_0PM], DIR_0PM);
+            distributions->setDistributionInvForDirection(f[DIR_PPP], x1 + DX1[DIR_MMM], x2 + DX2[DIR_MMM], x3 + DX3[DIR_MMM], DIR_MMM);
+            distributions->setDistributionInvForDirection(f[DIR_MPP], x1 + DX1[DIR_PMM], x2 + DX2[DIR_PMM], x3 + DX3[DIR_PMM], DIR_PMM);
+            distributions->setDistributionInvForDirection(f[DIR_PMP], x1 + DX1[DIR_MPM], x2 + DX2[DIR_MPM], x3 + DX3[DIR_MPM], DIR_MPM);
+            distributions->setDistributionInvForDirection(f[DIR_MMP], x1 + DX1[DIR_PPM], x2 + DX2[DIR_PPM], x3 + DX3[DIR_PPM], DIR_PPM);
+            break;
+        case DIR_00M:
+            f[DIR_00M]   = ftemp[DIR_00M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_00M]- delf* WEIGTH[DIR_00M];
+            f[DIR_P0M]  = ftemp[DIR_P0M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_P0M]- delf* WEIGTH[DIR_P0M];
+            f[DIR_M0M]  = ftemp[DIR_M0M] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_M0M]- delf* WEIGTH[DIR_M0M];
+            f[DIR_0PM]  = ftemp[DIR_0PM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_0PM]- delf* WEIGTH[DIR_0PM];
+            f[DIR_0MM]  = ftemp[DIR_0MM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_0MM]- delf* WEIGTH[DIR_0MM];
+            f[DIR_PPM] = ftemp[DIR_PPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_PPM]- delf* WEIGTH[DIR_PPM];
+            f[DIR_MPM] = ftemp[DIR_MPM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_MPM]- delf* WEIGTH[DIR_MPM];
+            f[DIR_PMM] = ftemp[DIR_PMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_PMM]- delf* WEIGTH[DIR_PMM];
+            f[DIR_MMM] = ftemp[DIR_MMM] * (one_over_sqrt3 - vx3) + (1.0 - one_over_sqrt3 + vx3) * f[DIR_MMM]- delf* WEIGTH[DIR_MMM];
+
+            distributions->setDistributionInvForDirection(f[DIR_00M], x1 + DX1[DIR_00P], x2 + DX2[DIR_00P], x3 + DX3[DIR_00P], DIR_00P);
+            distributions->setDistributionInvForDirection(f[DIR_P0M], x1 + DX1[DIR_M0P], x2 + DX2[DIR_M0P], x3 + DX3[DIR_M0P], DIR_M0P);
+            distributions->setDistributionInvForDirection(f[DIR_M0M], x1 + DX1[DIR_P0P], x2 + DX2[DIR_P0P], x3 + DX3[DIR_P0P], DIR_P0P);
+            distributions->setDistributionInvForDirection(f[DIR_0PM], x1 + DX1[DIR_0MP], x2 + DX2[DIR_0MP], x3 + DX3[DIR_0MP], DIR_0MP);
+            distributions->setDistributionInvForDirection(f[DIR_0MM], x1 + DX1[DIR_0PP], x2 + DX2[DIR_0PP], x3 + DX3[DIR_0PP], DIR_0PP);
+            distributions->setDistributionInvForDirection(f[DIR_PPM], x1 + DX1[DIR_MMP], x2 + DX2[DIR_MMP], x3 + DX3[DIR_MMP], DIR_MMP);
+            distributions->setDistributionInvForDirection(f[DIR_MPM], x1 + DX1[DIR_PMP], x2 + DX2[DIR_PMP], x3 + DX3[DIR_PMP], DIR_PMP);
+            distributions->setDistributionInvForDirection(f[DIR_PMM], x1 + DX1[DIR_MPP], x2 + DX2[DIR_MPP], x3 + DX3[DIR_MPP], DIR_MPP);
+            distributions->setDistributionInvForDirection(f[DIR_MMM], x1 + DX1[DIR_PPP], x2 + DX2[DIR_PPP], x3 + DX3[DIR_PPP], DIR_PPP);
+            break;
+        default:
+            UB_THROW(
+                UbException(UB_EXARGS, "It isn't implemented non reflecting density boundary for this direction!"));
+    }
+}
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithmWithRelaxation.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithmWithRelaxation.h
new file mode 100644
index 0000000000000000000000000000000000000000..97badb60dbe84e0b7a4a3fa82b950649e0a12d93
--- /dev/null
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/NonReflectingOutflowBCAlgorithmWithRelaxation.h
@@ -0,0 +1,50 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NonReflectingOutflowBCAlgorithmWithRelaxation.h
+//! \ingroup BoundarConditions
+//! \author Konstantin Kutscher, Hussein Alihussein
+//=======================================================================================
+#ifndef NonReflectingOutflowBCAlgorithmWithRelaxation_h__
+#define NonReflectingOutflowBCAlgorithmWithRelaxation_h__
+
+#include "BCAlgorithm.h"
+#include <PointerDefinitions.h>
+
+class DistributionArray3D;
+
+class NonReflectingOutflowBCAlgorithmWithRelaxation : public BCAlgorithm //!< BCAlgorithm subclass; this header only declares the members, the definitions are not part of it
+{
+public:
+    NonReflectingOutflowBCAlgorithmWithRelaxation();
+    ~NonReflectingOutflowBCAlgorithmWithRelaxation() override;
+    SPtr<BCAlgorithm> clone() override; //!< overrides BCAlgorithm::clone(); hands back a shared pointer typed as the base class
+    void addDistributions(SPtr<DistributionArray3D> distributions) override; //!< takes the distribution array by shared pointer; presumably kept for applyBC() - confirm in the .cpp
+    void applyBC() override; //!< overrides BCAlgorithm::applyBC(); takes no arguments and returns nothing
+};
+#endif // NonReflectingOutflowBCAlgorithmWithRelaxation_h__
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.cpp
index 4d5c2dd5e374e9f583b29e50f5d461b712bf5900..5bc11cbb4cadc7b6e28fd163b8479413673c3b1c 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyVelocityBCAlgorithm.cpp
@@ -51,29 +51,33 @@ void RheologyVelocityBCAlgorithm::addDistributions(SPtr<DistributionArray3D> dis
 //////////////////////////////////////////////////////////////////////////
 void RheologyVelocityBCAlgorithm::applyBC()
 {
-   real f[D3Q27System::ENDF+1];
-   real feq[D3Q27System::ENDF+1];
-   distributions->getDistributionInv(f, x1, x2, x3);
-   real rho, vx1, vx2, vx3, drho;
-   calcMacrosFct(f, drho, vx1, vx2, vx3);
-   calcFeqFct(feq, drho, vx1, vx2, vx3);
+    using namespace vf::lbm::constant;
+    // Writes one replacement distribution for every direction of node (x1, x2, x3) that carries a velocity-boundary flag.
+    real f[D3Q27System::ENDF+1];
+    real feq[D3Q27System::ENDF+1];
+    distributions->getDistribution(f, x1, x2, x3);
+    real rho, vx1, vx2, vx3, drho;
+    calcMacrosFct(f, drho, vx1, vx2, vx3);
+    calcFeqFct(feq, drho, vx1, vx2, vx3);
 
     real shearRate = D3Q27System::getShearRate(f, collFactor);
+    // NOTE: getShearRate(f, collFactor) takes the pre-collision state but is given the post-collision state here; requires rescaling.
     real collFactorF = getRheologyCollFactor(collFactor, shearRate, drho);
 
-    rho = vf::lbm::constant::c1o1+drho*compressibleFactor;
 
-   for (int fdir = D3Q27System::FSTARTDIR; fdir<=D3Q27System::FENDDIR; fdir++)
-   {
-      if (bcPtr->hasVelocityBoundaryFlag(fdir))
-      {
-         const int invDir = D3Q27System::INVDIR[fdir];
-         real q = bcPtr->getQ(invDir);// m+m q=0 stabiler
-         real velocity = bcPtr->getBoundaryVelocity(invDir);
-         real fReturn = ((vf::lbm::constant::c1o1-q)/(vf::lbm::constant::c1o1+q))*((f[invDir]-feq[invDir])/(vf::lbm::constant::c1o1-collFactorF)+feq[invDir])+((q*(f[invDir]+f[fdir])-velocity*rho)/(vf::lbm::constant::c1o1+q));
-         distributions->setDistributionForDirection(fReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
-      }
-   }
+    rho = c1o1 + drho * compressibleFactor; // c1o1 instead of the literal 1.0 keeps the sum in `real`, matching the constants used in fReturn below
+    // Per flagged direction: q and the boundary velocity are read for the inverse direction; fReturn is stored in slot fdir of the node offset by invDir.
+    for (int fdir = D3Q27System::FSTARTDIR; fdir<=D3Q27System::FENDDIR; fdir++)
+    {
+        if (bcPtr->hasVelocityBoundaryFlag(fdir))
+        {
+            const int invDir = D3Q27System::INVDIR[fdir];
+            real q = bcPtr->getQ(invDir);
+            real velocity = bcPtr->getBoundaryVelocity(invDir);
+            real fReturn = ((c1o1 - q) / (c1o1 + q)) * ((f[invDir])) + ((q * (f[invDir] + f[fdir] - collFactorF * (f[invDir] - feq[invDir] + f[fdir] - feq[fdir])) - velocity * rho) / (c1o1 + q));
+            distributions->setDistributionForDirection(fReturn, x1+D3Q27System::DX1[invDir], x2+D3Q27System::DX2[invDir], x3+D3Q27System::DX3[invDir], fdir);
+        }
+    }
 
 }
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp
index 93b3854070c0b9f1f589e6d32f1872cc8521ca86..57048674137ccbf1445d548f332b2f5403d9ca4c 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp
@@ -12,6 +12,7 @@
 #include "BCArray3D.h"
 #include "EsoTwist3D.h"
 #include "DistributionArray3D.h"
+#include "Rheology.h"
 
 CalculateTorqueCoProcessor::CalculateTorqueCoProcessor( SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path_, std::shared_ptr<vf::mpi::Communicator> comm) : CoProcessor(grid, s), path(path_), comm(comm), torqueX1global(0), torqueX2global(0), torqueX3global(0)
 {
@@ -70,7 +71,10 @@ void CalculateTorqueCoProcessor::collectData( real step )
       ostr << istep << ";";
       ostr << torqueX1global << ";";
       ostr << torqueX2global << ";";
-      ostr << torqueX3global;
+      ostr << torqueX3global << ";";
+      ostr << Fx << ";";
+      ostr << Fy << ";";
+      ostr << Fz;
       ostr << std::endl;
       ostr.close();
    }
@@ -103,7 +107,6 @@ void CalculateTorqueCoProcessor::calculateForces()
 
          SPtr<BCArray3D> bcArray = kernel->getBCProcessor()->getBCArray();          
          SPtr<DistributionArray3D> distributions = kernel->getDataSet()->getFdistributions(); 
-         distributions->swap();
 
          int ghostLayerWidth = kernel->getGhostLayerWidth();
          int minX1 = ghostLayerWidth;
@@ -125,15 +128,24 @@ void CalculateTorqueCoProcessor::calculateForces()
             if(bcArray->isFluid(x1,x2,x3)) //es kann sein, dass der node von einem anderen interactor z.B. als solid gemarkt wurde!!!
             {
                SPtr<BoundaryConditions> bc = bcArray->getBC(x1,x2,x3);
-               UbTupleDouble3 forceVec     = getForces(x1,x2,x3,distributions,bc);
-               real Fx                   = val<1>(forceVec);
-               real Fy                   = val<2>(forceVec);
-               real Fz                   = val<3>(forceVec);
-
+               
                Vector3D worldCoordinates = grid->getNodeCoordinates(block, x1, x2, x3);
-               real rx                 = (worldCoordinates[0] - x1Centre) / deltaX;
-               real ry                 = (worldCoordinates[1] - x2Centre) / deltaX;
-               real rz                 = (worldCoordinates[2] - x3Centre) / deltaX;
+               real rx = (worldCoordinates[0] - x1Centre) / deltaX;
+               real ry = (worldCoordinates[1] - x2Centre) / deltaX;
+               real rz = (worldCoordinates[2] - x3Centre) / deltaX;
+
+               // real nx = rx / sqrt(rx * rx + ry * ry + rz * rz);
+               // real ny = ry / sqrt(rx * rx + ry * ry + rz * rz);
+               // real nz = rz / sqrt(rx * rx + ry * ry + rz * rz);
+
+               UbTupleDouble3 forceVec = getForces(x1, x2, x3, distributions, bc);
+               //UbTupleDouble3 forceVec = getForcesFromMoments(x1, x2, x3, kernel, distributions, bc, nx, ny, nz);
+               //UbTupleDouble3 forceVec = getForcesFromStressTensor(x1, x2, x3, kernel, distributions, bc, nx, ny, nz);
+               /*real*/ Fx                   = val<1>(forceVec);
+               /*real*/ Fy                   = val<2>(forceVec);
+               /*real*/ Fz                   = val<3>(forceVec);
+              
+
 
                torqueX1 += ry * Fz - rz * Fy;
                torqueX2 += rz * Fx - rx * Fz;
@@ -141,8 +153,6 @@ void CalculateTorqueCoProcessor::calculateForces()
             }
          }
 
-         distributions->swap();
-
          torqueX1global += torqueX1;
          torqueX2global += torqueX2;
          torqueX3global += torqueX3;
@@ -174,9 +184,6 @@ UbTupleDouble3 CalculateTorqueCoProcessor::getForces(int x1, int x2, int x3,  SP
 {
    UbTupleDouble3 force(0.0,0.0,0.0);
 
-   real fs[D3Q27System::ENDF + 1];
-   distributions->getDistributionInv(fs, x1, x2, x3);
-   
    if(bc)
    {
       //references to tuple "force"
@@ -185,6 +192,8 @@ UbTupleDouble3 CalculateTorqueCoProcessor::getForces(int x1, int x2, int x3,  SP
       real& forceX3 = val<3>(force);
       real f,  fnbr;
 
+      dynamicPointerCast<EsoTwist3D>(distributions)->swap();
+
       for(int fdir=D3Q27System::FSTARTDIR; fdir<=D3Q27System::FENDDIR; fdir++)
       {
          if(bc->hasNoSlipBoundaryFlag(fdir) || bc->hasVelocityBoundaryFlag(fdir))
@@ -198,8 +207,81 @@ UbTupleDouble3 CalculateTorqueCoProcessor::getForces(int x1, int x2, int x3,  SP
             forceX3 += (f + fnbr) * D3Q27System::DX3[invDir];
          }
       }
+
+      dynamicPointerCast<EsoTwist3D>(distributions)->swap();
+   }
+
+   return force;
+}
+//////////////////////////////////////////////////////////////////////////
+UbTupleDouble3 CalculateTorqueCoProcessor::getForcesFromMoments(int x1, int x2, int x3, SPtr<ILBMKernel> kernel, SPtr<DistributionArray3D> distributions, SPtr<BoundaryConditions> bc, real nx, real ny, real nz)
+{
+   using namespace vf::lbm::constant;
+   UbTupleDouble3 force(0.0, 0.0, 0.0);
+   // Force at node (x1, x2, x3): a symmetric 3x3 tensor rebuilt from getSecondMoments() is applied to the vector (nx, ny, nz).
+   // The zero tuple is returned unchanged when bc is null; bc is only tested here, its contents are never read.
+   if (bc) {
+      real f[D3Q27System::ENDF + 1];
+      distributions->getDistribution(f, x1, x2, x3);
+      real collFactor = kernel->getCollisionFactor();
+      real shearRate = D3Q27System::getShearRate(f, collFactor);
+      real rho = D3Q27System::getDensity(f);
+      real omega = Rheology::getBinghamCollFactor(collFactor, shearRate, rho);
+      std::array<real, 6> moments = D3Q27System::getSecondMoments(f, omega);
+      // NOTE(review): presumably moments = {mxx+myy+mzz, mxx-myy, mxx-mzz, mxy, mxz, myz} - confirm against getSecondMoments().
+      // references to tuple "force"
+      real &forceX1 = val<1>(force);
+      real &forceX2 = val<2>(force);
+      real &forceX3 = val<3>(force);
+      // If the layout assumed above holds, the next three lines recover the diagonal entries mxx, myy and mzz.
+      real mxx = (moments[0] + moments[1] + moments[2])*c1o3;
+      real myy = (-c2o1 * moments[1] + moments[2] + moments[0]) * c1o3; 
+      real mzz = (-c2o1 * moments[2] + moments[1] + moments[0]) * c1o3;
+      real mxy = moments[3];
+      real mxz = moments[4];
+      real myz = moments[5];
+      // Symmetric matrix times vector: each force component is one matrix row dotted with (nx, ny, nz).
+      forceX1 = mxx *nx + mxy*ny + mxz*nz;
+      forceX2 = mxy *nx + myy*ny + myz*nz;
+      forceX3 = mxz *nx + myz*ny + mzz*nz;
    }
-   
+
+   return force;
+}
+//////////////////////////////////////////////////////////////////////////
+UbTupleDouble3 CalculateTorqueCoProcessor::getForcesFromStressTensor(int x1, int x2, int x3, SPtr<ILBMKernel> kernel, SPtr<DistributionArray3D> distributions, SPtr<BoundaryConditions> bc, real nx, real ny, real nz)
+{
+   using namespace vf::lbm::constant;
+   UbTupleDouble3 force(0.0, 0.0, 0.0);
+   // Force at node (x1, x2, x3): the tensor from getStressTensor() applied to the vector (nx, ny, nz); stays (0, 0, 0) when bc is null.
+   if (bc) {
+      real f[D3Q27System::ENDF + 1];
+      distributions->getDistribution(f, x1, x2, x3);
+      real collFactor = kernel->getCollisionFactor();
+      real shearRate = D3Q27System::getShearRate(f, collFactor);
+      real rho = D3Q27System::getDensity(f);
+      real omega = Rheology::getBinghamCollFactor(collFactor, shearRate, rho);
+      std::array<real, 6> stress = D3Q27System::getStressTensor(f, omega);
+
+      // references to tuple "force"
+      real &forceX1 = val<1>(force);
+      real &forceX2 = val<2>(force);
+      real &forceX3 = val<3>(force);
+      // Aliases for the six array entries, read here in the order xx, yy, zz, xy, xz, yz.
+      real &tauXX = stress[0];
+      real &tauYY = stress[1];
+      real &tauZZ = stress[2];
+      real &tauXY = stress[3];
+      real &tauXZ = stress[4];
+      real &tauYZ = stress[5];
+
+      // Reference: https://journals.aps.org/pre/pdf/10.1103/PhysRevE.88.013303
+      // Symmetric tensor times vector: each force component is one tensor row dotted with (nx, ny, nz).
+      forceX1 = tauXX * nx + tauXY * ny + tauXZ * nz;
+      forceX2 = tauXY * nx + tauYY * ny + tauYZ * nz;
+      forceX3 = tauXZ * nx + tauYZ * ny + tauZZ * nz;
+   }
+
    return force;
 }
 //////////////////////////////////////////////////////////////////////////
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h
index 26686239bdca68a047c4f8c8f4c33f5a09f53bb5..98b6d4001c6ed7f50c5c05254175c73117590072 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h
@@ -23,6 +23,7 @@ class UbScheduler;
 class D3Q27Interactor;
 class DistributionArray3D;
 class BoundaryConditions;
+class ILBMKernel;
 
 class CalculateTorqueCoProcessor: public CoProcessor 
 {
@@ -35,7 +36,10 @@ public:
 protected:
 	void collectData(real step);
    void calculateForces();
-   UbTupleDouble3 getForces(int x1, int x2, int x3, SPtr<DistributionArray3D> distributions, SPtr<BoundaryConditions> bc);
+    UbTupleDouble3 getForces(int x1, int x2, int x3, SPtr<DistributionArray3D> distributions, SPtr<BoundaryConditions> bc);
+    UbTupleDouble3 getForcesFromMoments(int x1, int x2, int x3, SPtr<ILBMKernel> kernel, SPtr<DistributionArray3D> distributions, SPtr<BoundaryConditions> bc, real nx, real ny, real nz);
+    UbTupleDouble3 getForcesFromStressTensor(int x1, int x2, int x3, SPtr<ILBMKernel> kernel, SPtr<DistributionArray3D> distributions, SPtr<BoundaryConditions> bc, real nx, real ny, real nz);
+
 private:
    std::string path;
    std::shared_ptr<vf::mpi::Communicator> comm;
@@ -43,6 +47,8 @@ private:
    real torqueX1global;
    real torqueX2global;
    real torqueX3global;
+
+   real Fx, Fy, Fz;
 };
 
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.cpp
index 4e764c36350bbe234f4f50851a85fc35e5336049..9a4af594717da6739af8b8c16abccd6cf63c3b76 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.cpp
@@ -153,8 +153,16 @@ void WriteThixotropyQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
 	datanames.resize(0);
 	datanames.push_back("viscosity");
 	//datanames.push_back("lambda");
-	//datanames.push_back("ShearRate");
+	datanames.push_back("ShearRate");
 	datanames.push_back("omega");
+
+	datanames.push_back("MP");
+    datanames.push_back("MXXMYY");
+    datanames.push_back("MXXMZZ");
+    datanames.push_back("MXY");
+    datanames.push_back("MXZ");
+    datanames.push_back("MYZ");
+
 	//datanames.push_back("Fluxx");
 	//datanames.push_back("Fluxy");
 	//datanames.push_back("Fluxz");
@@ -249,10 +257,20 @@ void WriteThixotropyQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
 					//LBMReal omega = Rheology::getPowellEyringCollFactor(collFactor, shearRate, rho);
 					real omega = Rheology::getBinghamCollFactor(collFactor, shearRate, rho);
 					real viscosity = (omega == 0) ? 0 : vf::lbm::constant::c1o3 * (vf::lbm::constant::c1o1/omega- vf::lbm::constant::c1o2);
+                    std::array<real, 6> moments = D3Q27System::getSecondMoments(f, omega);
+
 
 					
 					data[index++].push_back(viscosity);
+                    data[index++].push_back(shearRate);
 					data[index++].push_back(omega);
+
+					data[index++].push_back(moments[0]);
+                    data[index++].push_back(moments[1]);
+                    data[index++].push_back(moments[2]);
+                    data[index++].push_back(moments[3]);
+                    data[index++].push_back(moments[4]);
+                    data[index++].push_back(moments[5]);
 				}
 			}
 		}
diff --git a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h
index 4b7f6dcea27a293cb319a65b3a864965cbed01a7..a8bf297c55d656f5e277fe6e280fb0502a32c283 100644
--- a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h
+++ b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h
@@ -37,6 +37,7 @@
 #include <cmath>
 #include <string>
 #include <iostream>
+#include <array>
 
 #include "lbm/constants/D3Q27.h"
 #include "LBMSystem.h"
@@ -1393,6 +1394,662 @@ static inline real getShearRate(const real *const f, real collFactorF)
     return sqrt(vf::lbm::constant::c2o1 * (dxux * dxux + dyuy * dyuy + dzuz * dzuz) + Dxy * Dxy + Dxz * Dxz + Dyz * Dyz) /
            (rho + vf::lbm::constant::c1o1);
 }
+
+static inline std::array<real,6> getSecondMoments(const real *const f, real collFactorF)
+{
+    using namespace vf::lbm::dir;
+    using namespace vf::lbm::constant;
+
+    real mfcbb = f[DIR_P00];
+    real mfbcb = f[DIR_0P0];
+    real mfbbc = f[DIR_00P];
+    real mfccb = f[DIR_PP0];
+    real mfacb = f[DIR_MP0];
+    real mfcbc = f[DIR_P0P];
+    real mfabc = f[DIR_M0P];
+    real mfbcc = f[DIR_0PP];
+    real mfbac = f[DIR_0MP];
+    real mfccc = f[DIR_PPP];
+    real mfacc = f[DIR_MPP];
+    real mfcac = f[DIR_PMP];
+    real mfaac = f[DIR_MMP];
+
+    real mfabb = f[DIR_M00];
+    real mfbab = f[DIR_0M0];
+    real mfbba = f[DIR_00M];
+    real mfaab = f[DIR_MM0];
+    real mfcab = f[DIR_PM0];
+    real mfaba = f[DIR_M0M];
+    real mfcba = f[DIR_P0M];
+    real mfbaa = f[DIR_0MM];
+    real mfbca = f[DIR_0PM];
+    real mfaaa = f[DIR_MMM];
+    real mfcaa = f[DIR_PMM];
+    real mfaca = f[DIR_MPM];
+    real mfcca = f[DIR_PPM];
+
+    real mfbbb = f[DIR_000];
+
+    real m0, m1, m2;
+
+    //real rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc) + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
+
+    real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + (mfcbb - mfabb));
+    real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + (mfbcb - mfbab));
+    real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + (mfbbc - mfbba));
+
+    real oMdrho;
+
+    oMdrho = mfccc + mfaaa;
+    m0 = mfaca + mfcac;
+    m1 = mfacc + mfcaa;
+    m2 = mfaac + mfcca;
+    oMdrho += m0;
+    m1 += m2;
+    oMdrho += m1;
+    m0 = mfbac + mfbca;
+    m1 = mfbaa + mfbcc;
+    m0 += m1;
+    m1 = mfabc + mfcba;
+    m2 = mfaba + mfcbc;
+    m1 += m2;
+    m0 += m1;
+    m1 = mfacb + mfcab;
+    m2 = mfaab + mfccb;
+    m1 += m2;
+    m0 += m1;
+    oMdrho += m0;
+    m0 = mfabb + mfcbb;
+    m1 = mfbab + mfbcb;
+    m2 = mfbba + mfbbc;
+    m0 += m1 + m2;
+    m0 += mfbbb; // hat gefehlt
+    oMdrho = 1. - (oMdrho + m0);
+
+    real vx2;
+    real vy2;
+    real vz2;
+    vx2 = vvx * vvx;
+    vy2 = vvy * vvy;
+    vz2 = vvz * vvz;
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Hin
+    ////////////////////////////////////////////////////////////////////////////////////
+    // mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36  Konditionieren
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Z - Dir
+    m2 = mfaaa + mfaac;
+    m1 = mfaac - mfaaa;
+    m0 = m2 + mfaab;
+    mfaaa = m0;
+    m0 += c1o36 * oMdrho;
+    mfaab = m1 - m0 * vvz;
+    mfaac = m2 - 2. * m1 * vvz + vz2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfaba + mfabc;
+    m1 = mfabc - mfaba;
+    m0 = m2 + mfabb;
+    mfaba = m0;
+    m0 += c1o9 * oMdrho;
+    mfabb = m1 - m0 * vvz;
+    mfabc = m2 - 2. * m1 * vvz + vz2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfaca + mfacc;
+    m1 = mfacc - mfaca;
+    m0 = m2 + mfacb;
+    mfaca = m0;
+    m0 += c1o36 * oMdrho;
+    mfacb = m1 - m0 * vvz;
+    mfacc = m2 - 2. * m1 * vvz + vz2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfbaa + mfbac;
+    m1 = mfbac - mfbaa;
+    m0 = m2 + mfbab;
+    mfbaa = m0;
+    m0 += c1o9 * oMdrho;
+    mfbab = m1 - m0 * vvz;
+    mfbac = m2 - 2. * m1 * vvz + vz2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfbba + mfbbc;
+    m1 = mfbbc - mfbba;
+    m0 = m2 + mfbbb;
+    mfbba = m0;
+    m0 += c4o9 * oMdrho;
+    mfbbb = m1 - m0 * vvz;
+    mfbbc = m2 - 2. * m1 * vvz + vz2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfbca + mfbcc;
+    m1 = mfbcc - mfbca;
+    m0 = m2 + mfbcb;
+    mfbca = m0;
+    m0 += c1o9 * oMdrho;
+    mfbcb = m1 - m0 * vvz;
+    mfbcc = m2 - 2. * m1 * vvz + vz2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfcaa + mfcac;
+    m1 = mfcac - mfcaa;
+    m0 = m2 + mfcab;
+    mfcaa = m0;
+    m0 += c1o36 * oMdrho;
+    mfcab = m1 - m0 * vvz;
+    mfcac = m2 - 2. * m1 * vvz + vz2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfcba + mfcbc;
+    m1 = mfcbc - mfcba;
+    m0 = m2 + mfcbb;
+    mfcba = m0;
+    m0 += c1o9 * oMdrho;
+    mfcbb = m1 - m0 * vvz;
+    mfcbc = m2 - 2. * m1 * vvz + vz2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfcca + mfccc;
+    m1 = mfccc - mfcca;
+    m0 = m2 + mfccb;
+    mfcca = m0;
+    m0 += c1o36 * oMdrho;
+    mfccb = m1 - m0 * vvz;
+    mfccc = m2 - 2. * m1 * vvz + vz2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////
+    // mit  1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Y - Dir
+    m2 = mfaaa + mfaca;
+    m1 = mfaca - mfaaa;
+    m0 = m2 + mfaba;
+    mfaaa = m0;
+    m0 += c1o6 * oMdrho;
+    mfaba = m1 - m0 * vvy;
+    mfaca = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfaab + mfacb;
+    m1 = mfacb - mfaab;
+    m0 = m2 + mfabb;
+    mfaab = m0;
+    mfabb = m1 - m0 * vvy;
+    mfacb = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfaac + mfacc;
+    m1 = mfacc - mfaac;
+    m0 = m2 + mfabc;
+    mfaac = m0;
+    m0 += c1o18 * oMdrho;
+    mfabc = m1 - m0 * vvy;
+    mfacc = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfbaa + mfbca;
+    m1 = mfbca - mfbaa;
+    m0 = m2 + mfbba;
+    mfbaa = m0;
+    m0 += c2o3 * oMdrho;
+    mfbba = m1 - m0 * vvy;
+    mfbca = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfbab + mfbcb;
+    m1 = mfbcb - mfbab;
+    m0 = m2 + mfbbb;
+    mfbab = m0;
+    mfbbb = m1 - m0 * vvy;
+    mfbcb = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfbac + mfbcc;
+    m1 = mfbcc - mfbac;
+    m0 = m2 + mfbbc;
+    mfbac = m0;
+    m0 += c2o9 * oMdrho;
+    mfbbc = m1 - m0 * vvy;
+    mfbcc = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfcaa + mfcca;
+    m1 = mfcca - mfcaa;
+    m0 = m2 + mfcba;
+    mfcaa = m0;
+    m0 += c1o6 * oMdrho;
+    mfcba = m1 - m0 * vvy;
+    mfcca = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfcab + mfccb;
+    m1 = mfccb - mfcab;
+    m0 = m2 + mfcbb;
+    mfcab = m0;
+    mfcbb = m1 - m0 * vvy;
+    mfccb = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfcac + mfccc;
+    m1 = mfccc - mfcac;
+    m0 = m2 + mfcbc;
+    mfcac = m0;
+    m0 += c1o18 * oMdrho;
+    mfcbc = m1 - m0 * vvy;
+    mfccc = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////
+    // mit     1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9            Konditionieren
+    ////////////////////////////////////////////////////////////////////////////////////
+    // X - Dir
+    m2 = mfaaa + mfcaa;
+    m1 = mfcaa - mfaaa;
+    m0 = m2 + mfbaa;
+    mfaaa = m0;
+    m0 += 1. * oMdrho;
+    mfbaa = m1 - m0 * vvx;
+    mfcaa = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfaba + mfcba;
+    m1 = mfcba - mfaba;
+    m0 = m2 + mfbba;
+    mfaba = m0;
+    mfbba = m1 - m0 * vvx;
+    mfcba = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfaca + mfcca;
+    m1 = mfcca - mfaca;
+    m0 = m2 + mfbca;
+    mfaca = m0;
+    m0 += c1o3 * oMdrho;
+    mfbca = m1 - m0 * vvx;
+    mfcca = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfaab + mfcab;
+    m1 = mfcab - mfaab;
+    m0 = m2 + mfbab;
+    mfaab = m0;
+    mfbab = m1 - m0 * vvx;
+    mfcab = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfabb + mfcbb;
+    m1 = mfcbb - mfabb;
+    m0 = m2 + mfbbb;
+    mfabb = m0;
+    mfbbb = m1 - m0 * vvx;
+    mfcbb = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfacb + mfccb;
+    m1 = mfccb - mfacb;
+    m0 = m2 + mfbcb;
+    mfacb = m0;
+    mfbcb = m1 - m0 * vvx;
+    mfccb = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfaac + mfcac;
+    m1 = mfcac - mfaac;
+    m0 = m2 + mfbac;
+    mfaac = m0;
+    m0 += c1o3 * oMdrho;
+    mfbac = m1 - m0 * vvx;
+    mfcac = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfabc + mfcbc;
+    m1 = mfcbc - mfabc;
+    m0 = m2 + mfbbc;
+    mfabc = m0;
+    mfbbc = m1 - m0 * vvx;
+    mfcbc = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfacc + mfccc;
+    m1 = mfccc - mfacc;
+    m0 = m2 + mfbcc;
+    mfacc = m0;
+    m0 += c1o9 * oMdrho;
+    mfbcc = m1 - m0 * vvx;
+    mfccc = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Cumulants
+    ////////////////////////////////////////////////////////////////////////////////////
+    real OxxPyyPzz = 1.; // omega2 or bulk viscosity
+
+    real mxxPyyPzz = mfcaa + mfaca + mfaac;
+    real mxxMyy = mfcaa - mfaca;
+    real mxxMzz = mfcaa - mfaac;
+
+   // average pre and post collision
+    std::array<real, 6> moments = {
+    (mxxPyyPzz-mfaaa) * (c1o1 - c1o2 * OxxPyyPzz),
+    (mxxMyy) * (c1o1 - c1o2 * collFactorF),
+    (mxxMzz) * (c1o1 - c1o2 * collFactorF),
+    (mfbba)  * (c1o1 - c1o2 * collFactorF),
+    (mfbab)  * (c1o1 - c1o2 * collFactorF),
+    (mfabb)  * (c1o1 - c1o2 * collFactorF)
+    };
+
+    return moments;
+}
+static inline std::array<real, 6> getStressTensor(const real *const f, real collFactorF)
+{
+    using namespace vf::lbm::dir;
+    using namespace vf::lbm::constant;
+    // Returns { -mfaaa/3 + 2*nu*dxux, -mfaaa/3 + 2*nu*dyuy, -mfaaa/3 + 2*nu*dzuz, nu*Dxy, nu*Dxz, nu*Dyz }, built from velocity-shifted moments of the 27 populations in f.
+    real mfcbb = f[DIR_P00]; // naming: mf<x><y><z>, where a/b/c is the M/0/P component of the direction
+    real mfbcb = f[DIR_0P0];
+    real mfbbc = f[DIR_00P];
+    real mfccb = f[DIR_PP0];
+    real mfacb = f[DIR_MP0];
+    real mfcbc = f[DIR_P0P];
+    real mfabc = f[DIR_M0P];
+    real mfbcc = f[DIR_0PP];
+    real mfbac = f[DIR_0MP];
+    real mfccc = f[DIR_PPP];
+    real mfacc = f[DIR_MPP];
+    real mfcac = f[DIR_PMP];
+    real mfaac = f[DIR_MMP];
+
+    real mfabb = f[DIR_M00];
+    real mfbab = f[DIR_0M0];
+    real mfbba = f[DIR_00M];
+    real mfaab = f[DIR_MM0];
+    real mfcab = f[DIR_PM0];
+    real mfaba = f[DIR_M0M];
+    real mfcba = f[DIR_P0M];
+    real mfbaa = f[DIR_0MM];
+    real mfbca = f[DIR_0PM];
+    real mfaaa = f[DIR_MMM];
+    real mfcaa = f[DIR_PMM];
+    real mfaca = f[DIR_MPM];
+    real mfcca = f[DIR_PPM];
+
+    real mfbbb = f[DIR_000];
+
+    real m0, m1, m2; // scratch values, reused by every accumulation and transform step below
+
+    //real rho = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca) + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc) + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
+    // vvx/vvy/vvz: (populations with a +1 component) minus (populations with a -1 component) per axis; not divided by a density (the rho sum above is commented out).
+    real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) + (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + (mfcbb - mfabb));
+    real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) + (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + (mfbcb - mfbab));
+    real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) + (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + (mfbbc - mfbba));
+    // oMdrho = 1 - (sum of all 27 populations); the sum is accumulated pairwise in the statements below.
+    real oMdrho;
+
+    oMdrho = mfccc + mfaaa;
+    m0 = mfaca + mfcac;
+    m1 = mfacc + mfcaa;
+    m2 = mfaac + mfcca;
+    oMdrho += m0;
+    m1 += m2;
+    oMdrho += m1;
+    m0 = mfbac + mfbca;
+    m1 = mfbaa + mfbcc;
+    m0 += m1;
+    m1 = mfabc + mfcba;
+    m2 = mfaba + mfcbc;
+    m1 += m2;
+    m0 += m1;
+    m1 = mfacb + mfcab;
+    m2 = mfaab + mfccb;
+    m1 += m2;
+    m0 += m1;
+    oMdrho += m0;
+    m0 = mfabb + mfcbb;
+    m1 = mfbab + mfbcb;
+    m2 = mfbba + mfbbc;
+    m0 += m1 + m2;
+    m0 += mfbbb; // include mfbbb (DIR_000); this term was missing previously
+    oMdrho = 1. - (oMdrho + m0);
+
+    real vx2;
+    real vy2;
+    real vz2;
+    vx2 = vvx * vvx;
+    vy2 = vvy * vvy;
+    vz2 = vvz * vvz;
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Forward transform: populations -> velocity-shifted moments, one axis at a time (Z, then Y, then X); the passes work in place, so statement order matters
+    ////////////////////////////////////////////////////////////////////////////////////
+    // conditioned with 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36 (the factor applied to oMdrho in each of the nine triplets below, in order)
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Z direction: each triplet (a, b, c in the third index) is replaced by its order-0, order-1 and order-2 value in z
+    m2 = mfaaa + mfaac;                     // sum of the -z and +z entries
+    m1 = mfaac - mfaaa;                     // raw first-order value in z
+    m0 = m2 + mfaab;                        // plain sum of the triplet
+    mfaaa = m0;                             // order 0 is stored without the conditioning offset
+    m0 += c1o36 * oMdrho;                   // add the conditioning offset before shifting
+    mfaab = m1 - m0 * vvz;                  // order 1, shifted by vvz
+    mfaac = m2 - 2. * m1 * vvz + vz2 * m0;  // order 2, shifted by vvz
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfaba + mfabc;
+    m1 = mfabc - mfaba;
+    m0 = m2 + mfabb;
+    mfaba = m0;
+    m0 += c1o9 * oMdrho;
+    mfabb = m1 - m0 * vvz;
+    mfabc = m2 - 2. * m1 * vvz + vz2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfaca + mfacc;
+    m1 = mfacc - mfaca;
+    m0 = m2 + mfacb;
+    mfaca = m0;
+    m0 += c1o36 * oMdrho;
+    mfacb = m1 - m0 * vvz;
+    mfacc = m2 - 2. * m1 * vvz + vz2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfbaa + mfbac;
+    m1 = mfbac - mfbaa;
+    m0 = m2 + mfbab;
+    mfbaa = m0;
+    m0 += c1o9 * oMdrho;
+    mfbab = m1 - m0 * vvz;
+    mfbac = m2 - 2. * m1 * vvz + vz2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfbba + mfbbc;
+    m1 = mfbbc - mfbba;
+    m0 = m2 + mfbbb;
+    mfbba = m0;
+    m0 += c4o9 * oMdrho;
+    mfbbb = m1 - m0 * vvz;
+    mfbbc = m2 - 2. * m1 * vvz + vz2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfbca + mfbcc;
+    m1 = mfbcc - mfbca;
+    m0 = m2 + mfbcb;
+    mfbca = m0;
+    m0 += c1o9 * oMdrho;
+    mfbcb = m1 - m0 * vvz;
+    mfbcc = m2 - 2. * m1 * vvz + vz2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfcaa + mfcac;
+    m1 = mfcac - mfcaa;
+    m0 = m2 + mfcab;
+    mfcaa = m0;
+    m0 += c1o36 * oMdrho;
+    mfcab = m1 - m0 * vvz;
+    mfcac = m2 - 2. * m1 * vvz + vz2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfcba + mfcbc;
+    m1 = mfcbc - mfcba;
+    m0 = m2 + mfcbb;
+    mfcba = m0;
+    m0 += c1o9 * oMdrho;
+    mfcbb = m1 - m0 * vvz;
+    mfcbc = m2 - 2. * m1 * vvz + vz2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfcca + mfccc;
+    m1 = mfccc - mfcca;
+    m0 = m2 + mfccb;
+    mfcca = m0;
+    m0 += c1o36 * oMdrho;
+    mfccb = m1 - m0 * vvz;
+    mfccc = m2 - 2. * m1 * vvz + vz2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////
+    // conditioned with 1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 (0 = that triplet has no oMdrho offset)
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Y direction: same three-point transform, now over the second index and shifted by vvy
+    m2 = mfaaa + mfaca;
+    m1 = mfaca - mfaaa;
+    m0 = m2 + mfaba;
+    mfaaa = m0;
+    m0 += c1o6 * oMdrho;
+    mfaba = m1 - m0 * vvy;
+    mfaca = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfaab + mfacb;
+    m1 = mfacb - mfaab;
+    m0 = m2 + mfabb;
+    mfaab = m0;
+    mfabb = m1 - m0 * vvy;
+    mfacb = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfaac + mfacc;
+    m1 = mfacc - mfaac;
+    m0 = m2 + mfabc;
+    mfaac = m0;
+    m0 += c1o18 * oMdrho;
+    mfabc = m1 - m0 * vvy;
+    mfacc = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfbaa + mfbca;
+    m1 = mfbca - mfbaa;
+    m0 = m2 + mfbba;
+    mfbaa = m0;
+    m0 += c2o3 * oMdrho;
+    mfbba = m1 - m0 * vvy;
+    mfbca = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfbab + mfbcb;
+    m1 = mfbcb - mfbab;
+    m0 = m2 + mfbbb;
+    mfbab = m0;
+    mfbbb = m1 - m0 * vvy;
+    mfbcb = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfbac + mfbcc;
+    m1 = mfbcc - mfbac;
+    m0 = m2 + mfbbc;
+    mfbac = m0;
+    m0 += c2o9 * oMdrho;
+    mfbbc = m1 - m0 * vvy;
+    mfbcc = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfcaa + mfcca;
+    m1 = mfcca - mfcaa;
+    m0 = m2 + mfcba;
+    mfcaa = m0;
+    m0 += c1o6 * oMdrho;
+    mfcba = m1 - m0 * vvy;
+    mfcca = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfcab + mfccb;
+    m1 = mfccb - mfcab;
+    m0 = m2 + mfcbb;
+    mfcab = m0;
+    mfcbb = m1 - m0 * vvy;
+    mfccb = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfcac + mfccc;
+    m1 = mfccc - mfcac;
+    m0 = m2 + mfcbc;
+    mfcac = m0;
+    m0 += c1o18 * oMdrho;
+    mfcbc = m1 - m0 * vvy;
+    mfccc = m2 - 2. * m1 * vvy + vy2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////
+    // conditioned with 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9 (0 = that triplet has no oMdrho offset)
+    ////////////////////////////////////////////////////////////////////////////////////
+    // X direction: same three-point transform, now over the first index and shifted by vvx
+    m2 = mfaaa + mfcaa;
+    m1 = mfcaa - mfaaa;
+    m0 = m2 + mfbaa;
+    mfaaa = m0;
+    m0 += 1. * oMdrho;
+    mfbaa = m1 - m0 * vvx;
+    mfcaa = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfaba + mfcba;
+    m1 = mfcba - mfaba;
+    m0 = m2 + mfbba;
+    mfaba = m0;
+    mfbba = m1 - m0 * vvx;
+    mfcba = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfaca + mfcca;
+    m1 = mfcca - mfaca;
+    m0 = m2 + mfbca;
+    mfaca = m0;
+    m0 += c1o3 * oMdrho;
+    mfbca = m1 - m0 * vvx;
+    mfcca = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfaab + mfcab;
+    m1 = mfcab - mfaab;
+    m0 = m2 + mfbab;
+    mfaab = m0;
+    mfbab = m1 - m0 * vvx;
+    mfcab = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfabb + mfcbb;
+    m1 = mfcbb - mfabb;
+    m0 = m2 + mfbbb;
+    mfabb = m0;
+    mfbbb = m1 - m0 * vvx;
+    mfcbb = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfacb + mfccb;
+    m1 = mfccb - mfacb;
+    m0 = m2 + mfbcb;
+    mfacb = m0;
+    mfbcb = m1 - m0 * vvx;
+    mfccb = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfaac + mfcac;
+    m1 = mfcac - mfaac;
+    m0 = m2 + mfbac;
+    mfaac = m0;
+    m0 += c1o3 * oMdrho;
+    mfbac = m1 - m0 * vvx;
+    mfcac = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfabc + mfcbc;
+    m1 = mfcbc - mfabc;
+    m0 = m2 + mfbbc;
+    mfabc = m0;
+    mfbbc = m1 - m0 * vvx;
+    mfcbc = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    m2 = mfacc + mfccc;
+    m1 = mfccc - mfacc;
+    m0 = m2 + mfbcc;
+    mfacc = m0;
+    m0 += c1o9 * oMdrho;
+    mfbcc = m1 - m0 * vvx;
+    mfccc = m2 - 2. * m1 * vvx + vx2 * m0;
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Cumulants
+    ////////////////////////////////////////////////////////////////////////////////////
+    real OxxPyyPzz = 1.; // omega2 or bulk viscosity
+    // After the three passes the letters a/b/c denote order 0/1/2 per axis (e.g. mfcaa is second order in x, mfbba is first order in x and y).
+    real mxxPyyPzz = mfcaa + mfaca + mfaac;
+    real mxxMyy = mfcaa - mfaca;
+    real mxxMzz = mfcaa - mfaac;
+    // NOTE(review): dxux/dyuy/dzuz and Dxy/Dxz/Dyz look like velocity-gradient estimates derived from the second-order moments — confirm against the collision kernel.
+    real dxux = -c1o2 * collFactorF * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
+    real dyuy = dxux + collFactorF * c3o2 * mxxMyy;
+    real dzuz = dxux + collFactorF * c3o2 * mxxMzz;
+
+    real Dxy = -c3o1 * collFactorF * mfbba;
+    real Dxz = -c3o1 * collFactorF * mfbab;
+    real Dyz = -c3o1 * collFactorF * mfabb;
+    real nu = c1o3 * (c1o1 / collFactorF - c1o2); // NOTE(review): presumably the viscosity belonging to relaxation rate collFactorF — confirm
+
+    // average pre and post collision (NOTE(review): inherited comment; no explicit averaging factor is visible in this function — confirm)
+    std::array<real, 6> moments = { -c1o3 * mfaaa + c2o1*nu*dxux,
+                                    -c1o3 * mfaaa + c2o1*nu*dyuy,
+                                    -c1o3 * mfaaa + c2o1*nu*dzuz,
+                                     nu*Dxy,nu*Dxz,nu*Dyz};
+
+    return moments;
+}
 //Multiphase stuff
 //////////////////////////////////////////////////////////////////////////
 static void calcMultiphaseFeq(real *const &feq /*[27]*/, const real &rho, const real &p1, const real &vx1,
diff --git a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp
index 5f3c4ad492b16c09b26acd00a624a54ad65dffda..571796d503a1a73b3eccf631a347884c7522b533 100644
--- a/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp
+++ b/src/gpu/GridGenerator/TransientBCSetter/TransientBCSetter.cpp
@@ -417,7 +417,7 @@ void VTKReader::getNextData(real* data, uint numberOfNodes, real time)
             {
                 numberOfFiles++;
 
-                printf("switching to precursor file no. %zu\n", numberOfFiles);
+                VF_LOG_INFO("PrecursorBC on level {}: switching to file no. {}\n", level, numberOfFiles);
                 if(numberOfFiles == this->fileCollection->files[level][id].size())
                     throw std::runtime_error("Not enough Precursor Files to read");
 
diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
index ba4eea50ffb6bc136528db31207274d626fe9b15..718a8d5da1de148c72ba67dd2d15c5e3b443e16a 100644
--- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
+++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
@@ -75,14 +75,13 @@ void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition
                                             ||  grid->getFieldEntry(index) == vf::gpu::FLUID_FCF
                                             // Overlap of BCs on edge nodes
                                             || grid->nodeHasBC(index) )
-            {
+            {   
                 grid->setFieldEntry(index, boundaryCondition->getType());
                 boundaryCondition->indices.push_back(index);
                 setPressureNeighborIndices(boundaryCondition, grid, index);
                 setStressSamplingIndices(boundaryCondition, grid, index);
-
+                // if(grid->getFieldEntry(index)==26) printf("index = %u, v1 = %f, v2 = %f, field entry=%u \n", index, v1, v2, grid->getFieldEntry(index) );
                 setQs(grid, boundaryCondition, index);
-
                 boundaryCondition->patches.push_back(0);
             }
         }
diff --git a/src/gpu/GridGenerator/grid/GridImp.cpp b/src/gpu/GridGenerator/grid/GridImp.cpp
index 32cf9d07da87149695a5bf548ed357be2b2f71b4..9fc3c099e9f382c8d22fa64626c89219ef29c360 100644
--- a/src/gpu/GridGenerator/grid/GridImp.cpp
+++ b/src/gpu/GridGenerator/grid/GridImp.cpp
@@ -2115,16 +2115,22 @@ void GridImp::sortFluidNodeIndicesMacroVars()
         if(this->fluidNodeIndicesAllFeatures.size()>0)
         {
             this->fluidNodeIndicesMacroVars.erase(   std::remove_if(   this->fluidNodeIndicesMacroVars.begin(), this->fluidNodeIndicesMacroVars.end(),
-                                                        [&](auto x){return binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ),
-                                            this->fluidNodeIndicesMacroVars.end()
-                                        );
+                                                    [&](auto x){return binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ),
+                                                    this->fluidNodeIndicesMacroVars.end() );
+        }
+
+        // Remove all indices in fluidNodeIndicesBorder from fluidNodeIndicesMacroVars
+        if(this->fluidNodeIndicesBorder.size()>0)
+        {
+            this->fluidNodeIndicesMacroVars.erase(  std::remove_if(   this->fluidNodeIndicesMacroVars.begin(), this->fluidNodeIndicesMacroVars.end(),
+                                                    [&](auto x){return binary_search(fluidNodeIndicesBorder.begin(),fluidNodeIndicesBorder.end(),x);} ),
+                                                    this->fluidNodeIndicesMacroVars.end() );
         }
 
         // Remove indices of fluidNodeIndicesMacroVars from fluidNodeIndices
         this->fluidNodeIndices.erase(   std::remove_if(   this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(),
                                                         [&](auto x){return binary_search(fluidNodeIndicesMacroVars.begin(),fluidNodeIndicesMacroVars.end(),x);} ),
-                                        this->fluidNodeIndices.end()
-                                    );
+                                        this->fluidNodeIndices.end() );
     }
 }
 
@@ -2136,20 +2142,26 @@ void GridImp::sortFluidNodeIndicesApplyBodyForce()
         // Remove duplicates
         this->fluidNodeIndicesApplyBodyForce.erase( unique( this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end() ), this->fluidNodeIndicesApplyBodyForce.end() );
 
-         // Remove indices of fluidNodeIndicesAllFeatures from fluidNodeIndicesMacroVars
+         // Remove indices of fluidNodeIndicesAllFeatures from fluidNodeIndicesApplyBodyForce
         if(this->fluidNodeIndicesAllFeatures.size()>0)
         {
-            this->fluidNodeIndicesApplyBodyForce.erase(   std::remove_if(   this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end(),
+            this->fluidNodeIndicesApplyBodyForce.erase( std::remove_if(   this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end(),
                                                         [&](auto x){return binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ),
-                                            this->fluidNodeIndicesApplyBodyForce.end()
-                                        );
+                                                        this->fluidNodeIndicesApplyBodyForce.end() );
+        }
+
+        // Remove all indices in fluidNodeIndicesBorder from fluidNodeIndicesApplyBodyForce
+        if(this->fluidNodeIndicesBorder.size()>0)
+        {
+            this->fluidNodeIndicesApplyBodyForce.erase( std::remove_if(   this->fluidNodeIndicesApplyBodyForce.begin(), this->fluidNodeIndicesApplyBodyForce.end(),
+                                                        [&](auto x){return binary_search(fluidNodeIndicesBorder.begin(),fluidNodeIndicesBorder.end(),x);} ),
+                                                        this->fluidNodeIndicesApplyBodyForce.end() );
         }
 
         // Remove indices of fluidNodeIndicesMacroVars from fluidNodeIndices
         this->fluidNodeIndices.erase(   std::remove_if(   this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(),
-                                                        [&](auto x){return binary_search(fluidNodeIndicesApplyBodyForce.begin(),fluidNodeIndicesApplyBodyForce.end(),x);} ),
-                                        this->fluidNodeIndices.end()
-                                    );
+                                        [&](auto x){return binary_search(fluidNodeIndicesApplyBodyForce.begin(),fluidNodeIndicesApplyBodyForce.end(),x);} ),
+                                        this->fluidNodeIndices.end() );
     }
 }
 
@@ -2160,11 +2172,19 @@ void GridImp::sortFluidNodeIndicesAllFeatures()
         sort(this->fluidNodeIndicesAllFeatures.begin(), this->fluidNodeIndicesAllFeatures.end());
         // Remove duplicates
         this->fluidNodeIndicesAllFeatures.erase( unique( this->fluidNodeIndicesAllFeatures.begin(), this->fluidNodeIndicesAllFeatures.end() ), this->fluidNodeIndicesAllFeatures.end() );
-        // Remove indices of fluidNodeIndicesMacroVars from fluidNodeIndices
+
+        // Remove all indices in fluidNodeIndicesBorder from fluidNodeIndicesAllFeatures
+        if(this->fluidNodeIndicesBorder.size()>0)
+        {
+            this->fluidNodeIndicesAllFeatures.erase(    std::remove_if(   this->fluidNodeIndicesAllFeatures.begin(), this->fluidNodeIndicesAllFeatures.end(),
+                                                        [&](auto x){return binary_search(fluidNodeIndicesBorder.begin(),fluidNodeIndicesBorder.end(),x);} ),
+                                                        this->fluidNodeIndicesAllFeatures.end() );
+        }
+
+        // Remove indices of fluidNodeIndicesAllFeatures from fluidNodeIndices
         this->fluidNodeIndices.erase(   std::remove_if(   this->fluidNodeIndices.begin(), this->fluidNodeIndices.end(),
                                                         [&](auto x){return binary_search(fluidNodeIndicesAllFeatures.begin(),fluidNodeIndicesAllFeatures.end(),x);} ),
-                                        this->fluidNodeIndices.end()
-                                    );
+                                        this->fluidNodeIndices.end() );
     }
 }
 
diff --git a/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp b/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp
index 5df66c788b2186b0bdfad1afa4798cee670ba53b..89f3595cf6ed4919548d27d47ae987f89053e1d5 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp
+++ b/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp
@@ -170,7 +170,7 @@ void Communicator::exchngDataGeo(int *sbuf_t, int *rbuf_t, int *sbuf_b, int *rbu
     MPI_Waitall(4, request, status);
 }
 int Communicator::getPID() const { return PID; }
-int Communicator::getNummberOfProcess() const { return numprocs; }
+int Communicator::getNumberOfProcess() const { return numprocs; }
 int Communicator::getNeighbourTop() { return nbrtop; }
 int Communicator::getNeighbourBottom() { return nbrbottom; }
 MPI_Comm Communicator::getCommunicator() { return comm1d; }
diff --git a/src/gpu/VirtualFluids_GPU/Communication/Communicator.h b/src/gpu/VirtualFluids_GPU/Communication/Communicator.h
index aa63e1a09c305ceb5abe2b567b6988753bc48345..d7f57c5630270f87cd7df08492d8ff96585d7a50 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/Communicator.h
+++ b/src/gpu/VirtualFluids_GPU/Communication/Communicator.h
@@ -34,7 +34,7 @@ public:
     void waitAll();
     void distributeGeometry(unsigned int *dataRoot, unsigned int *dataNode, int dataSizePerNode);
     int getPID() const override;
-    int getNummberOfProcess() const;
+    int getNumberOfProcess() const;
     int getNeighbourTop();
     int getNeighbourBottom();
     void exchngData(float *sbuf_t, float *rbuf_t, float *sbuf_b, float *rbuf_b, int count);
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
index 38a7eef7e356e2f2da4c1a819d8375035a37313a..c2f86721de26d516ed60f497a65d1d46a34aa182 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
@@ -1002,7 +1002,7 @@ void GridGenerator::allocArrays_BoundaryQs()
             unsigned int sizeQ = para->getParH(i)->stressBC.numberOfBCnodes;
             QforBoundaryConditions &Q = para->getParH(i)->stressBC;
             getPointersToBoundaryConditions(Q, QQ, sizeQ);
-
+            
             builder->getStressQs(Q.q27, i);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             cudaMemoryManager->cudaCopyStressBC(i);
diff --git a/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.cpp b/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.cpp
index 00a4c79574ce9d8ae372bfe9f7e546c05175bb10..49a6887ef2e462aba190023d334caa0012e2254e 100644
--- a/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.cpp
+++ b/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.cpp
@@ -6,7 +6,7 @@ void GridScalingFactory::setScalingFactory(const GridScalingFactory::GridScaling
     this->gridScaling = gridScalingType;
 }
 
-gridScalingFC GridScalingFactory::getGridScalingFC() const
+gridScalingFC GridScalingFactory::getGridScalingFC(bool hasTurbulentViscosity) const
 {
     // for descriptions of the scaling types refer to the header
     switch (gridScaling) {
@@ -14,14 +14,15 @@ gridScalingFC GridScalingFactory::getGridScalingFC() const
             return ScaleFC_RhoSq_comp_27;
             break;
         case GridScaling::ScaleCompressible:
-            return ScaleFC_compressible;
+            if(hasTurbulentViscosity)   return ScaleFC_compressible<true>;
+            else                        return ScaleFC_compressible<false>;
             break;
         default:
             return nullptr;
     }
 }
 
-gridScalingCF GridScalingFactory::getGridScalingCF() const
+gridScalingCF GridScalingFactory::getGridScalingCF(bool hasTurbulentViscosity) const
 {
     // for descriptions of the scaling types refer to the header
     switch (gridScaling) {
@@ -29,8 +30,11 @@ gridScalingCF GridScalingFactory::getGridScalingCF() const
             return ScaleCF_RhoSq_comp_27;
             break;
         case GridScaling::ScaleCompressible:
-            return ScaleCF_compressible;
-            break;
+            {
+                if(hasTurbulentViscosity)   return ScaleCF_compressible<true>;
+                else                        return ScaleCF_compressible<false>;
+                break;
+            }
         default:
             return nullptr;
     }
diff --git a/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.h b/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.h
index 7d7c20c63a01e2dba6a5578c6520c0ab06894b3c..d760240c2c5ed429799cd89e57704464515a92f5 100644
--- a/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.h
+++ b/src/gpu/VirtualFluids_GPU/Factories/GridScalingFactory.h
@@ -59,8 +59,8 @@ public:
 
     void setScalingFactory(const GridScalingFactory::GridScaling gridScalingType);
 
-    [[nodiscard]] gridScalingFC getGridScalingFC() const;
-    [[nodiscard]] gridScalingCF getGridScalingCF() const;
+    [[nodiscard]] gridScalingFC getGridScalingFC(bool hasTurbulentViscosity) const;  // fine-to-coarse; for ScaleCompressible the flag picks ScaleFC_compressible<true> or <false>
+    [[nodiscard]] gridScalingCF getGridScalingCF(bool hasTurbulentViscosity) const;  // coarse-to-fine; for ScaleCompressible the flag picks ScaleCF_compressible<true> or <false>
 
 private:
     GridScaling gridScaling = GridScaling::NotSpecified;
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
index ae8cbb77ec2493126d64b90a7119cbfa3efee666..4a5b7816c1b6591e4193639bcdf71242e77688c0 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
@@ -1624,7 +1624,8 @@ void ScaleCF_staggered_time_comp_27( real* DC,
 												OffCF offCF);
 
 void ScaleCF_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF &offsetCF, CUstream_st *stream);
-void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF &offsetCF, CUstream_st *stream);
+// hasTurbulentViscosity is a compile-time flag; GridScalingFactory::getGridScalingCF returns the <true> or <false> specialization.
+template<bool hasTurbulentViscosity> void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF &offsetCF, CUstream_st *stream);
 
 void ScaleCF_RhoSq_3rdMom_comp_27( real* DC, 
 											  real* DF, 
@@ -1849,7 +1850,8 @@ void ScaleFC_staggered_time_comp_27( real* DC,
 												OffFC offFC);
 
 void ScaleFC_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC& offsetFC, CUstream_st *stream);
-void ScaleFC_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC& offsetFC, CUstream_st *stream);
+// hasTurbulentViscosity is a compile-time flag; GridScalingFactory::getGridScalingFC returns the <true> or <false> specialization.
+template<bool hasTurbulentViscosity> void ScaleFC_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC& offsetFC, CUstream_st *stream);
 
 void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, 
 											  real* DF, 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
index 3134db44346ee7f465a5c8f04505ee5749482fbf..0c3c7fcefc2bbb7bc87d7d95863c8c74f14735a3 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
@@ -1860,7 +1860,7 @@ __global__ void scaleCF_RhoSq_comp_27( real* DC,
                                                   unsigned int nyF,
                                                   OffCF offCF);
 
-__global__ void scaleCF_compressible(
+template<bool hasTurbulentViscosity> __global__ void scaleCF_compressible(
     real* distributionsCoarse,
     real* distributionsFine,
     unsigned int* neighborXcoarse,
@@ -1877,6 +1877,8 @@ __global__ void scaleCF_compressible(
     unsigned int numberOfInterfaceNodes,
     real omegaCoarse,
     real omegaFine,
+    real* turbulentViscosityCoarse,
+    real* turbulentViscosityFine,
     OffCF offsetCF);
 
 __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
@@ -2263,7 +2265,7 @@ __global__ void scaleFC_RhoSq_comp_27( real* DC,
                                                   unsigned int nyF,
                                                   OffFC offFC);
 
-__global__ void scaleFC_compressible(
+template<bool hasTurbulentViscosity> __global__ void scaleFC_compressible(
     real *distributionsCoarse,
     real *distributionsFine,
     unsigned int *neighborXcoarse,
@@ -2280,6 +2282,8 @@ __global__ void scaleFC_compressible(
     unsigned int numberOfInterfaceNodes,
     real omegaCoarse,
     real omegaFine,
+    real* turbulentViscosityCoarse,
+    real* turbulentViscosityFine,
     OffFC offsetFC);
 
 __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu
index 0724002cffa3a47820664851ffefd1c35dbe0235..84529ef2694b57448291957f7792360088bd954f 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleCF_compressible.cu
@@ -218,7 +218,7 @@ __device__ __inline__ void interpolateDistributions(
 //!
 
 // based on scaleCF_RhoSq_comp_27
-__global__ void scaleCF_compressible(
+template<bool hasTurbulentViscosity> __global__ void scaleCF_compressible(
     real* distributionsCoarse, 
     real* distributionsFine, 
     unsigned int* neighborXcoarse,
@@ -235,6 +235,8 @@ __global__ void scaleCF_compressible(
     unsigned int numberOfInterfaceNodes, 
     real omegaCoarse, 
     real omegaFine, 
+    real* turbulentViscosityCoarse,
+    real* turbulentViscosityFine,
     OffCF offsetCF)
 {
     ////////////////////////////////////////////////////////////////////////////////
@@ -310,6 +312,8 @@ __global__ void scaleCF_compressible(
     unsigned int k_0MM = k_base_0MM;
     unsigned int k_MMM = k_base_MMM;
 
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
+
     calculateMomentsOnSourceNodes( distCoarse, omegaC,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MMM, vx1_MMM, vx2_MMM, vx3_MMM,
         kxyFromfcNEQ_MMM, kyzFromfcNEQ_MMM, kxzFromfcNEQ_MMM, kxxMyyFromfcNEQ_MMM, kxxMzzFromfcNEQ_MMM);
@@ -327,6 +331,8 @@ __global__ void scaleCF_compressible(
     k_0MM = neighborZcoarse[k_0MM];
     k_MMM = neighborZcoarse[k_MMM];
 
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
+
     calculateMomentsOnSourceNodes( distCoarse, omegaC,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MMP, vx1_MMP, vx2_MMP, vx3_MMP,
         kxyFromfcNEQ_MMP, kyzFromfcNEQ_MMP, kxzFromfcNEQ_MMP, kxxMyyFromfcNEQ_MMP, kxxMzzFromfcNEQ_MMP);
@@ -344,6 +350,8 @@ __global__ void scaleCF_compressible(
     k_0MM = k_MMM;
     k_MMM = neighborXcoarse[k_MMM];
 
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
+
     calculateMomentsOnSourceNodes( distCoarse, omegaC,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PMP, vx1_PMP, vx2_PMP, vx3_PMP,
         kxyFromfcNEQ_PMP, kyzFromfcNEQ_PMP, kxzFromfcNEQ_PMP, kxxMyyFromfcNEQ_PMP, kxxMzzFromfcNEQ_PMP);
@@ -361,6 +369,8 @@ __global__ void scaleCF_compressible(
     k_0M0 = k_base_MM0;
     k_MM0 = neighborXcoarse[k_base_MM0];
 
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
+
     calculateMomentsOnSourceNodes( distCoarse, omegaC,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PMM, vx1_PMM, vx2_PMM, vx3_PMM,
         kxyFromfcNEQ_PMM, kyzFromfcNEQ_PMM, kxzFromfcNEQ_PMM, kxxMyyFromfcNEQ_PMM, kxxMzzFromfcNEQ_PMM);
@@ -388,6 +398,8 @@ __global__ void scaleCF_compressible(
     k_0MM = k_base_0MM;
     k_MMM = k_base_MMM;
 
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
+
     calculateMomentsOnSourceNodes( distCoarse, omegaC,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MPM, vx1_MPM, vx2_MPM, vx3_MPM,
         kxyFromfcNEQ_MPM, kyzFromfcNEQ_MPM, kxzFromfcNEQ_MPM, kxxMyyFromfcNEQ_MPM, kxxMzzFromfcNEQ_MPM);
@@ -404,6 +416,8 @@ __global__ void scaleCF_compressible(
     k_M0M = neighborZcoarse[k_M0M];
     k_0MM = neighborZcoarse[k_0MM];
     k_MMM = neighborZcoarse[k_MMM];
+
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
     
     calculateMomentsOnSourceNodes( distCoarse, omegaC,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MPP, vx1_MPP, vx2_MPP, vx3_MPP,
@@ -423,11 +437,12 @@ __global__ void scaleCF_compressible(
     k_0MM = k_MMM;
     k_MMM = neighborXcoarse[k_MMM];
 
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
+
     calculateMomentsOnSourceNodes( distCoarse, omegaC,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PPP, vx1_PPP, vx2_PPP, vx3_PPP,
         kxyFromfcNEQ_PPP, kyzFromfcNEQ_PPP, kxzFromfcNEQ_PPP, kxxMyyFromfcNEQ_PPP, kxxMzzFromfcNEQ_PPP);
 
-
     //////////////////////////////////////////////////////////////////////////
     // source node BNE = PPM
     //////////////////////////////////////////////////////////////////////////
@@ -440,6 +455,8 @@ __global__ void scaleCF_compressible(
     k_M00 = neighborXcoarse[k_base_M00];
     k_0M0 = k_base_MM0;
     k_MM0 = neighborXcoarse[k_base_MM0];
+
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
     
     calculateMomentsOnSourceNodes( distCoarse, omegaC,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PPM, vx1_PPM, vx2_PPM, vx3_PPM,
@@ -883,28 +900,6 @@ __global__ void scaleCF_compressible(
     real y = -c1o4;
     real z = -c1o4;
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    ////////////////////////////////////////////////////////////////////////////////
-    //! - Set moments (zeroth to sixth order) on destination node
-    //!
-    interpolateDistributions(
-        x, y, z,
-        m_000, 
-        m_100, m_010, m_001,
-        m_011, m_101, m_110, m_200, m_020, m_002,
-        m_111, m_210, m_012, m_201, m_021, m_120, m_102,
-        m_022, m_202, m_220, m_211, m_121, m_112,
-        m_122, m_212, m_221,
-        m_222,
-        a_000, a_100, a_010, a_001, a_200, a_020, a_002, a_110,  a_101, a_011, a_111,
-        b_000, b_100, b_010, b_001, b_200, b_020, b_002, b_110,  b_101, b_011, b_111,
-        c_000, c_100, c_010, c_001, c_200, c_020, c_002, c_110,  c_101, c_011, c_111,
-        d_000, d_100, d_010, d_001, d_110, d_101, d_011, d_111,
-        LaplaceRho, eps_new, omegaF, 
-        kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
-    );
-
-    //////////////////////////////////////////////////////////////////////////
     // index of the base node and its neighbors
     k_base_000 = indicesFineMMM[nodeIndex];
     k_base_M00 = neighborXfine [k_base_000];
@@ -924,6 +919,28 @@ __global__ void scaleCF_compressible(
     k_M0M = k_base_M0M;
     k_0MM = k_base_0MM;
     k_MMM = k_base_MMM;
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Set moments (zeroth to sixth order) on destination node
+    //!
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
+    interpolateDistributions(
+        x, y, z,
+        m_000, 
+        m_100, m_010, m_001,
+        m_011, m_101, m_110, m_200, m_020, m_002,
+        m_111, m_210, m_012, m_201, m_021, m_120, m_102,
+        m_022, m_202, m_220, m_211, m_121, m_112,
+        m_122, m_212, m_221,
+        m_222,
+        a_000, a_100, a_010, a_001, a_200, a_020, a_002, a_110,  a_101, a_011, a_111,
+        b_000, b_100, b_010, b_001, b_200, b_020, b_002, b_110,  b_101, b_011, b_111,
+        c_000, c_100, c_010, c_001, c_200, c_020, c_002, c_110,  c_101, c_011, c_111,
+        d_000, d_100, d_010, d_001, d_110, d_101, d_011, d_111,
+        LaplaceRho, eps_new, omegaF, 
+        kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
+    );
 
     //////////////////////////////////////////////////////////////////////////
     //! - Write distributions: style of reading and writing the distributions from/to
@@ -968,9 +985,22 @@ __global__ void scaleCF_compressible(
     x = -c1o4;
     y = -c1o4;
     z =  c1o4;
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // Set neighbor indices
+    k_000 = k_00M;
+    k_M00 = k_M0M;
+    k_0M0 = k_0MM;
+    k_00M = neighborZfine[k_00M];
+    k_MM0 = k_MMM;
+    k_M0M = neighborZfine[k_M0M];
+    k_0MM = neighborZfine[k_0MM];
+    k_MMM = neighborZfine[k_MMM];
 
     ////////////////////////////////////////////////////////////////////////////////
     // Set moments (zeroth to sixth orders) on destination node
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     interpolateDistributions(
         x, y, z,
         m_000, 
@@ -988,17 +1018,6 @@ __global__ void scaleCF_compressible(
         kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
     );
 
-    ////////////////////////////////////////////////////////////////////////////////////
-    // Set neighbor indices
-    k_000 = k_00M;
-    k_M00 = k_M0M;
-    k_0M0 = k_0MM;
-    k_00M = neighborZfine[k_00M];
-    k_MM0 = k_MMM;
-    k_M0M = neighborZfine[k_M0M];
-    k_0MM = neighborZfine[k_0MM];
-    k_MMM = neighborZfine[k_MMM];
-
     //////////////////////////////////////////////////////////////////////////
     // Write distributions
     (distFine.f[DIR_000])[k_000] = f_000;
@@ -1038,9 +1057,21 @@ __global__ void scaleCF_compressible(
     y = -c1o4;
     z =  c1o4;
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // Set neighbor indices
+    k_000 = k_M00;
+    k_M00 = neighborXfine[k_M00];
+    k_0M0 = k_MM0;
+    k_00M = k_M0M;
+    k_MM0 = neighborXfine[k_MM0];
+    k_M0M = neighborXfine[k_M0M];
+    k_0MM = k_MMM;
+    k_MMM = neighborXfine[k_MMM];
 
     ////////////////////////////////////////////////////////////////////////////////
     // Set moments (zeroth to sixth orders) on destination node
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     interpolateDistributions(
         x, y, z,
         m_000, 
@@ -1058,17 +1089,6 @@ __global__ void scaleCF_compressible(
         kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
     );
 
-    ////////////////////////////////////////////////////////////////////////////////////
-    // Set neighbor indices
-    k_000 = k_M00;
-    k_M00 = neighborXfine[k_M00];
-    k_0M0 = k_MM0;
-    k_00M = k_M0M;
-    k_MM0 = neighborXfine[k_MM0];
-    k_M0M = neighborXfine[k_M0M];
-    k_0MM = k_MMM;
-    k_MMM = neighborXfine[k_MMM];
-
     //////////////////////////////////////////////////////////////////////////
     // Write distributions
     (distFine.f[DIR_000])[k_000] = f_000;
@@ -1107,9 +1127,22 @@ __global__ void scaleCF_compressible(
     x =  c1o4;
     y = -c1o4;
     z = -c1o4;
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // Set neighbor indices
+    k_00M = k_000;
+    k_M0M = k_M00;
+    k_0MM = k_0M0;
+    k_MMM = k_MM0;
+    k_000 = k_base_M00;
+    k_M00 = neighborXfine[k_base_M00];
+    k_0M0 = k_base_MM0;
+    k_MM0 = neighborXfine[k_base_MM0];
 
     ////////////////////////////////////////////////////////////////////////////////
     // Set moments (zeroth to sixth orders) on destination node
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     interpolateDistributions(
         x, y, z,
         m_000, 
@@ -1127,17 +1160,6 @@ __global__ void scaleCF_compressible(
         kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
     );
 
-    ////////////////////////////////////////////////////////////////////////////////////
-    // Set neighbor indices
-    k_00M = k_000;
-    k_M0M = k_M00;
-    k_0MM = k_0M0;
-    k_MMM = k_MM0;
-    k_000 = k_base_M00;
-    k_M00 = neighborXfine[k_base_M00];
-    k_0M0 = k_base_MM0;
-    k_MM0 = neighborXfine[k_base_MM0];
-
     //////////////////////////////////////////////////////////////////////////
     // Write distributions
     (distFine.f[DIR_000])[k_000] = f_000;
@@ -1177,25 +1199,6 @@ __global__ void scaleCF_compressible(
     y =  c1o4;
     z = -c1o4;
     
-    ////////////////////////////////////////////////////////////////////////////////
-    // Set moments (zeroth to sixth orders) on destination node
-    interpolateDistributions(
-        x, y, z,
-        m_000, 
-        m_100, m_010, m_001,
-        m_011, m_101, m_110, m_200, m_020, m_002,
-        m_111, m_210, m_012, m_201, m_021, m_120, m_102,
-        m_022, m_202, m_220, m_211, m_121, m_112,
-        m_122, m_212, m_221,
-        m_222,
-        a_000, a_100, a_010, a_001, a_200, a_020, a_002, a_110,  a_101, a_011, a_111,
-        b_000, b_100, b_010, b_001, b_200, b_020, b_002, b_110,  b_101, b_011, b_111,
-        c_000, c_100, c_010, c_001, c_200, c_020, c_002, c_110,  c_101, c_011, c_111,
-        d_000, d_100, d_010, d_001, d_110, d_101, d_011, d_111,
-        LaplaceRho, eps_new, omegaF, 
-        kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
-    );
-
     //////////////////////////////////////////////////////////////////////////
     // index of the base node and its neighbors
     k_base_000 = k_base_0M0;
@@ -1218,6 +1221,28 @@ __global__ void scaleCF_compressible(
     k_0MM = k_base_0MM;
     k_MMM = k_base_MMM;
 
+    ////////////////////////////////////////////////////////////////////////////////
+    // Set moments (zeroth to sixth orders) on destination node
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
+    interpolateDistributions(
+        x, y, z,
+        m_000, 
+        m_100, m_010, m_001,
+        m_011, m_101, m_110, m_200, m_020, m_002,
+        m_111, m_210, m_012, m_201, m_021, m_120, m_102,
+        m_022, m_202, m_220, m_211, m_121, m_112,
+        m_122, m_212, m_221,
+        m_222,
+        a_000, a_100, a_010, a_001, a_200, a_020, a_002, a_110,  a_101, a_011, a_111,
+        b_000, b_100, b_010, b_001, b_200, b_020, b_002, b_110,  b_101, b_011, b_111,
+        c_000, c_100, c_010, c_001, c_200, c_020, c_002, c_110,  c_101, c_011, c_111,
+        d_000, d_100, d_010, d_001, d_110, d_101, d_011, d_111,
+        LaplaceRho, eps_new, omegaF, 
+        kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
+    );
+
     //////////////////////////////////////////////////////////////////////////
     // Write distributions
     (distFine.f[DIR_000])[k_000] = f_000;
@@ -1256,9 +1281,22 @@ __global__ void scaleCF_compressible(
     x = -c1o4;
     y =  c1o4;
     z =  c1o4;
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // Set neighbor indices
+    k_000 = k_00M;
+    k_M00 = k_M0M;
+    k_0M0 = k_0MM;
+    k_00M = neighborZfine[k_00M];
+    k_MM0 = k_MMM;
+    k_M0M = neighborZfine[k_M0M];
+    k_0MM = neighborZfine[k_0MM];
+    k_MMM = neighborZfine[k_MMM];
 
     ////////////////////////////////////////////////////////////////////////////////
     // Set moments (zeroth to sixth orders) on destination node
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     interpolateDistributions(
         x, y, z,
         m_000, 
@@ -1276,17 +1314,6 @@ __global__ void scaleCF_compressible(
         kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
     );
 
-    ////////////////////////////////////////////////////////////////////////////////////
-    // Set neighbor indices
-    k_000 = k_00M;
-    k_M00 = k_M0M;
-    k_0M0 = k_0MM;
-    k_00M = neighborZfine[k_00M];
-    k_MM0 = k_MMM;
-    k_M0M = neighborZfine[k_M0M];
-    k_0MM = neighborZfine[k_0MM];
-    k_MMM = neighborZfine[k_MMM];
-
     //////////////////////////////////////////////////////////////////////////
     // Write distributions
     (distFine.f[DIR_000])[k_000] = f_000;
@@ -1325,9 +1352,22 @@ __global__ void scaleCF_compressible(
     x = c1o4;
     y = c1o4;
     z = c1o4;
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Set neighbor indices
+    k_000 = k_M00;
+    k_M00 = neighborXfine[k_M00];
+    k_0M0 = k_MM0;
+    k_00M = k_M0M;
+    k_MM0 = neighborXfine[k_MM0];
+    k_M0M = neighborXfine[k_M0M];
+    k_0MM = k_MMM;
+    k_MMM = neighborXfine[k_MMM];
 
     ////////////////////////////////////////////////////////////////////////////////
     // Set moments (zeroth to sixth orders) on destination node
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     interpolateDistributions(
         x, y, z,
         m_000, 
@@ -1345,17 +1385,6 @@ __global__ void scaleCF_compressible(
         kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
     );
 
-    ////////////////////////////////////////////////////////////////////////////////////
-    // Set neighbor indices
-    k_000 = k_M00;
-    k_M00 = neighborXfine[k_M00];
-    k_0M0 = k_MM0;
-    k_00M = k_M0M;
-    k_MM0 = neighborXfine[k_MM0];
-    k_M0M = neighborXfine[k_M0M];
-    k_0MM = k_MMM;
-    k_MMM = neighborXfine[k_MMM];
-
     //////////////////////////////////////////////////////////////////////////
     // Write distributions
     (distFine.f[DIR_000])[k_000] = f_000;
@@ -1394,9 +1423,22 @@ __global__ void scaleCF_compressible(
     x =  c1o4;
     y =  c1o4;
     z = -c1o4;
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // Set neighbor indices
+    k_00M = k_000;
+    k_M0M = k_M00;
+    k_0MM = k_0M0;
+    k_MMM = k_MM0;
+    k_000 = k_base_M00;
+    k_M00 = neighborXfine[k_base_M00];
+    k_0M0 = k_base_MM0;
+    k_MM0 = neighborXfine[k_base_MM0];
 
     ////////////////////////////////////////////////////////////////////////////////
     // Set moments (zeroth to sixth orders) on destination node
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     interpolateDistributions(
         x, y, z,
         m_000, 
@@ -1414,17 +1456,6 @@ __global__ void scaleCF_compressible(
         kxxMyyAverage, kxxMzzAverage, kyzAverage, kxzAverage, kxyAverage
     );
 
-    ////////////////////////////////////////////////////////////////////////////////////
-    // Set neighbor indices
-    k_00M = k_000;
-    k_M0M = k_M00;
-    k_0MM = k_0M0;
-    k_MMM = k_MM0;
-    k_000 = k_base_M00;
-    k_M00 = neighborXfine[k_base_M00];
-    k_0M0 = k_base_MM0;
-    k_MM0 = neighborXfine[k_base_MM0];
-
     //////////////////////////////////////////////////////////////////////////
     // Write distributions
     (distFine.f[DIR_000])[k_000] = f_000;
@@ -1455,3 +1486,7 @@ __global__ void scaleCF_compressible(
     (distFine.f[DIR_PMM])[k_0MM] = f_PMM;
     (distFine.f[DIR_MMM])[k_MMM] = f_MMM;
 }
+// Explicit instantiations: GPU_Kernels.cuh only declares this kernel template, so both values of hasTurbulentViscosity are instantiated in this file.
+template __global__ void scaleCF_compressible<true>( real* distributionsCoarse, real* distributionsFine, unsigned int* neighborXcoarse, unsigned int* neighborYcoarse, unsigned int* neighborZcoarse, unsigned int* neighborXfine, unsigned int* neighborYfine, unsigned int* neighborZfine, unsigned long long numberOfLBnodesCoarse, unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* indicesCoarseMMM, unsigned int* indicesFineMMM, unsigned int numberOfInterfaceNodes, real omegaCoarse, real omegaFine, real* turbulentViscosityCoarse, real* turbulentViscosityFine, OffCF offsetCF);
+// hasTurbulentViscosity == false: the omegaC/omegaF adjustments guarded by if(hasTurbulentViscosity) are not executed.
+template __global__ void scaleCF_compressible<false>( real* distributionsCoarse, real* distributionsFine, unsigned int* neighborXcoarse, unsigned int* neighborYcoarse, unsigned int* neighborZcoarse, unsigned int* neighborXfine, unsigned int* neighborYfine, unsigned int* neighborZfine, unsigned long long numberOfLBnodesCoarse, unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int* indicesCoarseMMM, unsigned int* indicesFineMMM, unsigned int numberOfInterfaceNodes, real omegaCoarse, real omegaFine, real* turbulentViscosityCoarse, real* turbulentViscosityFine, OffCF offsetCF);
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu
index e7d999d108e59bca98bf87b813f9479f1c601266..c89a524c1dd63f426254c395d1e4881a7e96ce7a 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/GridScaling/scaleFC_compressible.cu
@@ -46,7 +46,7 @@ using namespace vf::gpu;
 //!
 
 // based on scaleFC_RhoSq_comp_27
-__global__ void scaleFC_compressible(
+template<bool hasTurbulentViscosity> __global__ void scaleFC_compressible(
     real *distributionsCoarse,
     real *distributionsFine,
     unsigned int *neighborXcoarse,
@@ -63,6 +63,8 @@ __global__ void scaleFC_compressible(
     unsigned int numberOfInterfaceNodes,
     real omegaCoarse,
     real omegaFine,
+    real* turbulentViscosityCoarse,
+    real* turbulentViscosityFine,
     OffFC offsetFC)
 {
     ////////////////////////////////////////////////////////////////////////////////
@@ -138,6 +140,8 @@ __global__ void scaleFC_compressible(
     unsigned int k_0MM = k_base_0MM;
     unsigned int k_MMM = k_base_MMM;
 
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     calculateMomentsOnSourceNodes( distFine, omegaF,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MMM, vx1_MMM, vx2_MMM, vx3_MMM,
         kxyFromfcNEQ_MMM, kyzFromfcNEQ_MMM, kxzFromfcNEQ_MMM, kxxMyyFromfcNEQ_MMM, kxxMzzFromfcNEQ_MMM);
@@ -155,6 +159,8 @@ __global__ void scaleFC_compressible(
     k_0MM = neighborZfine[k_0MM];
     k_MMM = neighborZfine[k_MMM];
 
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     calculateMomentsOnSourceNodes( distFine, omegaF,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MMP, vx1_MMP, vx2_MMP, vx3_MMP,
         kxyFromfcNEQ_MMP, kyzFromfcNEQ_MMP, kxzFromfcNEQ_MMP, kxxMyyFromfcNEQ_MMP, kxxMzzFromfcNEQ_MMP);
@@ -172,6 +178,8 @@ __global__ void scaleFC_compressible(
     k_0MM = k_MMM;
     k_MMM = neighborXfine[k_MMM];
 
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     calculateMomentsOnSourceNodes( distFine, omegaF,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PMP, vx1_PMP, vx2_PMP, vx3_PMP,
         kxyFromfcNEQ_PMP, kyzFromfcNEQ_PMP, kxzFromfcNEQ_PMP, kxxMyyFromfcNEQ_PMP, kxxMzzFromfcNEQ_PMP);
@@ -189,6 +197,8 @@ __global__ void scaleFC_compressible(
     k_0M0 = k_base_MM0;
     k_MM0 = neighborXfine[k_base_MM0];
 
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     calculateMomentsOnSourceNodes( distFine, omegaF,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PMM, vx1_PMM, vx2_PMM, vx3_PMM,
         kxyFromfcNEQ_PMM, kyzFromfcNEQ_PMM, kxzFromfcNEQ_PMM, kxxMyyFromfcNEQ_PMM, kxxMzzFromfcNEQ_PMM);
@@ -216,6 +226,8 @@ __global__ void scaleFC_compressible(
     k_0MM = k_base_0MM;
     k_MMM = k_base_MMM;
 
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     calculateMomentsOnSourceNodes( distFine, omegaF,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MPM, vx1_MPM, vx2_MPM, vx3_MPM,
         kxyFromfcNEQ_MPM, kyzFromfcNEQ_MPM, kxzFromfcNEQ_MPM, kxxMyyFromfcNEQ_MPM, kxxMzzFromfcNEQ_MPM);
@@ -232,6 +244,8 @@ __global__ void scaleFC_compressible(
     k_M0M = neighborZfine[k_M0M];
     k_0MM = neighborZfine[k_0MM];
     k_MMM = neighborZfine[k_MMM];
+
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
     
     calculateMomentsOnSourceNodes( distFine, omegaF,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_MPP, vx1_MPP, vx2_MPP, vx3_MPP,
@@ -250,6 +264,8 @@ __global__ void scaleFC_compressible(
     k_0MM = k_MMM;
     k_MMM = neighborXfine[k_MMM];
 
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     calculateMomentsOnSourceNodes( distFine, omegaF,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PPP, vx1_PPP, vx2_PPP, vx3_PPP,
         kxyFromfcNEQ_PPP, kyzFromfcNEQ_PPP, kxzFromfcNEQ_PPP, kxxMyyFromfcNEQ_PPP, kxxMzzFromfcNEQ_PPP);
@@ -267,6 +283,8 @@ __global__ void scaleFC_compressible(
     k_0M0 = k_base_MM0;
     k_MM0 = neighborXfine[k_base_MM0];
     
+    if(hasTurbulentViscosity) omegaF = omegaFine/ (c1o1 + c3o1*omegaFine*turbulentViscosityFine[k_000]);
+
     calculateMomentsOnSourceNodes( distFine, omegaF,
         k_000, k_M00, k_0M0, k_00M, k_MM0, k_M0M, k_0MM, k_MMM, drho_PPM, vx1_PPM, vx2_PPM, vx3_PPM,
         kxyFromfcNEQ_PPM, kyzFromfcNEQ_PPM, kxzFromfcNEQ_PPM, kxxMyyFromfcNEQ_PPM, kxxMzzFromfcNEQ_PPM);
@@ -540,6 +558,18 @@ __global__ void scaleFC_compressible(
     // y = 0.;
     // z = 0.;
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // index of the destination node and its neighbors
+    k_000 = indicesCoarse000[nodeIndex];
+    k_M00 = neighborXcoarse [k_000];
+    k_0M0 = neighborYcoarse [k_000];
+    k_00M = neighborZcoarse [k_000];
+    k_MM0 = neighborYcoarse [k_M00];
+    k_M0M = neighborZcoarse [k_M00];
+    k_0MM = neighborZcoarse [k_0M0];
+    k_MMM = neighborZcoarse [k_MM0];
+    ////////////////////////////////////////////////////////////////////////////////////
+
+    if(hasTurbulentViscosity) omegaC = omegaCoarse / (c1o1 + c3o1*omegaCoarse*turbulentViscosityCoarse[k_000]);
 
     ////////////////////////////////////////////////////////////////////////////////
     //! - Set macroscopic values on destination node (zeroth and first order moments)
@@ -642,19 +672,6 @@ __global__ void scaleFC_compressible(
     backwardInverseChimeraWithK(m_210, m_211, m_212, vvz, vz_sq, c9o1,  c1o9);
     backwardInverseChimeraWithK(m_220, m_221, m_222, vvz, vz_sq, c36o1, c1o36);
 
-
-    ////////////////////////////////////////////////////////////////////////////////////
-    // index of the destination node and its neighbors
-    k_000 = indicesCoarse000[nodeIndex];
-    k_M00 = neighborXcoarse [k_000];
-    k_0M0 = neighborYcoarse [k_000];
-    k_00M = neighborZcoarse [k_000];
-    k_MM0 = neighborYcoarse [k_M00];
-    k_M0M = neighborZcoarse [k_M00];
-    k_0MM = neighborZcoarse [k_0M0];
-    k_MMM = neighborZcoarse [k_MM0];
-    ////////////////////////////////////////////////////////////////////////////////////
-
     ////////////////////////////////////////////////////////////////////////////////////
     //! - Write distributions: style of reading and writing the distributions from/to
     //! stored arrays dependent on timestep is based on the esoteric twist algorithm
@@ -690,3 +707,7 @@ __global__ void scaleFC_compressible(
     (distCoarse.f[DIR_MMM])[k_MMM] = f_MMM;
     ////////////////////////////////////////////////////////////////////////////////////
 }
+// Explicit instantiations of scaleFC_compressible for both values of hasTurbulentViscosity.
+template __global__ void scaleFC_compressible<true>( real *distributionsCoarse, real *distributionsFine, unsigned int *neighborXcoarse, unsigned int *neighborYcoarse, unsigned int *neighborZcoarse, unsigned int *neighborXfine, unsigned int *neighborYfine, unsigned int *neighborZfine, unsigned long long numberOfLBnodesCoarse, unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int *indicesCoarse000, unsigned int *indicesFineMMM, unsigned int numberOfInterfaceNodes, real omegaCoarse, real omegaFine, real* turbulentViscosityCoarse, real* turbulentViscosityFine, OffFC offsetFC);
+
+template __global__ void scaleFC_compressible<false>( real *distributionsCoarse, real *distributionsFine, unsigned int *neighborXcoarse, unsigned int *neighborYcoarse, unsigned int *neighborZcoarse, unsigned int *neighborXfine, unsigned int *neighborYfine, unsigned int *neighborZfine, unsigned long long numberOfLBnodesCoarse, unsigned long long numberOfLBnodesFine, bool isEvenTimestep, unsigned int *indicesCoarse000, unsigned int *indicesFineMMM, unsigned int numberOfInterfaceNodes, real omegaCoarse, real omegaFine, real* turbulentViscosityCoarse, real* turbulentViscosityFine, OffFC offsetFC);
diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
index 4faea21102b6a68dd9a0aa30e9cecc7eba6051b0..9abac27969e74dc90ecdcc707f4fcb2234010d07 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
@@ -4054,12 +4054,12 @@ void ScaleCF_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulat
     getLastCudaError("scaleCF_RhoSq_27 execution failed");
 }
 
-void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF& offsetCF, CUstream_st *stream)
+template<bool hasTurbulentViscosity> void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF& offsetCF, CUstream_st *stream)
 {
     dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  icellCF->kCF);
     dim3 threads(parameterDeviceC->numberofthreads, 1, 1 );
 
-    scaleCF_compressible<<<grid, threads, 0, stream>>>(
+    scaleCF_compressible<hasTurbulentViscosity><<<grid, threads, 0, stream>>>(
         parameterDeviceC->distributions.f[0],
         parameterDeviceF->distributions.f[0],
         parameterDeviceC->neighborX,
@@ -4076,9 +4076,14 @@ void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulati
         icellCF->kCF,
         parameterDeviceC->omega,
         parameterDeviceF->omega,
+        parameterDeviceC->turbViscosity,
+        parameterDeviceF->turbViscosity,
         offsetCF);
+
     getLastCudaError("scaleCF_compressible execution failed");
 }
+template void ScaleCF_compressible<true>(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF& offsetCF, CUstream_st *stream); // explicit instantiation, hasTurbulentViscosity = true
+template void ScaleCF_compressible<false>(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellCF * icellCF, OffCF& offsetCF, CUstream_st *stream); // explicit instantiation, hasTurbulentViscosity = false
 
 //////////////////////////////////////////////////////////////////////////
 void ScaleCF_RhoSq_3rdMom_comp_27(
@@ -4946,12 +4951,12 @@ void ScaleFC_RhoSq_comp_27(LBMSimulationParameter * parameterDeviceC, LBMSimulat
     getLastCudaError("scaleFC_RhoSq_comp_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-void ScaleFC_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC &offsetFC, CUstream_st *stream)
+template<bool hasTurbulentViscosity> void ScaleFC_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC &offsetFC, CUstream_st *stream)
 {
     dim3 grid = vf::cuda::getCudaGrid(parameterDeviceC->numberofthreads,  icellFC->kFC);
     dim3 threads(parameterDeviceC->numberofthreads, 1, 1 );
 
-    scaleFC_compressible<<<grid, threads, 0, stream>>>(
+    scaleFC_compressible<hasTurbulentViscosity><<<grid, threads, 0, stream>>>(
         parameterDeviceC->distributions.f[0],
         parameterDeviceF->distributions.f[0],
         parameterDeviceC->neighborX,
@@ -4968,9 +4973,15 @@ void ScaleFC_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulati
         icellFC->kFC,
         parameterDeviceC->omega,
         parameterDeviceF->omega,
+        parameterDeviceC->turbViscosity,
+        parameterDeviceF->turbViscosity,
         offsetFC);
+
     getLastCudaError("scaleFC_compressible execution failed");
 }
+template void ScaleFC_compressible<true>(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC &offsetFC, CUstream_st *stream); // explicit instantiation, hasTurbulentViscosity = true
+template void ScaleFC_compressible<false>(LBMSimulationParameter * parameterDeviceC, LBMSimulationParameter* parameterDeviceF, ICellFC * icellFC, OffFC &offsetFC, CUstream_st *stream); // explicit instantiation, hasTurbulentViscosity = false
+
 //////////////////////////////////////////////////////////////////////////
 void ScaleFC_RhoSq_3rdMom_comp_27(
     real* DC,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
index cc8ca53d15ac02686b850a70ab181bb47285a7d1..80e3c273987092aa63e4a2724df0df3df7152145 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
@@ -32,6 +32,7 @@
 //======================================================================================
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
+#include "Kernel/Utilities/DistributionHelper.cuh"
 #include "lbm/constants/NumericConstants.h"
 #include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
@@ -52,67 +53,8 @@ __global__ void QSlipDevice27(
     unsigned long long numberOfLBnodes, 
     bool isEvenTimestep)
 {
-   Distributions27 D;
-   if (isEvenTimestep==true)
-   {
-      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
-      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
-      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
-      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
-      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
-      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
-      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
-      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
-      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
-      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
-      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
-      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
-      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
-      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
-      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
-      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
-      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
-      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
-      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
-      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
-      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
-      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
-      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
-      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
-      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
-      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
-      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
-   } 
-   else
-   {
-      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
-      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
-      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
-      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
-      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
-      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
-      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
-      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
-      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
-      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
-      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
-      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
-      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
-      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
-      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
-      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
-      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
-      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
-      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
-      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
-      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
-      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
-      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
-      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
-      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
-      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
-      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
-   }
+   Distributions27 D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, isEvenTimestep);
+
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
    const unsigned  y = blockIdx.x;   // Globaler y-Index 
@@ -237,66 +179,8 @@ __global__ void QSlipDevice27(
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
       //////////////////////////////////////////////////////////////////////////
-      if (isEvenTimestep==false)
-      {
-         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
-         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
-         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
-         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
-         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
-         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
-         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
-         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
-         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
-         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
-         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
-         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
-         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
-         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
-         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
-         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
-         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
-         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
-         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
-         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
-         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
-         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
-         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
-         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
-         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
-         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
-         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
-      } 
-      else
-      {
-         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
-         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
-         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
-         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
-         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
-         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
-         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
-         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
-         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
-         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
-         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
-         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
-         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
-         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
-         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
-         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
-         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
-         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
-         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
-         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
-         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
-         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
-         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
-         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
-         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
-         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
-         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
-      }
+
+      D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, !isEvenTimestep);
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
       //(D.f[DIR_000])[k]=c1o10;
@@ -1248,9 +1132,7 @@ __global__ void BBSlipDeviceComp27(
       //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
       //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
       //!
-      Distributions27 dist;
-      getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
-
+      Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, numberOfLBnodes, isEvenTimestep);
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local subgrid distances (q's)
       //!
@@ -1338,13 +1220,13 @@ __global__ void BBSlipDeviceComp27(
                    (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
                    (f_T - f_B)) / (c1o1 + drho);
 
-      real cu_sq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3) * (c1o1 + drho);
+      // real cu_sq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3) * (c1o1 + drho);
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - change the pointer to write the results in the correct array
       //!
-      getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
 
+      dist = vf::gpu::getDistributionReferences27(distributions, numberOfLBnodes, !isEvenTimestep);
       ////////////////////////////////////////////////////////////////////////////////
       //! - Multiply the local velocities by the slipLength
       //!
diff --git a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
index 3208299e93940dabe52faa7d0b3c684c45596660..0838402693e469efb10be2f9cd59094107383b66 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
@@ -42,6 +42,7 @@
 
 #include "LBM/LB.h"
 #include "lbm/constants/D3Q27.h"
+#include "Kernel/Utilities/DistributionHelper.cuh"
 #include <lbm/constants/NumericConstants.h>
 #include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
@@ -107,7 +108,7 @@ __host__ __device__ __forceinline__ void iMEM(
       real _vz_w = vz_w_inst-vDotN_w*wallNormalZ;
 
       //Compute wall shear stress tau_w via MOST
-      real z = (real)samplingOffset[k] + 0.5; //assuming q=0.5, could be replaced by wall distance via wall normal
+      real z = (real)samplingOffset[k] + q; // sampling offset plus q (q replaces the previously hard-coded 0.5); could be replaced by wall distance via wall normal
       real kappa = 0.4;
       real u_star = vMag_el*kappa/(log(z/z0[k]));
       if(hasWallModelMonitor) u_star_monitor[k] = u_star;
@@ -137,6 +138,7 @@ __host__ __device__ __forceinline__ void iMEM(
       wallVelocityZ = clipVz > -clipVz? min(clipVz, max(-clipVz, -3.0*F_z*forceFactor)): max(clipVz, min(-clipVz, -3.0*F_z*forceFactor));
 }
 
+
 //////////////////////////////////////////////////////////////////////////////
 __global__ void QStressDeviceComp27(
     real* DD,
@@ -172,67 +174,8 @@ __global__ void QStressDeviceComp27(
     bool isEvenTimestep)
 {
 
-   Distributions27 D;
-   if (isEvenTimestep==true)//get right array of post coll f's
-   {
-      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
-      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
-      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
-      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
-      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
-      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
-      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
-      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
-      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
-      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
-      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
-      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
-      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
-      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
-      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
-      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
-      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
-      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
-      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
-      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
-      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
-      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
-      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
-      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
-      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
-      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
-      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
-   }
-   else
-   {
-      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
-      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
-      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
-      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
-      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
-      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
-      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
-      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
-      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
-      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
-      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
-      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
-      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
-      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
-      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
-      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
-      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
-      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
-      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
-      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
-      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
-      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
-      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
-      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
-      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
-      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
-      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
-   }
+   Distributions27 D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, isEvenTimestep);
+
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
    const unsigned  y = blockIdx.x;   // Globaler y-Index
@@ -362,66 +305,8 @@ __global__ void QStressDeviceComp27(
 
       real om_turb = om1 / (c1o1 + c3o1*om1*max(c0o1, turbViscosity[k_Q[k]]));
       //////////////////////////////////////////////////////////////////////////
-      if (isEvenTimestep==false)      //get adress where incoming f's should be written to
-      {
-         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
-         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
-         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
-         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
-         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
-         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
-         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
-         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
-         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
-         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
-         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
-         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
-         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
-         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
-         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
-         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
-         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
-         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
-         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
-         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
-         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
-         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
-         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
-         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
-         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
-         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
-         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
-      }
-      else
-      {
-         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
-         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
-         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
-         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
-         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
-         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
-         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
-         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
-         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
-         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
-         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
-         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
-         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
-         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
-         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
-         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
-         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
-         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
-         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
-         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
-         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
-         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
-         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
-         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
-         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
-         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
-         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
-      }
+
+      D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, !isEvenTimestep);
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Compute incoming f's with zero wall velocity
       ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -437,7 +322,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = vx1;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         f_W_in = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, om_turb);
+         // f_W_in = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, om_turb);
+         f_W_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_E, f_W, feq, om_turb, drho, c2o27);
          wallMomentumX += f_E+f_W_in;
       }
 
@@ -446,7 +332,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = -vx1;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         f_E_in = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, om_turb);
+         // f_E_in = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, om_turb);
+         f_E_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_W, f_E, feq, om_turb, drho, c2o27);
          wallMomentumX -= f_W+f_E_in;
       }
 
@@ -455,7 +342,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         f_S_in = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, om_turb);
+         // f_S_in = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, om_turb);
+         f_S_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_N, f_S, feq, om_turb, drho, c2o27);
          wallMomentumY += f_N+f_S_in;
       }
 
@@ -464,7 +352,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = -vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         f_N_in = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, om_turb);
+         // f_N_in = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, om_turb);
+         f_N_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_S, f_N, feq, om_turb, drho, c2o27);
          wallMomentumY -= f_S+f_N_in;
       }
 
@@ -473,7 +362,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         f_B_in = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, om_turb);
+         // f_B_in = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, om_turb);
+         f_B_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_T, f_B, feq, om_turb, drho, c2o27);
          wallMomentumZ += f_T+f_B_in;
       }
 
@@ -482,7 +372,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = -vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         f_T_in = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, om_turb);
+         // f_T_in = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, om_turb);
+         f_T_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_B, f_T, feq, om_turb, drho, c2o27);
          wallMomentumZ -= f_B+f_T_in;
       }
 
@@ -491,7 +382,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = vx1 + vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_SW_in = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, om_turb);
+         // f_SW_in = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, om_turb);
+         f_SW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_NE, f_SW, feq, om_turb, drho, c1o54); // weight must match the one used for feq above
          wallMomentumX += f_NE+f_SW_in;
          wallMomentumY += f_NE+f_SW_in;
       }
@@ -501,7 +393,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = -vx1 - vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_NE_in = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, om_turb);
+         // f_NE_in = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, om_turb);
+         f_NE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_SW, f_NE, feq, om_turb, drho, c1o54);
          wallMomentumX -= f_SW+f_NE_in;
          wallMomentumY -= f_SW+f_NE_in;
       }
@@ -511,7 +404,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = vx1 - vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_NW_in = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, om_turb);
+         // f_NW_in = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, om_turb);
+         f_NW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_SE, f_NW, feq, om_turb, drho, c1o54);
          wallMomentumX += f_SE+f_NW_in;
          wallMomentumY -= f_SE+f_NW_in;
       }
@@ -521,7 +415,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = -vx1 + vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_SE_in = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, om_turb);
+         // f_SE_in = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, om_turb);
+         f_SE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_NW, f_SE, feq, om_turb, drho, c1o54);
          wallMomentumX -= f_NW+f_SE_in;
          wallMomentumY += f_NW+f_SE_in;
       }
@@ -531,7 +426,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = vx1 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_BW_in = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, om_turb);
+         // f_BW_in = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, om_turb);
+         f_BW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TE, f_BW, feq, om_turb, drho, c1o54);
          wallMomentumX += f_TE+f_BW_in;
          wallMomentumZ += f_TE+f_BW_in;
       }
@@ -541,7 +437,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = -vx1 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_TE_in = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, om_turb);
+         // f_TE_in = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, om_turb);
+         f_TE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BW, f_TE, feq, om_turb, drho, c1o54);
          wallMomentumX -= f_BW+f_TE_in;
          wallMomentumZ -= f_BW+f_TE_in;
       }
@@ -551,7 +448,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = vx1 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_TW_in = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, om_turb);
+         // f_TW_in = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, om_turb);
+         f_TW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BE, f_TW, feq, om_turb, drho, c1o54);
          wallMomentumX += f_BE+f_TW_in;
          wallMomentumZ -= f_BE+f_TW_in;
       }
@@ -561,7 +459,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = -vx1 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_BE_in = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, om_turb);
+         // f_BE_in = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, om_turb);
+         f_BE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TW, f_BE, feq, om_turb, drho, c1o54);
          wallMomentumX -= f_TW+f_BE_in;
          wallMomentumZ += f_TW+f_BE_in;
       }
@@ -571,7 +470,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_BS_in = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, om_turb);
+         // f_BS_in = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, om_turb);
+         f_BS_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TN, f_BS, feq, om_turb, drho, c1o54);
          wallMomentumY += f_TN+f_BS_in;
          wallMomentumZ += f_TN+f_BS_in;
       }
@@ -581,7 +481,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = -vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_TN_in = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, om_turb);
+         // f_TN_in = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, om_turb);
+         f_TN_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BS, f_TN, feq, om_turb, drho, c1o54);
          wallMomentumY -= f_BS+f_TN_in;
          wallMomentumZ -= f_BS+f_TN_in;
       }
@@ -591,7 +492,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_TS_in = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, om_turb);
+         // f_TS_in = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, om_turb);
+         f_TS_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BN, f_TS, feq, om_turb, drho, c1o54);
          wallMomentumY += f_BN+f_TS_in;
          wallMomentumZ -= f_BN+f_TS_in;
       }
@@ -601,7 +503,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = -vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         f_BN_in = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, om_turb);
+         // f_BN_in = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, om_turb);
+         f_BN_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TS, f_BN, feq, om_turb, drho, c1o54);
          wallMomentumY -= f_TS+f_BN_in;
          wallMomentumZ += f_TS+f_BN_in;
       }
@@ -611,7 +514,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = vx1 + vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         f_BSW_in = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, om_turb);
+         // f_BSW_in = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, om_turb);
+         f_BSW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TNE, f_BSW, feq, om_turb, drho, c1o216);
          wallMomentumX += f_TNE+f_BSW_in;
          wallMomentumY += f_TNE+f_BSW_in;
          wallMomentumZ += f_TNE+f_BSW_in;
@@ -622,7 +526,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = -vx1 - vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         f_TNE_in = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, om_turb);
+         // f_TNE_in = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, om_turb);
+         f_TNE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BSW, f_TNE, feq, om_turb, drho, c1o216);
          wallMomentumX -= f_BSW+f_TNE_in;
          wallMomentumY -= f_BSW+f_TNE_in;
          wallMomentumZ -= f_BSW+f_TNE_in;
@@ -633,7 +538,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = vx1 + vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         f_TSW_in = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, om_turb);
+         // f_TSW_in = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, om_turb);
+         f_TSW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BNE, f_TSW, feq, om_turb, drho, c1o216);
          wallMomentumX += f_BNE+f_TSW_in;
          wallMomentumY += f_BNE+f_TSW_in;
          wallMomentumZ -= f_BNE+f_TSW_in;
@@ -644,7 +550,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = -vx1 - vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         f_BNE_in = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, om_turb);
+         // f_BNE_in = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, om_turb);
+         f_BNE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TSW, f_BNE, feq, om_turb, drho, c1o216);
          wallMomentumX -= f_TSW+f_BNE_in;
          wallMomentumY -= f_TSW+f_BNE_in;
          wallMomentumZ += f_TSW+f_BNE_in;
@@ -655,7 +562,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = vx1 - vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         f_BNW_in = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, om_turb);
+         // f_BNW_in = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, om_turb);
+         f_BNW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TSE, f_BNW, feq, om_turb, drho, c1o216);
          wallMomentumX += f_TSE+f_BNW_in;
          wallMomentumY -= f_TSE+f_BNW_in;
          wallMomentumZ += f_TSE+f_BNW_in;
@@ -666,7 +574,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = -vx1 + vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         f_TSE_in = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, om_turb);
+         // f_TSE_in = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, om_turb);
+         f_TSE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BNW, f_TSE, feq, om_turb, drho, c1o216);
          wallMomentumX -= f_BNW+f_TSE_in;
          wallMomentumY += f_BNW+f_TSE_in;
          wallMomentumZ -= f_BNW+f_TSE_in;
@@ -677,7 +586,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = vx1 - vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         f_TNW_in = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, om_turb);
+         // f_TNW_in = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, om_turb);
+         f_TNW_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_BSE, f_TNW, feq, om_turb, drho, c1o216);
          wallMomentumX += f_BSE+f_TNW_in;
          wallMomentumY -= f_BSE+f_TNW_in;
          wallMomentumZ -= f_BSE+f_TNW_in;
@@ -688,7 +598,8 @@ __global__ void QStressDeviceComp27(
       {
          velocityLB = -vx1 + vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         f_BSE_in = getInterpolatedDistributionForNoSlipBC(q, f_TNW, f_BSE, feq, om_turb);
+         // f_BSE_in = getInterpolatedDistributionForNoSlipBC(q, f_TNW, f_BSE, feq, om_turb);
+         f_BSE_in = getInterpolatedDistributionForNoSlipWithPressureBC(q, f_TNW, f_BSE, feq, om_turb, drho, c1o216);
          wallMomentumX -= f_TNW+f_BSE_in;
          wallMomentumY += f_TNW+f_BSE_in;
          wallMomentumZ += f_TNW+f_BSE_in;
@@ -699,7 +610,7 @@ __global__ void QStressDeviceComp27(
       // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real VeloX=0.0, VeloY=0.0, VeloZ=0.0;
 
-      q = 0.5f;
+      q = q_dirB[k];
       real eps = 0.001f;
 
       iMEM( k, k_N[k],
@@ -974,67 +885,9 @@ __global__ void BBStressDevice27( real* DD,
                                              unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep)
 {
-   Distributions27 D;
-   if (isEvenTimestep==true)
-   {
-      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
-      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
-      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
-      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
-      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
-      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
-      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
-      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
-      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
-      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
-      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
-      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
-      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
-      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
-      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
-      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
-      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
-      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
-      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
-      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
-      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
-      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
-      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
-      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
-      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
-      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
-      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
-   }
-   else
-   {
-      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
-      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
-      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
-      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
-      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
-      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
-      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
-      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
-      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
-      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
-      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
-      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
-      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
-      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
-      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
-      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
-      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
-      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
-      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
-      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
-      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
-      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
-      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
-      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
-      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
-      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
-      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
-   }
+
+   Distributions27 D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, isEvenTimestep);
+
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
    const unsigned  y = blockIdx.x;   // Globaler y-Index
@@ -1162,66 +1015,8 @@ __global__ void BBStressDevice27( real* DD,
                  (f_T - f_B)) / (c1o1 + drho);
 
       //////////////////////////////////////////////////////////////////////////
-      if (isEvenTimestep==false)
-      {
-         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
-         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
-         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
-         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
-         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
-         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
-         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
-         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
-         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
-         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
-         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
-         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
-         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
-         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
-         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
-         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
-         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
-         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
-         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
-         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
-         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
-         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
-         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
-         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
-         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
-         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
-         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
-      }
-      else
-      {
-         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
-         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
-         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
-         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
-         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
-         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
-         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
-         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
-         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
-         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
-         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
-         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
-         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
-         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
-         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
-         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
-         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
-         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
-         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
-         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
-         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
-         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
-         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
-         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
-         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
-         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
-         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
-      }
+
+      D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, !isEvenTimestep);
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real f_E_in,  f_W_in,  f_N_in,  f_S_in,  f_T_in,  f_B_in,   f_NE_in,  f_SW_in,  f_SE_in,  f_NW_in,  f_TE_in,  f_BW_in,  f_BE_in,
          f_TW_in, f_TN_in, f_BS_in, f_BN_in, f_TS_in, f_TNE_in, f_TSW_in, f_TSE_in, f_TNW_in, f_BNE_in, f_BSW_in, f_BSE_in, f_BNW_in;
@@ -1445,7 +1240,7 @@ __global__ void BBStressDevice27( real* DD,
       // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real VeloX=0.0, VeloY=0.0, VeloZ=0.0;
 
-      q = 0.5f;
+      q = q_dirB[k];
       real eps = 0.001f;
 
       iMEM( k, k_N[k],
@@ -1721,67 +1516,8 @@ __global__ void BBStressPressureDevice27( real* DD,
                                              unsigned long long numberOfLBnodes,
                                              bool isEvenTimestep)
 {
-   Distributions27 D;
-   if (isEvenTimestep==true)
-   {
-      D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
-      D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
-      D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
-      D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
-      D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
-      D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
-      D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
-      D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
-      D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
-      D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
-      D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
-      D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
-      D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
-      D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
-      D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
-      D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
-      D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
-      D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
-      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
-      D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
-      D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
-      D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
-      D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
-      D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
-      D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
-      D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
-      D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
-   }
-   else
-   {
-      D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
-      D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
-      D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
-      D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
-      D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
-      D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
-      D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
-      D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
-      D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
-      D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
-      D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
-      D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
-      D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
-      D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
-      D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
-      D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
-      D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
-      D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
-      D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
-      D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
-      D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
-      D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
-      D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
-      D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
-      D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
-      D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
-      D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
-   }
+   Distributions27 D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, isEvenTimestep);
+
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
    const unsigned  y = blockIdx.x;   // Globaler y-Index
@@ -1909,66 +1645,8 @@ __global__ void BBStressPressureDevice27( real* DD,
                  (f_T - f_B)) / (c1o1 + drho);
 
       //////////////////////////////////////////////////////////////////////////
-      if (isEvenTimestep==false)
-      {
-         D.f[DIR_P00] = &DD[DIR_P00 * numberOfLBnodes];
-         D.f[DIR_M00] = &DD[DIR_M00 * numberOfLBnodes];
-         D.f[DIR_0P0] = &DD[DIR_0P0 * numberOfLBnodes];
-         D.f[DIR_0M0] = &DD[DIR_0M0 * numberOfLBnodes];
-         D.f[DIR_00P] = &DD[DIR_00P * numberOfLBnodes];
-         D.f[DIR_00M] = &DD[DIR_00M * numberOfLBnodes];
-         D.f[DIR_PP0] = &DD[DIR_PP0 * numberOfLBnodes];
-         D.f[DIR_MM0] = &DD[DIR_MM0 * numberOfLBnodes];
-         D.f[DIR_PM0] = &DD[DIR_PM0 * numberOfLBnodes];
-         D.f[DIR_MP0] = &DD[DIR_MP0 * numberOfLBnodes];
-         D.f[DIR_P0P] = &DD[DIR_P0P * numberOfLBnodes];
-         D.f[DIR_M0M] = &DD[DIR_M0M * numberOfLBnodes];
-         D.f[DIR_P0M] = &DD[DIR_P0M * numberOfLBnodes];
-         D.f[DIR_M0P] = &DD[DIR_M0P * numberOfLBnodes];
-         D.f[DIR_0PP] = &DD[DIR_0PP * numberOfLBnodes];
-         D.f[DIR_0MM] = &DD[DIR_0MM * numberOfLBnodes];
-         D.f[DIR_0PM] = &DD[DIR_0PM * numberOfLBnodes];
-         D.f[DIR_0MP] = &DD[DIR_0MP * numberOfLBnodes];
-         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
-         D.f[DIR_PPP] = &DD[DIR_PPP * numberOfLBnodes];
-         D.f[DIR_MMP] = &DD[DIR_MMP * numberOfLBnodes];
-         D.f[DIR_PMP] = &DD[DIR_PMP * numberOfLBnodes];
-         D.f[DIR_MPP] = &DD[DIR_MPP * numberOfLBnodes];
-         D.f[DIR_PPM] = &DD[DIR_PPM * numberOfLBnodes];
-         D.f[DIR_MMM] = &DD[DIR_MMM * numberOfLBnodes];
-         D.f[DIR_PMM] = &DD[DIR_PMM * numberOfLBnodes];
-         D.f[DIR_MPM] = &DD[DIR_MPM * numberOfLBnodes];
-      }
-      else
-      {
-         D.f[DIR_M00] = &DD[DIR_P00 * numberOfLBnodes];
-         D.f[DIR_P00] = &DD[DIR_M00 * numberOfLBnodes];
-         D.f[DIR_0M0] = &DD[DIR_0P0 * numberOfLBnodes];
-         D.f[DIR_0P0] = &DD[DIR_0M0 * numberOfLBnodes];
-         D.f[DIR_00M] = &DD[DIR_00P * numberOfLBnodes];
-         D.f[DIR_00P] = &DD[DIR_00M * numberOfLBnodes];
-         D.f[DIR_MM0] = &DD[DIR_PP0 * numberOfLBnodes];
-         D.f[DIR_PP0] = &DD[DIR_MM0 * numberOfLBnodes];
-         D.f[DIR_MP0] = &DD[DIR_PM0 * numberOfLBnodes];
-         D.f[DIR_PM0] = &DD[DIR_MP0 * numberOfLBnodes];
-         D.f[DIR_M0M] = &DD[DIR_P0P * numberOfLBnodes];
-         D.f[DIR_P0P] = &DD[DIR_M0M * numberOfLBnodes];
-         D.f[DIR_M0P] = &DD[DIR_P0M * numberOfLBnodes];
-         D.f[DIR_P0M] = &DD[DIR_M0P * numberOfLBnodes];
-         D.f[DIR_0MM] = &DD[DIR_0PP * numberOfLBnodes];
-         D.f[DIR_0PP] = &DD[DIR_0MM * numberOfLBnodes];
-         D.f[DIR_0MP] = &DD[DIR_0PM * numberOfLBnodes];
-         D.f[DIR_0PM] = &DD[DIR_0MP * numberOfLBnodes];
-         D.f[DIR_000] = &DD[DIR_000 * numberOfLBnodes];
-         D.f[DIR_PPP] = &DD[DIR_MMM * numberOfLBnodes];
-         D.f[DIR_MMP] = &DD[DIR_PPM * numberOfLBnodes];
-         D.f[DIR_PMP] = &DD[DIR_MPM * numberOfLBnodes];
-         D.f[DIR_MPP] = &DD[DIR_PMM * numberOfLBnodes];
-         D.f[DIR_PPM] = &DD[DIR_MMP * numberOfLBnodes];
-         D.f[DIR_MMM] = &DD[DIR_PPP * numberOfLBnodes];
-         D.f[DIR_PMM] = &DD[DIR_MPP * numberOfLBnodes];
-         D.f[DIR_MPM] = &DD[DIR_PMP * numberOfLBnodes];
-      }
+      D = vf::gpu::getDistributionReferences27(DD, numberOfLBnodes, !isEvenTimestep);
+
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real f_E_in,  f_W_in,  f_N_in,  f_S_in,  f_T_in,  f_B_in,   f_NE_in,  f_SW_in,  f_SE_in,  f_NW_in,  f_TE_in,  f_BW_in,  f_BE_in,
          f_TW_in, f_TN_in, f_BS_in, f_BN_in, f_TS_in, f_TNE_in, f_TSW_in, f_TSE_in, f_TNW_in, f_BNE_in, f_BSW_in, f_BSE_in, f_BNW_in;
@@ -2192,7 +1870,7 @@ __global__ void BBStressPressureDevice27( real* DD,
       // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real VeloX=0.0, VeloY=0.0, VeloZ=0.0;
 
-      q = 0.5f;
+      q = q_dirB[k];
       real eps = 0.001f;
 
       iMEM( k, k_N[k],
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h
index 50b4460d774010ea7d7b98cfa6fa505cdfeb88c2..d83901a0f7d6a7df8120673a4b14371a6e935aef 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h
@@ -5,7 +5,6 @@
 
 #include "LBM/LB.h" 
 
-#include "Kernel/Utilities/KernelGroup.h"
 #include "PreProcessor/PreProcessorType.h"
 #include "Parameter/CudaStreamManager.h"
 
@@ -20,6 +19,5 @@ public:
 
     virtual bool checkParameter()                                = 0;
     virtual std::vector<PreProcessorType> getPreProcessorTypes() = 0;
-    virtual KernelGroup getKernelGroup()                         = 0;
 };
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
index 9bd3945aa81147d03be2b1eac3ddec7c24d71532..328cf8db260bc0092cb2081998961d1e9fb17233 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
+++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
@@ -19,11 +19,6 @@ std::vector<PreProcessorType> KernelImp::getPreProcessorTypes()
     return myPreProcessorTypes;
 }
 
-KernelGroup KernelImp::getKernelGroup() 
-{ 
-    return myKernelGroup; 
-}
-
 void KernelImp::setCheckParameterStrategy(std::shared_ptr<CheckParameterStrategy> strategy)
 {
     this->checkStrategy = strategy;
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
index a96c2c123472ca33f635273e06a5bf36a745654d..84e5f3f6ac08b92ccd92fbf142cceb3245de51d5 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
@@ -20,7 +20,6 @@ public:
 
     bool checkParameter();
     std::vector<PreProcessorType> getPreProcessorTypes();
-    KernelGroup getKernelGroup();
 
     void setCheckParameterStrategy(std::shared_ptr<CheckParameterStrategy> strategy);
     bool getKernelUsesFluidNodeIndices();
@@ -33,8 +32,6 @@ protected:
     std::shared_ptr<CheckParameterStrategy> checkStrategy;
     int level;
     std::vector<PreProcessorType> myPreProcessorTypes;
-    KernelGroup myKernelGroup;
-
     vf::cuda::CudaGrid cudaGrid;
 
     bool kernelUsesFluidNodeIndices = false;
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu
index d4d6307f688da4c8fa37c54fb4958681d5ec4941..dd30516ac4229908a418d932177c1b63d8f5d685 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu
@@ -33,7 +33,6 @@ ADComp27::ADComp27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitCompAD27);
 
-	myKernelGroup = ADKernel27;
 }
 
 ADComp27::ADComp27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu
index 3ee06a1e9ea77c8443d94f44ea54d11ffe7304ac..d218489c754edc89f99277670f09536962ce62b2 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu
@@ -33,7 +33,6 @@ ADComp7::ADComp7(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitCompAD7);
 
-	myKernelGroup = ADKernel7;
 }
 
 ADComp7::ADComp7()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu
index f2a9feaa998b628fb782844d1a7d946317e5af5f..150245a312509d50b77cf86fec18fdb063dbcc2c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu
@@ -33,7 +33,6 @@ ADIncomp27::ADIncomp27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitIncompAD27);
 
-	myKernelGroup = ADKernel27;
 }
 
 ADIncomp27::ADIncomp27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu
index d0c6a6a24ab4d0ebebee9324bdafa1f9e3db51b9..71adc96eef733084e01fa963f6d0fad66a2e1062 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu
@@ -33,7 +33,6 @@ ADIncomp7::ADIncomp7(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitIncompAD7);
 
-	myKernelGroup = ADKernel7;
 }
 
 ADIncomp7::ADIncomp7()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.cu
index 8c99f3b030984aef6215d5479be4b321145ee54f..4aef26b7dd31435b2dadceb78ac1e0b7ebedf029 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.cu
@@ -31,7 +31,7 @@ BGKCompSP27::BGKCompSP27(std::shared_ptr<Parameter> para, int level)
 	this->level = level;
 
 	myPreProcessorTypes.push_back(InitCompSP27);
-	myKernelGroup = BasicKernel;
+	
 }
 
 BGKCompSP27::BGKCompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.cu
index a4b136d1c21b1e4c68432eef5e21ff8c968bdfec..00aaf3c27f16a5d53e7aee225214f05bd62a541a 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.cu
@@ -32,7 +32,7 @@ BGKPlusCompSP27::BGKPlusCompSP27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 BGKPlusCompSP27::BGKPlusCompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu
index 1107d343801f8ac3626b03a93ca92415217732ac..0a5ac6cf7a1b6564a61d0150b187b10b584222b8 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu
@@ -24,7 +24,7 @@ BGKUnified::BGKUnified(std::shared_ptr<Parameter> para, int level)
 
     myPreProcessorTypes.push_back(InitCompSP27);
 
-    myKernelGroup = BasicKernel;
+    
 
     this->cudaGrid = cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
 }
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.cu
index dcfda06db462fd83120751a32a40365445d659ba..664b46fcebd277b0c93300d86b2171edf4f91b2a 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.cu
@@ -32,7 +32,7 @@ CascadeCompSP27::CascadeCompSP27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 CascadeCompSP27::CascadeCompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.cu
index 7817c398285dda131401bd14c3ccdd8c119c5680..218623b7c51099717f6aaa6f375a82516e0c0dae 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.cu
@@ -33,7 +33,7 @@ CumulantCompSP27::CumulantCompSP27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 CumulantCompSP27::CumulantCompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.cu
index 1518dcc209de1edf8a88dae72c1f10c3d4666610..c8aad41b87ef39514f6cf5abc8b8bff42a869346 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.cu
@@ -35,5 +35,5 @@ CumulantAll4CompSP27::CumulantAll4CompSP27(std::shared_ptr<Parameter> para, int
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.cu
index 5a480e5d9c97126e491655b4bbe2aeefef3e7161..09a3aa1cdb1a3cf9c01002a9d335c5a907f94917 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.cu
@@ -50,5 +50,5 @@ CumulantK15Comp::CumulantK15Comp(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.cu
index 51876f30b8c8e37d8cb3355edde5dcf2b04675d0..f0e29a9740438bc78d39574e1046d937cd7b86ce 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.cu
@@ -34,7 +34,7 @@ CumulantK15BulkComp::CumulantK15BulkComp(std::shared_ptr<Parameter> para, int le
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 CumulantK15BulkComp::CumulantK15BulkComp()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.cu
index 613464125bafc572fe7951b8c372e3455ea5b21d..69f84b0671c11fad8ae15676230c491ee815153d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.cu
@@ -35,7 +35,7 @@ CumulantK15SpongeComp::CumulantK15SpongeComp(std::shared_ptr<Parameter> para, in
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 CumulantK15SpongeComp::CumulantK15SpongeComp()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
index 24b0bbc6f43a63093da6b6dcb3ce401b8a614f75..c95289f15fe13decbbe173e17f5d4255b8ef80b5 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
@@ -23,7 +23,7 @@ CumulantK15Unified::CumulantK15Unified(std::shared_ptr<Parameter> para, int leve
 
     myPreProcessorTypes.push_back(InitCompSP27);
 
-    myKernelGroup = BasicKernel;
+    
 
     this->cudaGrid = cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
 }
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu
index ea3442fecca63fdcb45878d742a547ce492ab5c8..b31e4964b609bcee1c3015dcf950b540977f8333 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17.cu
@@ -94,7 +94,9 @@ void CumulantK17<turbulenceModel>::runOnIndices( const unsigned int *indices, un
                                                                                                                         para->getParD(level)->isEvenTimestep,
                                                                                                                         indices,
                                                                                                                         size_indices);
-            break;	case CollisionTemplate::ApplyBodyForce:
+            break;
+
+        case CollisionTemplate::ApplyBodyForce:
             LB_Kernel_CumulantK17 < turbulenceModel, false, true  > <<< cudaGrid.grid, cudaGrid.threads, 0, stream >>>( para->getParD(level)->omega,
                                                                                                                         para->getParD(level)->neighborX, para->getParD(level)->neighborY, para->getParD(level)->neighborZ,
                                                                                                                         para->getParD(level)->distributions.f[0],
@@ -110,7 +112,8 @@ void CumulantK17<turbulenceModel>::runOnIndices( const unsigned int *indices, un
                                                                                                                         para->getParD(level)->isEvenTimestep,
                                                                                                                         indices,
                                                                                                                         size_indices);
-            break;	default:
+            break;
+        default:
             throw std::runtime_error("Invalid CollisionTemplate in CumulantK17::runOnIndices()");
             break;
     }
@@ -126,7 +129,7 @@ CumulantK17<turbulenceModel>::CumulantK17(std::shared_ptr<Parameter> para, int l
 
     myPreProcessorTypes.push_back(InitCompSP27);
 
-    myKernelGroup = BasicKernel;
+    
 
     this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
     this->kernelUsesFluidNodeIndices = true;
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu
index 1ffec96c255b7923f3ee39c01f756abd8cad8862..2044c6ad8d7f242c96479cd060c70b91c1dfb216 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17_Device.cu
@@ -607,7 +607,7 @@ __global__ void LB_Kernel_CumulantK17(
     m_001 = -m_001;
 
     //Write to array here to distribute read/write
-    if(writeMacroscopicVariables)
+    if(writeMacroscopicVariables || turbulenceModel==TurbulenceModel::AMD)
     {
         rho[k_000] = drho;
         vx[k_000] = vvx;
@@ -664,33 +664,33 @@ __global__ void LB_Kernel_CumulantK17(
     //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
     //! DOI:10.3390/computation5020019 ]</b></a>
     //!
-    (dist.f[DIR_P00])[k_000]    = f_M00;
-    (dist.f[DIR_M00])[k_M00]    = f_P00;
-    (dist.f[DIR_0P0])[k_000]    = f_0M0;
-    (dist.f[DIR_0M0])[k_0M0]    = f_0P0;
-    (dist.f[DIR_00P])[k_000]    = f_00M;
-    (dist.f[DIR_00M])[k_00M]    = f_00P;
-    (dist.f[DIR_PP0])[k_000]   = f_MM0;
-    (dist.f[DIR_MM0])[k_MM0]   = f_PP0;
-    (dist.f[DIR_PM0])[k_0M0]   = f_MP0;
-    (dist.f[DIR_MP0])[k_M00]   = f_PM0;
-    (dist.f[DIR_P0P])[k_000]   = f_M0M;
-    (dist.f[DIR_M0M])[k_M0M]   = f_P0P;
-    (dist.f[DIR_P0M])[k_00M]   = f_M0P;
-    (dist.f[DIR_M0P])[k_M00]   = f_P0M;
-    (dist.f[DIR_0PP])[k_000]   = f_0MM;
-    (dist.f[DIR_0MM])[k_0MM]   = f_0PP;
-    (dist.f[DIR_0PM])[k_00M]   = f_0MP;
-    (dist.f[DIR_0MP])[k_0M0]   = f_0PM;
+    (dist.f[DIR_P00])[k_000] = f_M00;
+    (dist.f[DIR_M00])[k_M00] = f_P00;
+    (dist.f[DIR_0P0])[k_000] = f_0M0;
+    (dist.f[DIR_0M0])[k_0M0] = f_0P0;
+    (dist.f[DIR_00P])[k_000] = f_00M;
+    (dist.f[DIR_00M])[k_00M] = f_00P;
+    (dist.f[DIR_PP0])[k_000] = f_MM0;
+    (dist.f[DIR_MM0])[k_MM0] = f_PP0;
+    (dist.f[DIR_PM0])[k_0M0] = f_MP0;
+    (dist.f[DIR_MP0])[k_M00] = f_PM0;
+    (dist.f[DIR_P0P])[k_000] = f_M0M;
+    (dist.f[DIR_M0M])[k_M0M] = f_P0P;
+    (dist.f[DIR_P0M])[k_00M] = f_M0P;
+    (dist.f[DIR_M0P])[k_M00] = f_P0M;
+    (dist.f[DIR_0PP])[k_000] = f_0MM;
+    (dist.f[DIR_0MM])[k_0MM] = f_0PP;
+    (dist.f[DIR_0PM])[k_00M] = f_0MP;
+    (dist.f[DIR_0MP])[k_0M0] = f_0PM;
     (dist.f[DIR_000])[k_000] = f_000;
-    (dist.f[DIR_PPP])[k_000]  = f_MMM;
-    (dist.f[DIR_PMP])[k_0M0]  = f_MPM;
-    (dist.f[DIR_PPM])[k_00M]  = f_MMP;
-    (dist.f[DIR_PMM])[k_0MM]  = f_MPP;
-    (dist.f[DIR_MPP])[k_M00]  = f_PMM;
-    (dist.f[DIR_MMP])[k_MM0]  = f_PPM;
-    (dist.f[DIR_MPM])[k_M0M]  = f_PMP;
-    (dist.f[DIR_MMM])[k_MMM]  = f_PPP;
+    (dist.f[DIR_PPP])[k_000] = f_MMM;
+    (dist.f[DIR_PMP])[k_0M0] = f_MPM;
+    (dist.f[DIR_PPM])[k_00M] = f_MMP;
+    (dist.f[DIR_PMM])[k_0MM] = f_MPP;
+    (dist.f[DIR_MPP])[k_M00] = f_PMM;
+    (dist.f[DIR_MMP])[k_MM0] = f_PPM;
+    (dist.f[DIR_MPM])[k_M0M] = f_PMP;
+    (dist.f[DIR_MMM])[k_MMM] = f_PPP;
 }
 
 template __global__ void LB_Kernel_CumulantK17 < TurbulenceModel::AMD, true, true > ( real omega_in, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long long numberOfLBnodes, int level, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep, const uint *fluidNodeIndices, uint numberOfFluidNodes);
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.cu
index b9e25494490507bde5a6aa7d6dd588ac1a1f6c87..13b54723780fa16374b332c731fc35c5664d75b6 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.cu
@@ -39,7 +39,7 @@ CumulantK17BulkComp::CumulantK17BulkComp(std::shared_ptr<Parameter> para, int le
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 CumulantK17BulkComp::CumulantK17BulkComp()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
index 295804887f9c451120d463c7fcdd968bd2f24d12..59c405ae6e3bb46f608454ddb3a11bb0baac134f 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
@@ -22,7 +22,7 @@ CumulantK17Unified::CumulantK17Unified(std::shared_ptr<Parameter> para, int leve
 
     myPreProcessorTypes.push_back(InitCompSP27);
 
-    myKernelGroup = BasicKernel;
+    
 
     this->cudaGrid = cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
 }
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu
index bc058881e2a013effa417a149cf7a17bce646c6f..466b9f85999257196e860e84919ca6ccce6946b7 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.cu
@@ -33,6 +33,6 @@ void CumulantK17CompChim::run()
 CumulantK17CompChim::CumulantK17CompChim(std::shared_ptr<Parameter> para, int level): KernelImp(para, level)
 {
 	myPreProcessorTypes.push_back(InitCompSP27);
-	myKernelGroup = BasicKernel;
+	
 	this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.cu
index 2e0af0bdb85d3f008768f9f430e8b4e5d9719b0f..15d3509e735faa08b97d0876600c30876829c35f 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.cu
@@ -37,5 +37,5 @@ CumulantK18Comp::CumulantK18Comp(std::shared_ptr<Parameter> para, int level)
 	myPreProcessorTypes.push_back(InitCompSP27);
 	myPreProcessorTypes.push_back(InitF3);
 
-	myKernelGroup = F3Kernel;
+	
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.cu
index d0d81eaac711d4d80284b66a1040e0e8404f5d4d..8181cdb690b5813c368eddadb9cda58a7d749302 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.cu
@@ -37,5 +37,5 @@ CumulantK20Comp::CumulantK20Comp(std::shared_ptr<Parameter> para, int level)
 	myPreProcessorTypes.push_back(InitCompSP27);
 	myPreProcessorTypes.push_back(InitF3);
 
-	myKernelGroup = F3Kernel;
+	
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.cu
index b576333f50304f5628e073d2eee16cf5b82c9d34..6e11bd97a2e76cca3983a83f785a2435d40f594b 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.cu
@@ -32,7 +32,7 @@ MRTCompSP27::MRTCompSP27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 MRTCompSP27::MRTCompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27.cu
index 3fb9be28654f83a7a98bb7d6b3a8a46e9170e7a8..39bd1f3491d0d70e4734d04ef8a2d6e38cdc6448 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27.cu
@@ -32,7 +32,7 @@ BGKIncompSP27::BGKIncompSP27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 BGKIncompSP27::BGKIncompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27.cu
index f274f576a14fc193bcabd44d2c9078a2c98055bc..84a55b89d68f9a1e18c5114f8088a7dee24a4cd1 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27.cu
@@ -32,7 +32,7 @@ BGKPlusIncompSP27::BGKPlusIncompSP27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 BGKPlusIncompSP27::BGKPlusIncompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27.cu
index 3a6760b619d2ca1a7eb19771478eb9e5989ead0c..b060137f2d505886ee02a4b72e372ce8b4d48a78 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27.cu
@@ -32,7 +32,7 @@ CascadeIncompSP27::CascadeIncompSP27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 CascadeIncompSP27::CascadeIncompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27.cu
index 44beb8507d5664f01283130dd3087a788e4491ed..2cade430786b17567c47264f0638dba259b3192d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27.cu
@@ -37,7 +37,7 @@ Cumulant1hIncompSP27::Cumulant1hIncompSP27(std::shared_ptr<Parameter> para, int
 
 	myPreProcessorTypes.push_back(InitSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 Cumulant1hIncompSP27::Cumulant1hIncompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27.cu
index 3a740bef6d7fbaa2883b3d36930d49bf9bf0bb3e..840067da7f34a4415b1b14458ae0fc8d316e366d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27.cu
@@ -35,7 +35,7 @@ CumulantIsoIncompSP27::CumulantIsoIncompSP27(std::shared_ptr<Parameter> para, in
 
 	myPreProcessorTypes.push_back(InitSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 CumulantIsoIncompSP27::CumulantIsoIncompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp.cu
index 7ae17b97170b4d8474acd6777f7c27411a962681..c597924193d859a35dddaa7b37a56e21d265ceba 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp.cu
@@ -32,7 +32,7 @@ CumulantK15Incomp::CumulantK15Incomp(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 CumulantK15Incomp::CumulantK15Incomp()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27.cu
index 7645703e0d40176b136762d6b48633f4a9c0d950..daa90091fe092a98741d0764e2327f3ce4c9d2bc 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27.cu
@@ -32,7 +32,7 @@ MRTIncompSP27::MRTIncompSP27(std::shared_ptr<Parameter> para, int level)
 
 	myPreProcessorTypes.push_back(InitSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 MRTIncompSP27::MRTIncompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27.cu
index 43724f9165e2bb8dca1705ae0053612df92413ec..a8c1af64ebd4641a755bf9fed7e9fafa18e9cad7 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27.cu
@@ -57,7 +57,7 @@ PMCumulantOneCompSP27::PMCumulantOneCompSP27(std::shared_ptr<Parameter> para, st
 
 	myPreProcessorTypes.push_back(InitSP27);
 
-	myKernelGroup = BasicKernel;
+	
 }
 
 PMCumulantOneCompSP27::PMCumulantOneCompSP27()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.cu
index 2b8a7d61e8966e2ed00022986311ae68ac0ca6d6..cfcc544aac2172cef2f4d58600931db8ccfa0189 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp.cu
@@ -55,7 +55,7 @@ WaleCumulantK15Comp::WaleCumulantK15Comp(std::shared_ptr<Parameter> para, int le
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicWaleKernel;
+	
 }
 
 WaleCumulantK15Comp::WaleCumulantK15Comp()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.cu
index 49ee20b44f37b01cd9bc837024a47c1428c00a18..05e257a52b38e2c31badcb1fb739de3ab0239f6e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp.cu
@@ -58,7 +58,7 @@ WaleBySoniMalavCumulantK15Comp::WaleBySoniMalavCumulantK15Comp(std::shared_ptr<P
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicWaleKernel;
+	
 }
 
 WaleBySoniMalavCumulantK15Comp::WaleBySoniMalavCumulantK15Comp()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp.cu
index c9c16e2d2d2259656248948f3f10977c8f18fd24..b7f4038c6b67cc4d1cf521bc7a904801650d1e8d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp.cu
@@ -60,7 +60,7 @@ WaleCumulantK17Comp::WaleCumulantK17Comp(std::shared_ptr<Parameter> para, int le
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicWaleKernel;
+	
 }
 
 WaleCumulantK17Comp::WaleCumulantK17Comp()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp.cu
index b3cdd494c02c6649d60818b6b264b8db8b79d426..5fe0284e675785691e51a58e7e0869ba4164ad5f 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp.cu
@@ -70,7 +70,7 @@ WaleCumulantK17DebugComp::WaleCumulantK17DebugComp(std::shared_ptr<Parameter> pa
 
 	myPreProcessorTypes.push_back(InitCompSP27);
 
-	myKernelGroup = BasicWaleKernel;
+	
 }
 
 WaleCumulantK17DebugComp::WaleCumulantK17DebugComp()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
index 5a2d8c9a426e5cb23ca75f91aaf6fbff75cba72b..27c061ce99f71c349ac1c479efb5f9b780cff3b2 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
@@ -2,6 +2,8 @@
 
 #include "Parameter/Parameter.h"
 
+#include "Kernel/Utilities/KernelTypes.h"
+
 //LBM kernel (compressible)
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h"
@@ -57,151 +59,153 @@
 #include "Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/PMFluidFlowCompStrategy.h"
 #include "Kernel/Kernels/WaleKernels/FluidFlow/Compressible/WaleFluidFlowCompStrategy.h"
 
+using namespace vf;
+
 std::vector<std::shared_ptr<Kernel>> KernelFactoryImp::makeKernels(std::shared_ptr<Parameter> para)
 {
-	std::vector< std::shared_ptr< Kernel>> kernels;
-	for (int level = 0; level <= para->getMaxLevel(); level++)
-		kernels.push_back(makeKernel(para, para->getMainKernel(), level));
-
-	if (para->getMaxLevel() > 0)
-		if (para->getMultiKernelOn())
-			for (std::size_t i = 0; i < para->getMultiKernelLevel().size(); i++)
-				setKernelAtLevel(kernels, para, para->getMultiKernel().at(i), para->getMultiKernelLevel().at(i));
-	return kernels;
+    std::vector< std::shared_ptr< Kernel>> kernels; // result: one kernel per level, index == level
+    for (int level = 0; level <= para->getMaxLevel(); level++) // levels 0..getMaxLevel(), inclusive
+        kernels.push_back(makeKernel(para, para->getMainKernel(), level)); // every level starts with the main kernel
+
+    if (para->getMaxLevel() > 0) // per-level overrides are only considered when more than one level exists
+        if (para->getMultiKernelOn())
+            for (std::size_t i = 0; i < para->getMultiKernelLevel().size(); i++) // entry i pairs getMultiKernel().at(i) with getMultiKernelLevel().at(i)
+                setKernelAtLevel(kernels, para, para->getMultiKernel().at(i), para->getMultiKernelLevel().at(i)); // NOTE(review): setKernelAtLevel takes the vector by value, so this override does not appear to reach `kernels` - confirm
+    return kernels;
 }
 
 std::vector<std::shared_ptr<ADKernel>> KernelFactoryImp::makeAdvDifKernels(std::shared_ptr<Parameter> para)
 {
-	std::vector< std::shared_ptr< ADKernel>> aDKernels;
-	for (int level = 0; level <= para->getMaxLevel(); level++)
-		aDKernels.push_back(makeAdvDifKernel(para, para->getADKernel(), level));
-	return aDKernels;
+    std::vector< std::shared_ptr< ADKernel>> aDKernels; // result: one ADKernel per level, index == level
+    for (int level = 0; level <= para->getMaxLevel(); level++) // levels 0..getMaxLevel(), inclusive
+        aDKernels.push_back(makeAdvDifKernel(para, para->getADKernel(), level)); // same kernel name (para->getADKernel()) on every level
+    return aDKernels;
 }
 
 void KernelFactoryImp::setPorousMedia(std::vector<std::shared_ptr<PorousMedia>> pm)
 {
-	this->pm = pm;
+    this->pm = pm; // copy the list into the member; `this->` is required because the parameter shadows the member name
 }
 
 void KernelFactoryImp::setKernelAtLevel(std::vector<std::shared_ptr<Kernel>> kernels, std::shared_ptr<Parameter> para, std::string kernel, int level)
 {
-	kernels.at(level) = makeKernel(para, kernel, level);
+    kernels.at(level) = makeKernel(para, kernel, level); // at() throws std::out_of_range for an invalid level. NOTE(review): `kernels` is a by-value copy, so the caller's vector is not modified - confirm intent
 }
 
 std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter> para, std::string kernel, int level)
 {
     printf("Instantiating Kernel: %s\n", kernel.c_str());
-	std::shared_ptr<KernelImp> newKernel;
-	std::shared_ptr<CheckParameterStrategy> checkStrategy;
+    std::shared_ptr<KernelImp> newKernel;
+    std::shared_ptr<CheckParameterStrategy> checkStrategy;
 
-    if (kernel == "BGKCompSP27") {
-        newKernel     = BGKCompSP27::getNewInstance(para, level);   // compressible
-        checkStrategy = FluidFlowCompStrategy::getInstance();       //      ||
-    } else if (kernel == "BGKUnified") {                            //      \/
+    if (kernel == CollisionKernel::Compressible::BGK) {
+        newKernel     = BGKCompSP27::getNewInstance(para, level);               // compressible
+        checkStrategy = FluidFlowCompStrategy::getInstance();                   //      ||
+    } else if (kernel == CollisionKernel::Compressible::BGKUnified) {           //      \/
         newKernel     = std::make_shared<vf::gpu::BGKUnified>(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "BGKPlusCompSP27") {
+    } else if (kernel == CollisionKernel::Compressible::BGKPlus) {
         newKernel     = BGKPlusCompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "MRTCompSP27") {
+    } else if (kernel == CollisionKernel::Compressible::MRT) {
         newKernel     = MRTCompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CascadeCompSP27") {
+    } else if (kernel == CollisionKernel::Compressible::Cascade) {
         newKernel     = CascadeCompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantCompSP27") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantClassic) {
         newKernel     = CumulantCompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK15Unified") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantK15Unified) {
         newKernel     = std::make_shared<vf::gpu::CumulantK15Unified>(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK17Unified") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantK17Unified) {
         newKernel     = std::make_shared<vf::gpu::CumulantK17Unified>(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK17BulkComp") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantK17Bulk) {
         newKernel     = CumulantK17BulkComp::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK17CompChim") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantK17Chim) {
         newKernel     = CumulantK17CompChim::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK17"){               
-        switch(para->getTurbulenceModel())                                          
-        {   
+    } else if (kernel == CollisionKernel::Compressible::CumulantK17){
+        switch(para->getTurbulenceModel())
+        {
             case TurbulenceModel::AMD:
-                newKernel = CumulantK17<TurbulenceModel::AMD>::getNewInstance(para, level);   
+                newKernel = CumulantK17<TurbulenceModel::AMD>::getNewInstance(para, level);
                 break;
             case TurbulenceModel::Smagorinsky:
-                newKernel = CumulantK17<TurbulenceModel::Smagorinsky>::getNewInstance(para, level);  
+                newKernel = CumulantK17<TurbulenceModel::Smagorinsky>::getNewInstance(para, level);
                 break;
             case TurbulenceModel::QR:
-                newKernel = CumulantK17<TurbulenceModel::QR>::getNewInstance(para, level);  
+                newKernel = CumulantK17<TurbulenceModel::QR>::getNewInstance(para, level);
                 break;
             case TurbulenceModel::None:
-                newKernel = CumulantK17<TurbulenceModel::None>::getNewInstance(para, level); 
+                newKernel = CumulantK17<TurbulenceModel::None>::getNewInstance(para, level);
                 break;
             default:
                 throw std::runtime_error("Unknown turbulence model!");
-            break;                                                              
-        }                                                                       
-        checkStrategy = FluidFlowCompStrategy::getInstance();       
-    } else if (kernel == "CumulantAll4CompSP27") {
+            break;
+        }
+        checkStrategy = FluidFlowCompStrategy::getInstance();
+    } else if (kernel == CollisionKernel::Compressible::CumulantAll4SP27) {
         newKernel     = CumulantAll4CompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK18Comp") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantK18) {
         newKernel     = CumulantK18Comp::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK20Comp") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantK20) {
         newKernel     = CumulantK20Comp::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK15Comp") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantK15) {
         newKernel     = CumulantK15Comp::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK15BulkComp") {
+    } else if (kernel == CollisionKernel::Compressible::CumulantK15Bulk) {
         newKernel     = CumulantK15BulkComp::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
-    } else if (kernel == "CumulantK15SpongeComp") {                             //     /\      //
-        newKernel     = CumulantK15SpongeComp::getNewInstance(para, level);     //	   ||
+    } else if (kernel == CollisionKernel::Compressible::CumulantK15Sponge) {    //     /\      //
+        newKernel     = CumulantK15SpongeComp::getNewInstance(para, level);     //     ||
         checkStrategy = FluidFlowCompStrategy::getInstance();                   // compressible
-    }																			//===============
-	else if (  kernel == "BGKIncompSP27") {										// incompressible
-        newKernel     = BGKIncompSP27::getNewInstance(para, level);				//	   ||
+    }                                                                           //===============
+    else if (  kernel == CollisionKernel::Incompressible::BGK) {                // incompressible
+        newKernel     = BGKIncompSP27::getNewInstance(para, level);             //     ||
         checkStrategy = FluidFlowIncompStrategy::getInstance();                 //     \/
-    } else if (kernel == "BGKPlusIncompSP27") {
+    } else if (kernel == CollisionKernel::Incompressible::BGKPlus) {
         newKernel     = BGKPlusIncompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowIncompStrategy::getInstance();
-    } else if (kernel == "MRTIncompSP27") {
+    } else if (kernel == CollisionKernel::Incompressible::MRT) {
         newKernel     = MRTIncompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowIncompStrategy::getInstance();
-    } else if (kernel == "CascadeIncompSP27") {
+    } else if (kernel == CollisionKernel::Incompressible::Cascade) {
         newKernel     = CascadeIncompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowIncompStrategy::getInstance();
-    } else if (kernel == "Cumulant1hIncompSP27") {
+    } else if (kernel == CollisionKernel::Incompressible::Cumulant1h) {
         newKernel     = Cumulant1hIncompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowIncompStrategy::getInstance();
-    } else if (kernel == "CumulantIsoIncompSP27") {
+    } else if (kernel == CollisionKernel::Incompressible::CumulantIsometric) {
         newKernel     = CumulantIsoIncompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowIncompStrategy::getInstance();
-    } else if (kernel == "CumulantK15Incomp") {									//     /\      //
-        newKernel     = CumulantK15Incomp::getNewInstance(para, level);			//	   ||
-        checkStrategy = FluidFlowIncompStrategy::getInstance();                 // incompressible
-    }																			//===============
-	else if (kernel == "PMCumulantOneCompSP27") {								// porous media
-        newKernel     = PMCumulantOneCompSP27::getNewInstance(para, pm, level);	//	   ||
-        checkStrategy = PMFluidFlowCompStrategy::getInstance();                 // porous media
-    }                                                                           //===============
-    else if (kernel == "WaleCumulantK17Comp") {                                 // wale model
-        newKernel     = WaleCumulantK17Comp::getNewInstance(para, level);       //	   ||
-        checkStrategy = WaleFluidFlowCompStrategy::getInstance();               //     \/
-    } else if (kernel == "WaleCumulantK17DebugComp") {
+    } else if (kernel == CollisionKernel::Incompressible::CumulantK15) {          //     /\      //
+        newKernel     = CumulantK15Incomp::getNewInstance(para, level);           //     ||
+        checkStrategy = FluidFlowIncompStrategy::getInstance();                   // incompressible
+    }                                                                             //===============
+    else if (kernel == CollisionKernel::PorousMedia::CumulantOne) {               // porous media
+        newKernel     = PMCumulantOneCompSP27::getNewInstance(para, pm, level);   //     ||
+        checkStrategy = PMFluidFlowCompStrategy::getInstance();                   // porous media
+    }                                                                             //===============
+    else if (kernel == CollisionKernel::Wale::CumulantK17) {                      // wale model
+        newKernel     = WaleCumulantK17Comp::getNewInstance(para, level);         //     ||
+        checkStrategy = WaleFluidFlowCompStrategy::getInstance();                 //     \/
+    } else if (kernel == CollisionKernel::Wale::CumulantK17Debug) {
         newKernel     = WaleCumulantK17DebugComp::getNewInstance(para, level);
         checkStrategy = WaleFluidFlowCompStrategy::getInstance();
-    } else if (kernel == "WaleCumulantK15Comp") {
+    } else if (kernel == CollisionKernel::Wale::CumulantK15) {
         newKernel     = WaleCumulantK15Comp::getNewInstance(para, level);
         checkStrategy = WaleFluidFlowCompStrategy::getInstance();
-    } else if (kernel == "WaleBySoniMalavCumulantK15Comp") {                    //     /\      //
-        newKernel     = WaleBySoniMalavCumulantK15Comp::getNewInstance(para, level);// ||
-        checkStrategy = WaleFluidFlowCompStrategy::getInstance();               // wale model
-    }                                                                          //===============
+    } else if (kernel == CollisionKernel::Wale::CumulantK15SoniMalav) {              //     /\      //
+        newKernel     = WaleBySoniMalavCumulantK15Comp::getNewInstance(para, level); //     ||
+        checkStrategy = WaleFluidFlowCompStrategy::getInstance();                    // wale model
+    }                                                                                //===============
     else {
         throw std::runtime_error("KernelFactory does not know the KernelType.");
     }
@@ -212,8 +216,8 @@ std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter>
 
 std::shared_ptr<ADKernel> KernelFactoryImp::makeAdvDifKernel(std::shared_ptr<Parameter> para, std::string kernel, int level)
 {
-	std::shared_ptr<ADKernel> newKernel;
-	std::shared_ptr<CheckParameterStrategy> checkStrategy;
+    std::shared_ptr<ADKernel> newKernel;
+    std::shared_ptr<CheckParameterStrategy> checkStrategy;
 
     if (kernel == "ADComp27") {
         newKernel     = ADComp27::getNewInstance(para, level);
@@ -223,18 +227,18 @@ std::shared_ptr<ADKernel> KernelFactoryImp::makeAdvDifKernel(std::shared_ptr<Par
         checkStrategy = ADMod7CompStrategy::getInstance();
     } else if (kernel == "ADIncomp27") {
         newKernel     = ADIncomp27::getNewInstance(para, level);
-        checkStrategy = ADMod7CompStrategy::getInstance();
+        checkStrategy = ADMod7IncompStrategy::getInstance();
     } else if (kernel == "ADIncomp7") {
         newKernel     = ADIncomp7::getNewInstance(para, level);
-        checkStrategy = ADMod7CompStrategy::getInstance();
+        checkStrategy = ADMod7IncompStrategy::getInstance();
     } else {
         throw std::runtime_error("KernelFactory does not know the KernelType.");
     }
 
-	if (newKernel) {
-		newKernel->setCheckParameterStrategy(checkStrategy);
-		return newKernel;
-	}
-	else
-		throw  std::runtime_error("KernelFactory does not know the KernelType.");
+    if (newKernel) {
+        newKernel->setCheckParameterStrategy(checkStrategy);
+        return newKernel;
+    }
+    else
+        throw  std::runtime_error("KernelFactory does not know the KernelType.");
 }
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelGroup.h b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelGroup.h
deleted file mode 100644
index 0a6543ca0ac1d47bb6f8838d029769846c361868..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelGroup.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef KERNEL_GROUP_H
-#define KERNEL_GROUP_H
-
-enum KernelGroup
-{
-	BasicKernel,
-	BasicWaleKernel,
-	F3Kernel,
-	F3WaleKernel,
-	ADKernel7,
-	ADKernel27
-};
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h
new file mode 100644
index 0000000000000000000000000000000000000000..f249c0bd595d21455b4338334763be4e08abeda9
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h
@@ -0,0 +1,58 @@
+#ifndef KERNEL_TYPES_H
+#define KERNEL_TYPES_H
+
+#include <string>
+
+// String identifiers of the collision kernels. KernelFactoryImp::makeKernel compares the
+// requested kernel name against these constants to decide which kernel to instantiate.
+
+namespace vf::CollisionKernel::Compressible {
+    static const std::string BGK = "BGKCompSP27";
+    static const std::string BGKUnified = "BGKUnified";
+    static const std::string BGKPlus = "BGKPlusCompSP27";
+    static const std::string MRT = "MRTCompSP27";
+    static const std::string Cascade = "CascadeCompSP27";
+
+    static const std::string CumulantClassic = "CumulantCompSP27";
+
+    static const std::string CumulantK15Unified = "CumulantK15Unified";
+    static const std::string CumulantK17Unified = "CumulantK17Unified";
+
+    static const std::string CumulantK17Bulk = "CumulantK17BulkComp";
+    static const std::string CumulantK17Chim = "CumulantK17CompChim";
+    static const std::string CumulantK17 = "CumulantK17";
+
+    static const std::string CumulantAll4SP27 = "CumulantAll4CompSP27";
+    static const std::string CumulantK18 = "CumulantK18Comp";
+    static const std::string CumulantK20 = "CumulantK20Comp";
+
+    static const std::string CumulantK15 = "CumulantK15Comp";
+    static const std::string CumulantK15Bulk = "CumulantK15BulkComp";
+    static const std::string CumulantK15Sponge = "CumulantK15SpongeComp";
+}
+
+namespace vf::CollisionKernel::Incompressible {
+    static const std::string BGK = "BGKIncompSP27";
+    static const std::string BGKPlus = "BGKPlusIncompSP27";
+    static const std::string MRT = "MRTIncompSP27";
+    static const std::string Cascade = "CascadeIncompSP27";
+
+    static const std::string Cumulant1h = "Cumulant1hIncompSP27";
+    static const std::string CumulantIsometric = "CumulantIsoIncompSP27";
+    static const std::string CumulantK15 = "CumulantK15Incomp";
+}
+
+namespace vf::CollisionKernel::PorousMedia {
+    // Keeps the identifier the kernel factory matched before these constants were introduced
+    // ("PMCumulantOneCompSP27"), so kernel names written for the old factory still resolve.
+    static const std::string CumulantOne = "PMCumulantOneCompSP27";
+}
+
+namespace vf::CollisionKernel::Wale {
+    static const std::string CumulantK17 = "WaleCumulantK17Comp";
+    static const std::string CumulantK17Debug = "WaleCumulantK17DebugComp";
+    static const std::string CumulantK15 = "WaleCumulantK15Comp";
+    static const std::string CumulantK15SoniMalav = "WaleBySoniMalavCumulantK15Comp";
+}
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp
index 2b6a266c0d4e5f523091fa4982eee5d83b2ec675..0841d6931bba32440b47d02c9f83864a80f724be 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/GridScalingKernelManager.cpp
@@ -47,10 +47,10 @@ GridScalingKernelManager::GridScalingKernelManager(SPtr<Parameter> parameter, Gr
         if(!gridScalingFactory){
             throw std::runtime_error("There is more than one level, but no scalingFactory was provided.");
         }
-        checkScalingFunction(gridScalingFactory->getGridScalingFC(), this->para->getParD(0)->intFC, "scalingFineToCoarse");
-        checkScalingFunction(gridScalingFactory->getGridScalingCF(), this->para->getParD(0)->intCF, "scalingCoarseToFine");
-        this->scalingFineToCoarse = gridScalingFactory->getGridScalingFC();
-        this->scalingCoarseToFine = gridScalingFactory->getGridScalingCF();
+        checkScalingFunction(gridScalingFactory->getGridScalingFC(parameter->getUseTurbulentViscosity()), this->para->getParD(0)->intFC, "scalingFineToCoarse");
+        checkScalingFunction(gridScalingFactory->getGridScalingCF(parameter->getUseTurbulentViscosity()), this->para->getParD(0)->intCF, "scalingCoarseToFine");
+        this->scalingFineToCoarse = gridScalingFactory->getGridScalingFC(parameter->getUseTurbulentViscosity());
+        this->scalingCoarseToFine = gridScalingFactory->getGridScalingCF(parameter->getUseTurbulentViscosity());
     }
     
     if(this->scalingFineToCoarse == nullptr)
diff --git a/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h
index 37208ee59586533fa7f8ffbc269246826ed27fb8..e910f8ac5a71053d927e2531dcb225199d708749 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/GPUHelperFunctions/KernelUtilities.h
@@ -166,6 +166,14 @@ __inline__ __device__ real getInterpolatedDistributionForNoSlipBC(const real& q,
            + (q * (f + fInverse)) / (c1o1 + q);
 }
 
+__inline__ __device__ real getInterpolatedDistributionForNoSlipWithPressureBC(const real& q, const real& f, const real& fInverse, const real& feq, 
+                                                                  const real& omega, const real& drho, const real weight)
+{
+    // Interpolated distribution built from q, f, fInverse, feq and omega, reduced by the term weight * drho.
+    return (c1o1-q) / (c1o1+q) * (f - fInverse + (f + fInverse - c2o1 * feq * omega) / (c1o1 - omega)) * c1o2 
+           + (q * (f + fInverse)) / (c1o1 + q) - weight * drho;
+}
+
 
 __inline__ __device__ real getInterpolatedDistributionForVeloWithPressureBC(const real& q, const real& f, const real& fInverse, const real& feq,
                                                                             const real& omega, const real& drho, const real& velocity, const real weight)
diff --git a/src/gpu/VirtualFluids_GPU/Output/Timer.cpp b/src/gpu/VirtualFluids_GPU/Output/Timer.cpp
index 74a706165489a86cace40047beb09996aa0aa8db..5a5e010944a776038416386267c3bf6477d47e9f 100644
--- a/src/gpu/VirtualFluids_GPU/Output/Timer.cpp
+++ b/src/gpu/VirtualFluids_GPU/Output/Timer.cpp
@@ -51,9 +51,9 @@ void Timer::outputPerformance(uint t, Parameter* para, vf::gpu::Communicator& co
     VF_LOG_INFO(" {} \t --- {} --- {:>8.1f}/ {:<8.1f} \t   {:5.1f} \t       {:4.1f}",  communicator.getPID(), this->name, this->elapsedTime, this->totalElapsedTime, fnups, bandwidth);
 
     // When using multiple GPUs, sum the nups of all processes
-    if (communicator.getNummberOfProcess() > 1) {
+    if (communicator.getNumberOfProcess() > 1) {
         double nupsSum =  communicator.sumNups(fnups);
         if (communicator.getPID() == 0)
-            VF_LOG_INFO("Sum of all {} processes: Nups in Mio: {:.1f}", communicator.getNummberOfProcess(), nupsSum);
+            VF_LOG_INFO("Sum of all {} processes: Nups in Mio: {:.1f}", communicator.getNumberOfProcess(), nupsSum);
     }
 }
\ No newline at end of file
diff --git a/src/logger/Logger.cpp b/src/logger/Logger.cpp
index 708e359c8430380dd57c404ed9b3c41f53dcb714..d6b79e110c0827a8e91640ad77d4cb6675f23ce4 100644
--- a/src/logger/Logger.cpp
+++ b/src/logger/Logger.cpp
@@ -10,7 +10,7 @@ namespace vf::logging
 
     std::string Logger::logPath = {"logs/"};
 
-    void Logger::initalizeLogger() 
+    void Logger::initializeLogger() 
     {
         updateDefaultLogger();
 
diff --git a/src/logger/Logger.h b/src/logger/Logger.h
index adb7796135a989843ef8de1f778c9901f3ad17c8..3a25fea02eb7d5ea1ab9bffebea08bfc9f512b04 100644
--- a/src/logger/Logger.h
+++ b/src/logger/Logger.h
@@ -33,7 +33,7 @@
 
 // VirtualFluids is using the spdlog logger https://github.com/gabime/spdlog
 #include <spdlog/spdlog.h>
-// To initialize spdlog initalizeLogger() must be called.
+// To initialize spdlog initializeLogger() must be called.
 // spdlog supports 5 log level, which can be changed at runtime e.g.:
 // spdlog::set_level(spdlog::level::debug)
 // The default log level is set to trace. Supported levels: trace < debug < info < warning < critical
@@ -58,7 +58,7 @@ namespace vf::logging
     {
     public:
         // initalizing the above named logger
-        static void initalizeLogger();
+        static void initializeLogger();
 
         // changing the path of the log files
         static void changeLogPath(const std::string& path);