diff --git a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
index d35ea76dc8bc6bb625e051775e1b6c4a9392cff7..48f75cfa9f14a77ac105962df0c0a7cd5a7d982c 100644
--- a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
+++ b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
@@ -120,7 +120,7 @@ void multipleLevel(const std::string& configPath)
 
     const real H = config.getValue("boundaryLayerHeight", 1000.0); // boundary layer height in m
 
-    const real L_x = 6*H;
+    const real L_x = 10*H;
     const real L_y = 4*H;
     const real L_z = H;
 
@@ -142,13 +142,11 @@ void multipleLevel(const std::string& configPath)
     int nTWritePrecursor; real tStartPrecursor, posXPrecursor;
     if(writePrecursor)
     {
-        nTWritePrecursor      = config.getValue<int>("nTimestepsWritePrecursor");
+        nTWritePrecursor     = config.getValue<int>("nTimestepsWritePrecursor");
         tStartPrecursor      = config.getValue<real>("tStartPrecursor");
         posXPrecursor        = config.getValue<real>("posXPrecursor");
         useDistributions     = config.getValue<bool>("useDistributions", false);
-        precursorDirectory = config.getValue<std::string>("precursorDirectory");
-
-
+        precursorDirectory   = config.getValue<std::string>("precursorDirectory");
     }
 
     const bool readPrecursor = config.getValue("readPrecursor", false);
@@ -158,8 +156,8 @@ void multipleLevel(const std::string& configPath)
         nTReadPrecursor = config.getValue<int>("nTimestepsReadPrecursor");
         precursorDirectory = config.getValue<std::string>("precursorDirectory");
         useDistributions     = config.getValue<bool>("useDistributions", false);
-
     }
+
     // all in s
     const float tStartOut   = config.getValue<real>("tStartOut");
     const float tOut        = config.getValue<real>("tOut");
@@ -197,7 +195,7 @@ void multipleLevel(const std::string& configPath)
 
     para->setPrintFiles(true);
 
-    para->setForcing(pressureGradientLB, 0, 0);
+    if(!readPrecursor) para->setForcing(pressureGradientLB, 0, 0);
     para->setVelocityLB(velocityLB);
     para->setViscosityLB(viscosityLB);
     para->setVelocityRatio( dx / dt );
@@ -247,14 +245,14 @@ void multipleLevel(const std::string& configPath)
         xGridMax += overlap;
         xGridMin -= overlap;
     }
-    gridBuilder->setPeriodicBoundaryCondition(!readPrecursor, true, false);
 
     gridBuilder->addCoarseGrid( xGridMin,  0.0,  0.0,
                                 xGridMax,  L_y,  L_z, dx);
-    if(false)// Add refinement
+    if(true)// Add refinement
     {
-        gridBuilder->setNumberOfLayers(12, 8);
-        gridBuilder->addGrid( new Cuboid( xGridMin, 0.f, 0.f, xGridMax, L_y,  0.3*L_z) , 1 );
+        gridBuilder->setNumberOfLayers(4,0);
+        real xMaxRefinement = readPrecursor? xGridMax-H: xGridMax;   //Stop refinement some distance before outlet if domain ist not periodic
+        gridBuilder->addGrid( new Cuboid( xGridMin+dx, 0.f, 0.f, xMaxRefinement, L_y,  0.5*L_z) , 1 );
         para->setMaxLevel(2);
         scalingFactory.setScalingFactory(GridScalingFactory::GridScaling::ScaleCompressible);
     }
@@ -267,7 +265,7 @@ void multipleLevel(const std::string& configPath)
     }
     else         
     { 
-        gridBuilder->setPeriodicBoundaryCondition(true, true, false);
+        gridBuilder->setPeriodicBoundaryCondition(!readPrecursor, true, false);
     }
 
 	gridBuilder->buildGrids(lbmOrGks, true); // buildGrids() has to be called before setting the BCs!!!!
@@ -285,60 +283,68 @@ void multipleLevel(const std::string& configPath)
             gridBuilder->setCommunicationProcess(CommunicationDirections::MX, procID-1);
         }
 
-        if (isFirstSubDomain) {
+        if (isFirstSubDomain && !readPrecursor) {
             gridBuilder->findCommunicationIndices(CommunicationDirections::MX, lbmOrGks);
             gridBuilder->setCommunicationProcess(CommunicationDirections::MX, nProcs-1);
         }
 
-        if (isLastSubDomain) {
+        if (isLastSubDomain && !readPrecursor) {
             gridBuilder->findCommunicationIndices(CommunicationDirections::PX, lbmOrGks);
             gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 0);
         }
     }
     uint samplingOffset = 2;
     
+    std::cout << " precursorDirectory " << precursorDirectory << std::endl;
+    
     if(readPrecursor)
     {
-        auto precursor = createFileCollection(precursorDirectory + "/precursor", FileType::VTK);
-        gridBuilder->setPrecursorBoundaryCondition(SideType::MX, precursor, nTReadPrecursor);
-
-        gridBuilder->setStressBoundaryCondition(SideType::MZ,
-                                            0.0, 0.0, 1.0,              // wall normals
-                                            samplingOffset, z0/dx);     // wall model settinng
-        para->setHasWallModelMonitor(true);
-        
-        gridBuilder->setSlipBoundaryCondition(SideType::PZ,  0.0f,  0.0f, -1.0f);
+        if(isFirstSubDomain || nProcs == 1)
+        {   
+            auto precursor = createFileCollection(precursorDirectory + "/precursor", FileType::VTK);
+            gridBuilder->setPrecursorBoundaryCondition(SideType::MX, precursor, nTReadPrecursor);
+        }
 
-        
-        gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.f);
+        if(isLastSubDomain || nProcs == 1)
+        {
+            gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.f);
+        }     
     } 
-    else
-    {
-        gridBuilder->setSlipBoundaryCondition(SideType::PZ,  0.0,  0.0, -1.0);
 
-        gridBuilder->setStressBoundaryCondition(SideType::MZ,
+    gridBuilder->setStressBoundaryCondition(SideType::MZ,
                                             0.0, 0.0, 1.0,              // wall normals
-                                            samplingOffset, z0/dx);     // wall model settinng
-        para->setHasWallModelMonitor(true);
-    }
-
-
+                                            samplingOffset, z0, dx);     // wall model settinng
+    para->setHasWallModelMonitor(true);   
+    gridBuilder->setSlipBoundaryCondition(SideType::PZ,  0.0f,  0.0f, -1.0f); 
 
+    bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityCompressible);
     bcFactory.setStressBoundaryCondition(BoundaryConditionFactory::StressBC::StressPressureBounceBack);
     bcFactory.setSlipBoundaryCondition(BoundaryConditionFactory::SlipBC::SlipBounceBack); 
     bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::OutflowNonReflective);
     bcFactory.setPrecursorBoundaryCondition(useDistributions ? BoundaryConditionFactory::PrecursorBC::DistributionsPrecursor : BoundaryConditionFactory::PrecursorBC::VelocityPrecursor);
     para->setOutflowPressureCorrectionFactor(0.0); 
 
-
-
-    para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
+    if(readPrecursor)
+    {
+        para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
+        rho = (real)0.0;
+        vx  = rho = c0o1;
+        vx  = u_star/c4o10*(u_star/c4o10 * log(coordZ/z0+c1o1)) * dt/dx; 
+        vy  = c0o1; 
+        vz  = c0o1;
+        });
+    }
+    else
+    {
+        para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
         rho = (real)0.0;
         vx  = rho = c0o1;
         vx  = (u_star/c4o10 * log(coordZ/z0+c1o1) + c2o1*sin(cPi*c16o1*coordX/L_x)*sin(cPi*c8o1*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1)) * dt/dx; 
         vy  = c2o1*sin(cPi*c16o1*coordX/L_x)*sin(cPi*c8o1*coordZ/H)/(pow(coordZ/H,c2o1)+c1o1) * dt/dx; 
         vz  = c8o1*u_star/c4o10*(sin(cPi*c8o1*coordY/H)*sin(cPi*c8o1*coordZ/H)+sin(cPi*c8o1*coordX/L_x))/(pow(c1o2*L_z-coordZ, c2o1)+c1o1) * dt/dx;
-    });
+        });
+    }
+
 
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -359,45 +365,44 @@ void multipleLevel(const std::string& configPath)
     //     para->addProbe( wallModelProbe );
     // }
 
-    // SPtr<PlaneProbe> planeProbe1 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_1", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
-    // planeProbe1->setProbePlane(100.0, 0.0, 0, dx, L_y, L_z);
-    // planeProbe1->addAllAvailableStatistics();
-    // para->addProbe( planeProbe1 );
-
-    // if(readPrecursor)
-    // {
-    //     SPtr<PlaneProbe> planeProbe2 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_2", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
-    //     planeProbe2->setProbePlane(1000.0, 0.0, 0, dx, L_y, L_z);
-    //     planeProbe2->addAllAvailableStatistics();
-    //     para->addProbe( planeProbe2 );
-
-    //     SPtr<PlaneProbe> planeProbe3 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_3", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
-    //     planeProbe3->setProbePlane(1500.0, 0.0, 0, dx, L_y, L_z);
-    //     planeProbe3->addAllAvailableStatistics();
-    //     para->addProbe( planeProbe3 );
-
-    //     SPtr<PlaneProbe> planeProbe4 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_4", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
-    //     planeProbe4->setProbePlane(2000.0, 0.0, 0, dx, L_y, L_z);
-    //     planeProbe4->addAllAvailableStatistics();
-    //     para->addProbe( planeProbe4 );
-
-    //     SPtr<PlaneProbe> planeProbe5 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_5", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
-    //     planeProbe5->setProbePlane(2500.0, 0.0, 0, dx, L_y, L_z);
-    //     planeProbe5->addAllAvailableStatistics();
-    //     para->addProbe( planeProbe5 );
-
-    //     SPtr<PlaneProbe> planeProbe6 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_6", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
-    //     planeProbe6->setProbePlane(0.0, L_y/2.0, 0, L_x, dx, L_z);
-    //     planeProbe6->addAllAvailableStatistics();
-    //     para->addProbe( planeProbe6 );
-    // }
+    SPtr<PlaneProbe> planeProbe1 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_1", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
+    planeProbe1->setProbePlane(100.0, 0.0, 0, dx, L_y, L_z);
+    planeProbe1->addAllAvailableStatistics();
+    para->addProbe( planeProbe1 );
 
+    if(readPrecursor)
+    {
+        SPtr<PlaneProbe> planeProbe2 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_2", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
+        planeProbe2->setProbePlane(1000.0, 0.0, 0, dx, L_y, L_z);
+        planeProbe2->addAllAvailableStatistics();
+        para->addProbe( planeProbe2 );
+
+        SPtr<PlaneProbe> planeProbe3 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_3", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
+        planeProbe3->setProbePlane(1500.0, 0.0, 0, dx, L_y, L_z);
+        planeProbe3->addAllAvailableStatistics();
+        para->addProbe( planeProbe3 );
+
+        SPtr<PlaneProbe> planeProbe4 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_4", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
+        planeProbe4->setProbePlane(2000.0, 0.0, 0, dx, L_y, L_z);
+        planeProbe4->addAllAvailableStatistics();
+        para->addProbe( planeProbe4 );
+
+        SPtr<PlaneProbe> planeProbe5 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_5", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
+        planeProbe5->setProbePlane(2500.0, 0.0, 0, dx, L_y, L_z);
+        planeProbe5->addAllAvailableStatistics();
+        para->addProbe( planeProbe5 );
+
+        SPtr<PlaneProbe> planeProbe6 = SPtr<PlaneProbe>( new PlaneProbe("planeProbe_6", para->getOutputPath(), tStartAveraging/dt, 10, tStartOutProbe/dt, tOutProbe/dt) );
+        planeProbe6->setProbePlane(0.0, L_y/2.0, 0, L_x, dx, L_z);
+        planeProbe6->addAllAvailableStatistics();
+        para->addProbe( planeProbe6 );
+    }
 
-    // if(writePrecursor)
-    // {
-    //     SPtr<PrecursorWriter> precursorWriter = std::make_shared<PrecursorWriter>("precursor", para->getOutputPath()+precursorDirectory, posXPrecursor, 0, L_y, 0, L_z, tStartPrecursor/dt, nTWritePrecursor, useDistributions? OutputVariable::Distributions: OutputVariable::Velocities);
-    //     para->addProbe(precursorWriter);
-    // }
+    if(writePrecursor && (posXPrecursor > xMin && posXPrecursor < xMax))
+    {
+        SPtr<PrecursorWriter> precursorWriter = std::make_shared<PrecursorWriter>("precursor", para->getOutputPath()+precursorDirectory, posXPrecursor, 0, L_y, 0, L_z, tStartPrecursor/dt, nTWritePrecursor, useDistributions? OutputVariable::Distributions: OutputVariable::Velocities, 1000);
+        para->addProbe(precursorWriter);
+    }
 
     auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
     auto gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
diff --git a/src/gpu/GridGenerator/VelocitySetter/VelocitySetter.cpp b/src/gpu/GridGenerator/VelocitySetter/VelocitySetter.cpp
index ed1335d2314f6fe4459f711872c1af968b4a600d..95d3e4515582f791b84b5332de181589169d5f97 100644
--- a/src/gpu/GridGenerator/VelocitySetter/VelocitySetter.cpp
+++ b/src/gpu/GridGenerator/VelocitySetter/VelocitySetter.cpp
@@ -22,12 +22,12 @@ SPtr<VelocityFileCollection> createFileCollection(std::string prefix, FileType t
     }
 }
 
-SPtr<VelocityReader> createReaderForCollection(SPtr<VelocityFileCollection> fileCollection)
+SPtr<VelocityReader> createReaderForCollection(SPtr<VelocityFileCollection> fileCollection, uint readLevel)
 {
     switch(fileCollection->getFileType())
     {
         case FileType::VTK:
-            return std::make_shared<VTKReader>(std::static_pointer_cast<VTKFileCollection>(fileCollection));
+            return std::make_shared<VTKReader>(std::static_pointer_cast<VTKFileCollection>(fileCollection), readLevel);
             break;
         default:
             return nullptr;
@@ -201,17 +201,26 @@ void VTKFileCollection::findFiles()
                 if(f.good())
                     filesWithThisId.emplace_back(fname);
                 else
-                    foundLastPart = true;
-                
+                    foundLastPart = true;    
             }
             if(!filesWithThisId.empty())
+            {
+                VF_LOG_INFO("VTKFileCollection found {} files with ID {} level {}", filesWithThisId.size(), filesOnThisLevel.size(), files.size() );
                 filesOnThisLevel.push_back(filesWithThisId);
+            }
             else foundLastID = true;
         }
+
+
         if(!filesOnThisLevel.empty())
             files.push_back(filesOnThisLevel);
-        else foundLastLevel = true;
+        else 
+            foundLastLevel = true;
+
     }
+
+    if(files.empty())
+        VF_LOG_CRITICAL("VTKFileCollection found no files!"); 
 }
     
 void VelocityReader::getNeighbors(uint* neighborNT, uint* neighborNB, uint* neighborST, uint* neighborSB)
@@ -261,7 +270,7 @@ void VTKReader::fillArrays(std::vector<real>& coordsY, std::vector<real>& coords
     this->planeNeighborNB.reserve(this->nPoints);
     this->planeNeighborST.reserve(this->nPoints);
     this->planeNeighborSB.reserve(this->nPoints);
-
+    std::cout << "nPoints " << nPoints << std::endl;
     for(uint i=0; i<nPoints; i++)
     {
 
@@ -269,90 +278,87 @@ void VTKReader::fillArrays(std::vector<real>& coordsY, std::vector<real>& coords
         real posZ = coordsZ[i];
         bool foundNT = false, foundNB = false, foundST = false, foundSB = false, foundAll = false;
 
-        for(int level = (int)this->fileCollection->files.size()-1; level>=0; level--) // go backwards to find finest nodes first
+        uint level = this->readLevel;
+        for(int fileId=0; fileId<(int)this->fileCollection->files[level].size(); fileId++)
         {
-            for(int fileId=0; fileId<(int)this->fileCollection->files[level].size(); fileId++)
+            VTKFile &file = this->fileCollection->files[level][fileId][0];
+            if(!file.inBoundingBox(posY, posZ, 0.0f)) continue;
+            // y in simulation is x in precursor/file, z in simulation is y in precursor/file 
+            // simulation -> file: N -> E, S -> W, T -> N, B -> S
+            int idx = file.findNeighborWSB(posY, posZ, 0.f);                            //!> index of nearest WSB neighbor on precursor file
+            if(idx!=-1)
             {
-                VTKFile file = this->fileCollection->files[level][fileId][0];
-                if(!file.inBoundingBox(posY, posZ, 0.0f)) continue;
-                // y in simulation is x in precursor/file, z in simulation is y in precursor/file 
-                // simulation -> file: N -> E, S -> W, T -> N, B -> S
-                int idx = file.findNeighborWSB(posY, posZ, 0.f);
-                if(idx!=-1)
+                // Filter for exact matches
+                if(abs(posY-file.getX(idx)) < max_diff && abs(posZ-file.getY(idx)) < max_diff) 
                 {
-                    // Filter for exact matches
-                    if(abs(posY-file.getX(idx)) < max_diff && abs(posZ-file.getY(idx)) < max_diff) 
-                    {
-                        this->weightsNT.emplace_back(1e6f);
-                        this->weightsNB.emplace_back(0.f);
-                        this->weightsST.emplace_back(0.f);
-                        this->weightsSB.emplace_back(0.f);
-                        uint writeIdx = this->getWriteIndex(level, fileId, idx);
-                        this->planeNeighborNT.push_back(writeIdx);
-                        this->planeNeighborNB.push_back(writeIdx);
-                        this->planeNeighborST.push_back(writeIdx);
-                        this->planeNeighborSB.push_back(writeIdx);
-                        foundNT = true; foundNB = true; foundSB = true; foundST = true;
-                    } 
-                    else
-                    {
-                        perfect_match = false;
-                    }
-
-                    if(!foundSB)
-                    {
-                        foundSB = true;
-                        real dy = file.getX(idx)-posY;
-                        real dz = file.getY(idx)-posZ;
-                        this->weightsSB.emplace_back(1.f/(dy*dy+dz*dz+eps));
-                        this->planeNeighborSB.emplace_back(getWriteIndex(level, fileId, idx));
-                    }
-                    
+                    this->weightsNT.emplace_back(1e6f);
+                    this->weightsNB.emplace_back(0.f);
+                    this->weightsST.emplace_back(0.f);
+                    this->weightsSB.emplace_back(0.f);
+                    uint writeIdx = this->getWriteIndex(level, fileId, idx);            //!> writeIdx: index on host/device array where precursor value will be written to after loading from file
+                    this->planeNeighborNT.push_back(writeIdx);                          //!> neighbor lists mapping where BC kernel should read from on host/device array
+                    this->planeNeighborNB.push_back(writeIdx);
+                    this->planeNeighborST.push_back(writeIdx);
+                    this->planeNeighborSB.push_back(writeIdx);
+                    foundNT = true; foundNB = true; foundSB = true; foundST = true;
                 } 
+                else
+                {
+                    perfect_match = false;
+                }
 
-                if(!foundNT) //NT in simulation is EN in precursor
+                if(!foundSB)
                 {
-                    int idx = file.findNeighborENB(posY, posZ, 0.f);
-                    if(idx!=-1)
-                    {
-                        foundNT = true;
-                        real dy = file.getX(idx)-posY;
-                        real dz = file.getY(idx)-posZ;
-                        this->weightsNT.emplace_back(1.f/(dy*dy+dz*dz+eps));
-                        this->planeNeighborNT.emplace_back(getWriteIndex(level, fileId, idx));
-                    }
+                    foundSB = true;
+                    real dy = file.getX(idx)-posY;
+                    real dz = file.getY(idx)-posZ;
+                    this->weightsSB.emplace_back(1.f/(dy*dy+dz*dz+eps));
+                    this->planeNeighborSB.emplace_back(getWriteIndex(level, fileId, idx));
                 }
+                
+            } 
+            
+            if(!foundNT) //NT in simulation is EN in precursor
+            {
+                int idx = file.findNeighborENB(posY, posZ, 0.f);
+                if(idx!=-1)
+                {
+                    foundNT = true;
+                    real dy = file.getX(idx)-posY;
+                    real dz = file.getY(idx)-posZ;
+                    this->weightsNT.emplace_back(1.f/(dy*dy+dz*dz+eps));
+                    this->planeNeighborNT.emplace_back(getWriteIndex(level, fileId, idx));
+                }
+            }
 
-                if(!foundNB) //NB in simulation is ES in precursor
+            if(!foundNB) //NB in simulation is ES in precursor
+            {
+                int idx = file.findNeighborESB(posY, posZ, 0.f);
+                if(idx!=-1)
                 {
-                    int idx = file.findNeighborESB(posY, posZ, 0.f);
-                    if(idx!=-1)
-                    {
-                        foundNB = true;
-                        real dy = file.getX(idx)-posY;
-                        real dz = file.getY(idx)-posZ;
-                        this->weightsNB.emplace_back(1.f/(dy*dy+dz*dz+eps));
-                        this->planeNeighborNT.emplace_back(getWriteIndex(level, fileId, idx));
-                    }
+                    foundNB = true;
+                    real dy = file.getX(idx)-posY;
+                    real dz = file.getY(idx)-posZ;
+                    this->weightsNB.emplace_back(1.f/(dy*dy+dz*dz+eps));
+                    this->planeNeighborNT.emplace_back(getWriteIndex(level, fileId, idx));
                 }
+            }
 
-                if(!foundST) //ST in simulation is WN in precursor
+            if(!foundST) //ST in simulation is WN in precursor
+            {
+                int idx = file.findNeighborWNB(posY, posZ, 0.f);
+                if(idx!=-1)
                 {
-                    int idx = file.findNeighborWNB(posY, posZ, 0.f);
-                    if(idx!=-1)
-                    {
-                        foundST = true;
-                        real dy = file.getX(idx)-posY;
-                        real dz = file.getY(idx)-posZ;
-                        this->weightsST.emplace_back(1.f/(dy*dy+dz*dz+eps));
-                        this->planeNeighborST.emplace_back(getWriteIndex(level, fileId, idx));
-                    }
+                    foundST = true;
+                    real dy = file.getX(idx)-posY;
+                    real dz = file.getY(idx)-posZ;
+                    this->weightsST.emplace_back(1.f/(dy*dy+dz*dz+eps));
+                    this->planeNeighborST.emplace_back(getWriteIndex(level, fileId, idx));
                 }
+            }
 
-                foundAll = foundNT && foundNB && foundST && foundSB;
+            foundAll = foundNT && foundNB && foundST && foundSB;
 
-                if(foundAll) break;
-            }
             if(foundAll) break;
         }
 
@@ -375,10 +381,10 @@ uint VTKReader::getWriteIndex(int level, int id, int linearIndex)
 {
     auto it = std::find(this->writeIndices[level][id].begin(), this->writeIndices[level][id].end(), linearIndex);
     uint idx = it-this->writeIndices[level][id].begin();
-    if(it==this->writeIndices[level][id].end())
+    if(it==this->writeIndices[level][id].end())                         
     {
-        this->writeIndices[level][id].push_back(this->nPointsRead);
-        this->readIndices[level][id].push_back(linearIndex);
+        this->writeIndices[level][id].push_back(this->nPointsRead);     //!> index on host/device array where value from file will be written to
+        this->readIndices[level][id].push_back(linearIndex);            //!> index in file that will be read from 
         this->nPointsRead++;
     }
     return idx;
@@ -387,8 +393,9 @@ uint VTKReader::getWriteIndex(int level, int id, int linearIndex)
 
 void VTKReader::getNextData(real* data, uint numberOfNodes, real time)
 {
-    for(size_t level=0; level<this->fileCollection->files.size(); level++)
-    {
+    // for(size_t level=0; level<this->fileCollection->files.size(); level++)
+    // {
+        uint level = this->readLevel;
         for(size_t id=0; id<this->fileCollection->files[level].size(); id++)
         {
             size_t nF = this->nFile[level][id];
@@ -421,5 +428,5 @@ void VTKReader::getNextData(real* data, uint numberOfNodes, real time)
             file->getData(data, numberOfNodes, this->readIndices[level][id], this->writeIndices[level][id], off, this->writingOffset);
             this->nFile[level][id] = nF;
         }
-    }
+    // }
 }
\ No newline at end of file
diff --git a/src/gpu/GridGenerator/VelocitySetter/VelocitySetter.h b/src/gpu/GridGenerator/VelocitySetter/VelocitySetter.h
index fe8fbbacf51843c599769a58d31c1d8e1fa5b0d6..baff14491658b9091545f1494e58ea2178c2f227 100644
--- a/src/gpu/GridGenerator/VelocitySetter/VelocitySetter.h
+++ b/src/gpu/GridGenerator/VelocitySetter/VelocitySetter.h
@@ -173,8 +173,9 @@ protected:
 class VTKReader : public VelocityReader
 {
 public:
-    VTKReader(SPtr<VTKFileCollection> _fileCollection):
-    fileCollection(_fileCollection)    
+    VTKReader(SPtr<VTKFileCollection> _fileCollection, uint _readLevel):
+    fileCollection(_fileCollection), 
+    readLevel(_readLevel)
     {
         this->nQuantities = fileCollection->getNumberOfQuantities();
         read = std::async([](){});
@@ -189,11 +190,12 @@ private:
     std::vector<std::vector<std::vector<uint>>> readIndices, writeIndices;
     std::vector<std::vector<size_t>> nFile;
     SPtr<VTKFileCollection> fileCollection;
+    uint readLevel;
     std::future<void> read;
 };
 
 
 SPtr<VelocityFileCollection> createFileCollection(std::string prefix, FileType type);
-SPtr<VelocityReader> createReaderForCollection(SPtr<VelocityFileCollection> fileCollection);
+SPtr<VelocityReader> createReaderForCollection(SPtr<VelocityFileCollection> fileCollection, uint readLevel);
 
 #endif //VELOCITY_SETTER_H_
\ No newline at end of file
diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h
index f70aa0cf886019e6a97ca5c86a0cdafa1296b141..ebe4a48f3d3eeb9f51486da0c7eb56e38238f2bb 100644
--- a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h
+++ b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h
@@ -354,7 +354,7 @@ private:
         return vf::gpu::BC_VELOCITY;
     }
 public:
-    uint nTRead;
+    uint nTRead; //!> read data every nth timestep
 
 private:
     real velocityX = 0.0;
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
index bbc8c6a72359b6d97a774c695bc91ee20683b0ba..7983ca96205066447aecd39418cd1503d180cade 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
+++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
@@ -119,13 +119,19 @@ void LevelGridBuilder::setSlipGeometryBoundaryCondition(real normalX, real norma
     }
 }
 
+//=======================================================================================
+//! \brief Set stress boundary concdition using iMEM
+//! \param samplingOffset number of grid points above boundary where velocity for wall model is sampled
+//! \param z0 roghness length [m]
+//! \param dx dx of level 0 [m] 
+//!
 void LevelGridBuilder::setStressBoundaryCondition(  SideType sideType, 
                                                     real nomalX, real normalY, real normalZ, 
-                                                    uint samplingOffset, real z0)
+                                                    uint samplingOffset, real z0, real dx)
 {
     for (uint level = 0; level < getNumberOfGridLevels(); level++)
     {
-        SPtr<StressBoundaryCondition> stressBoundaryCondition = StressBoundaryCondition::make(nomalX, normalY, normalZ, samplingOffset, z0);
+        SPtr<StressBoundaryCondition> stressBoundaryCondition = StressBoundaryCondition::make(nomalX, normalY, normalZ, samplingOffset, z0/(dx*(level+1)));
 
         auto side = SideFactory::make(sideType);
 
@@ -245,11 +251,26 @@ void LevelGridBuilder::setNoSlipGeometryBoundaryCondition()
     }
 }
 
-void LevelGridBuilder::setPrecursorBoundaryCondition(SideType sideType, SPtr<VelocityFileCollection> fileCollection, int nTRead, real velocityX, real velocityY, real velocityZ)
+void LevelGridBuilder::setPrecursorBoundaryCondition(SideType sideType, SPtr<VelocityFileCollection> fileCollection, int nTRead, 
+                                                        real velocityX, real velocityY, real velocityZ, std::vector<uint> fileLevelToGridLevelMap)
 {
+    if(fileLevelToGridLevelMap.empty())                         
+    {
+        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Mapping precursor file levels to the corresponding grid levels" << "\n";
+
+        for (uint level = 0; level < getNumberOfGridLevels(); level++)  
+            fileLevelToGridLevelMap.push_back(level);
+    }
+    else
+    {
+        if(fileLevelToGridLevelMap.size()!=getNumberOfGridLevels())
+            throw std::runtime_error("In setPrecursorBoundaryCondition: fileLevelToGridLevelMap does not match with the number of levels");
+        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Using user defined file to grid level mapping"  << "\n";
+    }
+
     for (uint level = 0; level < getNumberOfGridLevels(); level++)
     {
-        auto reader = createReaderForCollection(fileCollection);
+        auto reader = createReaderForCollection(fileCollection, fileLevelToGridLevelMap[level]);
         SPtr<PrecursorBoundaryCondition> precursorBoundaryCondition = PrecursorBoundaryCondition::make( reader, nTRead, velocityX, velocityY, velocityZ);
 
         auto side = SideFactory::make(sideType);
@@ -634,14 +655,13 @@ void LevelGridBuilder::getPrecursorValues(  uint* neighborNT, uint* neighborNB,
     int allNodesCounter = 0;
     uint tmpNTRead = 0;
     size_t tmpNQuantities = 0;
-
+    
     for (auto boundaryCondition : boundaryConditions[level]->precursorBoundaryConditions)
     {
         if( tmpNTRead == 0 )
             tmpNTRead = boundaryCondition->nTRead;
         if( tmpNTRead != boundaryCondition->nTRead )
             throw std::runtime_error("All precursor boundary conditions must have the same NTRead value");
-
         auto BCreader = boundaryCondition->getReader();
         BCreader->setWritingOffset(allIndicesCounter);
         reader.push_back(BCreader);
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
index 5d069606bdd5a67915a1fa9fb01f045b00ed1efd..caaa880efe9ecddb1ca2c8f20cf1b07eaaded58d 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
@@ -79,12 +79,13 @@ public:
     GRIDGENERATOR_EXPORT virtual ~LevelGridBuilder();
 
     GRIDGENERATOR_EXPORT void setSlipBoundaryCondition(SideType sideType, real nomalX, real normalY, real normalZ);
-    GRIDGENERATOR_EXPORT void setStressBoundaryCondition(SideType sideType, real nomalX, real normalY, real normalZ, uint samplingOffset, real z0);
+    GRIDGENERATOR_EXPORT void setStressBoundaryCondition(SideType sideType, real nomalX, real normalY, real normalZ, uint samplingOffset, real z0, real dx);
     GRIDGENERATOR_EXPORT void setVelocityBoundaryCondition(SideType sideType, real vx, real vy, real vz);
     GRIDGENERATOR_EXPORT void setPressureBoundaryCondition(SideType sideType, real rho);
     GRIDGENERATOR_EXPORT void setPeriodicBoundaryCondition(bool periodic_X, bool periodic_Y, bool periodic_Z);
     GRIDGENERATOR_EXPORT void setNoSlipBoundaryCondition(SideType sideType);
-    GRIDGENERATOR_EXPORT void setPrecursorBoundaryCondition(SideType sideType, SPtr<VelocityFileCollection> fileCollection, int nTRead, real velocityX=0.0f, real velocityY=0.0f, real velocityZ=0.0f);
+    GRIDGENERATOR_EXPORT void setPrecursorBoundaryCondition(SideType sideType, SPtr<VelocityFileCollection> fileCollection, int nTRead, real velocityX=0.0f, real velocityY=0.0f, real velocityZ=0.0f,          
+                                                                std::vector<uint> fileLevelToGridLevelMap = {});
 
     GRIDGENERATOR_EXPORT void setEnableFixRefinementIntoTheWall(bool enableFixRefinementIntoTheWall);
 
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
index 2e7a307533cd1244202ae21d3007392e8c2ba3ed..17aea9cc1df1cf4862eb304d0cf5572d0d892847 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
@@ -13,6 +13,8 @@
 #include "CollisionStrategy.h"
 #include "RefinementStrategy.h"
 
+#include "Output/DistributionDebugInspector.h"
+
 void UpdateGrid27::updateGrid(int level, unsigned int t)
 {
     //////////////////////////////////////////////////////////////////////////
@@ -22,6 +24,13 @@ void UpdateGrid27::updateGrid(int level, unsigned int t)
         updateGrid(level + 1, t);
     }
 
+    // DistributionDebugInspector pre_inspector(1, 0.0, 16.0, 1000.0, 1030.0, 500.0, 700.0, "Pre Collision");
+    // DistributionDebugInspector post_inspector(1, 0.0, 16.0, 1000.0, 1030.0, 500.0, 700.0, "Post Collision");
+    //////////////////////////////////////////////////////////////////////////
+    
+    interactWithProbes(level, t);
+
+    // pre_inspector.inspect(para, level, t);
     //////////////////////////////////////////////////////////////////////////
 
     collision(this, para.get(), level, t);
@@ -47,13 +56,13 @@ void UpdateGrid27::updateGrid(int level, unsigned int t)
 
     //////////////////////////////////////////////////////////////////////////
     if( level != para->getFine() )
-    {
+    {   
         refinement(this, para.get(), level);
     }
 
+
     interactWithActuators(level, t);
 
-    interactWithProbes(level, t);
 }
 
 void UpdateGrid27::collisionAllNodes(int level, unsigned int t)
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
index 41f6ba29fec427ed7b5a9b1809020a2ee2b064fe..81e216e0556eab09d3c5af8c940be5a414d331df 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
@@ -319,7 +319,7 @@ void GridGenerator::allocArrays_BoundaryValues()
 
     for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
         const auto numberOfPrecursorValues = int(builder->getPrecursorSize(level));
-        std::cout << "size precursor level " << level << " : " << numberOfPrecursorValues << std::endl;
+        *logging::out << logging::Logger::INFO_INTERMEDIATE << "size precursor level " << level << " : " << numberOfPrecursorValues << "\n";
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         int blocks = (numberOfPrecursorValues / para->getParH(level)->numberofthreads) + 1;
         para->getParH(level)->precursorBC.sizeQ = blocks * para->getParH(level)->numberofthreads;
@@ -329,13 +329,12 @@ void GridGenerator::allocArrays_BoundaryValues()
         para->getParD(level)->precursorBC.numberOfBCnodes = numberOfPrecursorValues;
         para->getParH(level)->numberOfPrecursorBCnodesRead = numberOfPrecursorValues * para->getD3Qxx();
         para->getParD(level)->numberOfPrecursorBCnodesRead = numberOfPrecursorValues * para->getD3Qxx();
-
+        
         if (numberOfPrecursorValues > 1)
         {
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             cudaMemoryManager->cudaAllocPrecursorBC(level);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
             builder->getPrecursorValues(
                     para->getParH(level)->precursorBC.planeNeighborNT, para->getParH(level)->precursorBC.planeNeighborNB, 
                     para->getParH(level)->precursorBC.planeNeighborST, para->getParH(level)->precursorBC.planeNeighborSB, 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
index b35e01eb997723eb12f5645857bc230536fe97fe..b4bdb61718f540280fd1d3510571eb920d9c7b4a 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
@@ -1304,6 +1304,7 @@ __global__ void PrecursorDeviceDistributions( 	int* subgridDistanceIndices,
 												uint* neighborX, 
 												uint* neighborY, 
 												uint* neighborZ,
+												uint* typeOfGridNode,
 												uint* neighborsNT, 
 												uint* neighborsNB,
 												uint* neighborsST,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
index 489eb0a60ddb8bf9e1605a68e6d0f62211e26575..0c12982fcdf550c14c00c7f3dd0d2a6a08aed573 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
@@ -3247,7 +3247,7 @@ void PrecursorDevDistributions( LBMSimulationParameter* parameterDevice, QforPre
 	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
 
 	PrecursorDeviceDistributions<<< grid.grid, grid.threads >>>(boundaryCondition->k, boundaryCondition->numberOfBCnodes, boundaryCondition->numberOfPrecursorNodes, parameterDevice->distributions.f[0],
-		parameterDevice->neighborX, parameterDevice->neighborY, parameterDevice->neighborZ,
+		parameterDevice->neighborX, parameterDevice->neighborY, parameterDevice->neighborZ, parameterDevice->typeOfGridNode,
 		boundaryCondition->planeNeighborNT, boundaryCondition->planeNeighborNB, boundaryCondition->planeNeighborST, boundaryCondition->planeNeighborSB,
 		boundaryCondition->weightsNT, boundaryCondition->weightsNB, boundaryCondition->weightsST, boundaryCondition->weightsSB,
 		boundaryCondition->last, boundaryCondition->current,
@@ -3873,6 +3873,7 @@ void ScaleCF_compressible(LBMSimulationParameter * parameterDeviceC, LBMSimulati
       parameterDeviceC->omega,
       parameterDeviceF->omega,
       offsetCF);
+
    getLastCudaError("scaleCF_compressible execution failed");
 }
 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu
index a0daa5c229aabac360a71ae0f538f74124e3e963..3c3eeffa2c8f26950021054e28ffe9124b75ba98 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/PrecursorBCs27.cu
@@ -509,7 +509,7 @@ __global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
     ////////////////////////////////////////////////////////////////////////////////
 
     Distributions27 dist;
-    getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+    getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
 
     unsigned int KQK  = subgridDistanceIndices[k];
     unsigned int kzero= KQK;
@@ -617,32 +617,34 @@ __global__ void PrecursorDeviceEQ27( 	int* subgridDistanceIndices,
       //! write the new distributions to the bc nodes
       //!
       (dist.f[DIR_P00   ])[ke  ] = f_W   ;
-      (dist.f[DIR_M00   ])[kw  ] = f_E   ;
-      (dist.f[DIR_0P0   ])[kn  ] = f_S   ;
-      (dist.f[DIR_0M0   ])[ks  ] = f_N   ;
-      (dist.f[DIR_00P   ])[kt  ] = f_B   ;
-      (dist.f[DIR_00M   ])[kb  ] = f_T   ;
       (dist.f[DIR_PP0  ])[kne  ] = f_SW  ;
-      (dist.f[DIR_MM0  ])[ksw  ] = f_NE  ;
+      (dist.f[DIR_P0M  ])[kbe  ] = f_TW  ;
       (dist.f[DIR_PM0  ])[kse  ] = f_NW  ;
-      (dist.f[DIR_MP0  ])[knw  ] = f_SE  ;
+      (dist.f[DIR_PMP ])[ktse ] = f_BNW ;
       (dist.f[DIR_P0P  ])[kte  ] = f_BW  ;
+      (dist.f[DIR_PPM ])[kbne ] = f_TSW ;
+      (dist.f[DIR_PPP ])[ktne ] = f_BSW ;
+      (dist.f[DIR_PMM ])[kbse ] = f_TNW ;
+      
+      (dist.f[DIR_M00   ])[kw  ] = f_E   ;
+      (dist.f[DIR_MM0  ])[ksw  ] = f_NE  ;
       (dist.f[DIR_M0M  ])[kbw  ] = f_TE  ;
-      (dist.f[DIR_P0M  ])[kbe  ] = f_TW  ;
+      (dist.f[DIR_MP0  ])[knw  ] = f_SE  ;
       (dist.f[DIR_M0P  ])[ktw  ] = f_BE  ;
+      (dist.f[DIR_MMM ])[kbsw ] = f_TNE ;
+      (dist.f[DIR_MMP ])[ktsw ] = f_BNE ;
+      (dist.f[DIR_MPP ])[ktnw ] = f_BSE ;
+      (dist.f[DIR_MPM ])[kbnw ] = f_TSE ;
+
+      (dist.f[DIR_0P0   ])[kn  ] = f_S   ;
+      (dist.f[DIR_0M0   ])[ks  ] = f_N   ;
+      (dist.f[DIR_00P   ])[kt  ] = f_B   ;
+      (dist.f[DIR_00M   ])[kb  ] = f_T   ;
       (dist.f[DIR_0PP  ])[ktn  ] = f_BS  ;
       (dist.f[DIR_0MM  ])[kbs  ] = f_TN  ;
       (dist.f[DIR_0PM  ])[kbn  ] = f_TS  ;
       (dist.f[DIR_0MP  ])[kts  ] = f_BN  ;
       (dist.f[DIR_000])[kzero] = f_ZERO;
-      (dist.f[DIR_PPP ])[ktne ] = f_BSW ;
-      (dist.f[DIR_MMP ])[ktsw ] = f_BNE ;
-      (dist.f[DIR_PMP ])[ktse ] = f_BNW ;
-      (dist.f[DIR_MPP ])[ktnw ] = f_BSE ;
-      (dist.f[DIR_PPM ])[kbne ] = f_TSW ;
-      (dist.f[DIR_MMM ])[kbsw ] = f_TNE ;
-      (dist.f[DIR_PMM ])[kbse ] = f_TNW ;
-      (dist.f[DIR_MPM ])[kbnw ] = f_TSE ;
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -654,6 +656,7 @@ __global__ void PrecursorDeviceDistributions( 	int* subgridDistanceIndices,
 												uint* neighborX, 
 												uint* neighborY, 
 												uint* neighborZ,
+                                                uint* typeOfGridNode,
 												uint* neighborsNT, 
 												uint* neighborsNB,
 												uint* neighborsST,
@@ -760,7 +763,7 @@ __global__ void PrecursorDeviceDistributions( 	int* subgridDistanceIndices,
         f8NextInterp = f8Next[kNT];
     }
     Distributions27 dist;
-    getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+    getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
 
     unsigned int KQK  = subgridDistanceIndices[k];
     // unsigned int kzero= KQK;
@@ -803,6 +806,7 @@ __global__ void PrecursorDeviceDistributions( 	int* subgridDistanceIndices,
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
+//NOTE: Has not been tested after bug fix!
 __global__ void QPrecursorDeviceDistributions( 	int* subgridDistanceIndices,
                                                 real* subgridDistances,
                                                 int sizeQ,
@@ -918,7 +922,7 @@ __global__ void QPrecursorDeviceDistributions( 	int* subgridDistanceIndices,
         f8NextInterp = f8Next[kNT];
     }
     Distributions27 dist;
-    getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+    getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
 
     unsigned int KQK  = subgridDistanceIndices[k];
     // unsigned int kzero= KQK;
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
index 24e9c743c2ce2a3e236be99ecf03575a6b47df95..028d7d4f0d144574761f129eef5f1df341cfbed1 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
@@ -396,11 +396,13 @@ void BCKernelManager::runPrecursorBCKernelPost(int level, uint t, CudaMemoryMana
 {
     if(para->getParH(level)->precursorBC.numberOfBCnodes == 0) return;
 
+    uint t_level = para->getTimeStep(level, t, true);
+
     uint lastTime =    (para->getParD(level)->precursorBC.nPrecursorReads-2)*para->getParD(level)->precursorBC.nTRead; // timestep currently loaded into last arrays
     uint currentTime = (para->getParD(level)->precursorBC.nPrecursorReads-1)*para->getParD(level)->precursorBC.nTRead; // timestep currently loaded into current arrays
     uint nextTime =     para->getParD(level)->precursorBC.nPrecursorReads   *para->getParD(level)->precursorBC.nTRead; // timestep currently loaded into next arrays
-
-    if(t>=currentTime)
+    
+    if(t_level>=currentTime)
     {
         //cycle time
         lastTime = currentTime;
@@ -414,6 +416,7 @@ void BCKernelManager::runPrecursorBCKernelPost(int level, uint t, CudaMemoryMana
         para->getParD(level)->precursorBC.next = tmp;
 
         real loadTime = nextTime*pow(2,-level)*para->getTimeRatio();
+
         for(auto reader : para->getParH(level)->velocityReader)
         {   
             reader->getNextData(para->getParH(level)->precursorBC.next, para->getParH(level)->precursorBC.numberOfPrecursorNodes, loadTime);
@@ -423,6 +426,6 @@ void BCKernelManager::runPrecursorBCKernelPost(int level, uint t, CudaMemoryMana
         para->getParH(level)->precursorBC.nPrecursorReads++;  
     }
     
-    real tRatio = real(t-lastTime)/para->getParD(level)->precursorBC.nTRead;
+    real tRatio = real(t_level-lastTime)/para->getParD(level)->precursorBC.nTRead;
     precursorBoundaryConditionPost(para->getParD(level).get(), &para->getParD(level)->precursorBC, tRatio, para->getVelocityRatio());
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu
new file mode 100644
index 0000000000000000000000000000000000000000..9ff86ea36c5b3c465990d622547d98cb4686f929
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu
@@ -0,0 +1,143 @@
+
+#include "DistributionDebugInspector.h"
+
+#include "Parameter/Parameter.h"
+#include "LBM/LB.h" 
+#include "lbm/constants/D3Q27.h"
+#include <lbm/constants/NumericConstants.h>
+#include "Kernel/Utilities/DistributionHelper.cuh"
+
+#include <cuda/CudaGrid.h>
+#include <cuda.h>
+
+#include <iostream>
+
+using namespace vf::lbm::constant;
+using namespace vf::lbm::dir;
+
+__global__ void printFs(  real* distributions,
+                        bool isEvenTimestep,
+                        unsigned long numberOfFluidNodes,
+                        uint* neighborX,
+                        uint* neighborY,
+                        uint* neighborZ,
+                        uint* typeOfGridNode,
+                        real* coordX,
+                        real* coordY,
+                        real* coordZ,
+                        real minX,
+                        real maxX,
+                        real minY,
+                        real maxY,
+                        real minZ,
+                        real maxZ)
+                        {
+                            const unsigned k_000 = vf::gpu::getNodeIndex();
+
+                            if (k_000 >= numberOfFluidNodes || typeOfGridNode[k_000]!=GEO_FLUID ) 
+                                return;
+
+                            real coordNodeX = coordX[k_000];
+                            real coordNodeY = coordY[k_000];
+                            real coordNodeZ = coordZ[k_000];
+
+                            if( coordNodeX>=minX && coordNodeX<=maxX &&
+                                coordNodeY>=minY && coordNodeY<=maxY &&
+                                coordNodeZ>=minZ && coordNodeZ<=maxZ    )
+                                {
+                                    Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, numberOfFluidNodes, isEvenTimestep);
+                                    ////////////////////////////////////////////////////////////////////////////////
+                                    //! - Set neighbor indices (necessary for indirect addressing)
+                                    uint k_M00 = neighborX[k_000];
+                                    uint k_0M0 = neighborY[k_000];
+                                    uint k_00M = neighborZ[k_000];
+                                    uint k_MM0 = neighborY[k_M00];
+                                    uint k_M0M = neighborZ[k_M00];
+                                    uint k_0MM = neighborZ[k_0M0];
+                                    uint k_MMM = neighborZ[k_MM0];
+                                    ////////////////////////////////////////////////////////////////////////////////////
+                                    //! - Set local distributions
+                                    //!
+                                    real f_000 = (dist.f[DIR_000])[k_000];
+                                    real f_P00 = (dist.f[DIR_P00])[k_000];
+                                    real f_M00 = (dist.f[DIR_M00])[k_M00];
+                                    real f_0P0 = (dist.f[DIR_0P0])[k_000];
+                                    real f_0M0 = (dist.f[DIR_0M0])[k_0M0];
+                                    real f_00P = (dist.f[DIR_00P])[k_000];
+                                    real f_00M = (dist.f[DIR_00M])[k_00M];
+                                    real f_PP0 = (dist.f[DIR_PP0])[k_000];
+                                    real f_MM0 = (dist.f[DIR_MM0])[k_MM0];
+                                    real f_PM0 = (dist.f[DIR_PM0])[k_0M0];
+                                    real f_MP0 = (dist.f[DIR_MP0])[k_M00];
+                                    real f_P0P = (dist.f[DIR_P0P])[k_000];
+                                    real f_M0M = (dist.f[DIR_M0M])[k_M0M];
+                                    real f_P0M = (dist.f[DIR_P0M])[k_00M];
+                                    real f_M0P = (dist.f[DIR_M0P])[k_M00];
+                                    real f_0PP = (dist.f[DIR_0PP])[k_000];
+                                    real f_0MM = (dist.f[DIR_0MM])[k_0MM];
+                                    real f_0PM = (dist.f[DIR_0PM])[k_00M];
+                                    real f_0MP = (dist.f[DIR_0MP])[k_0M0];
+                                    real f_PPP = (dist.f[DIR_PPP])[k_000];
+                                    real f_MPP = (dist.f[DIR_MPP])[k_M00];
+                                    real f_PMP = (dist.f[DIR_PMP])[k_0M0];
+                                    real f_MMP = (dist.f[DIR_MMP])[k_MM0];
+                                    real f_PPM = (dist.f[DIR_PPM])[k_00M];
+                                    real f_MPM = (dist.f[DIR_MPM])[k_M0M];
+                                    real f_PMM = (dist.f[DIR_PMM])[k_0MM];
+                                    real f_MMM = (dist.f[DIR_MMM])[k_MMM];
+
+                                    real drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) +
+                                                (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) +
+                                                ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) +
+                                                ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) +
+                                                    f_000;
+
+                                    real oneOverRho = c1o1 / (c1o1 + drho);
+
+                                    real vvx = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) +
+                                                (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) *
+                                            oneOverRho;
+                                    real vvy = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) +
+                                                (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) *
+                                            oneOverRho;
+                                    real vvz = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) +
+                                                (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) *
+                                            oneOverRho;
+
+                                    printf("Node %u \t (%f\t%f\t%f)\n rho: %f\t velo: %f\t %f \t %f\n\n" , k_000, coordNodeX, coordNodeY, coordNodeZ, drho, vvx, vvy, vvz);
+                                    printf("Node %u \t (%f\t%f\t%f)\n f_M00\t%f\t f_000\t%f\t f_P00\t%f\n f_MP0\t%f\t f_0P0\t%f\t f_PP0\t%f\n f_MM0\t%f\t f_0M0\t%f\t f_PM0\t%f\n f_M0P\t%f\t f_00P\t%f\t f_P0P\t%f\n f_M0M\t%f\t f_00M\t%f\t f_P0M\t%f\n f_MPP\t%f\t f_0PP\t%f\t f_PPP\t%f\n f_MPM\t%f\t f_0PM\t%f\t f_PPM\t%f\n f_MMP\t%f\t f_0MP\t%f\t f_PMP\t%f\n f_MMM\t%f\t f_0MM\t%f\t f_PMM\t%f\n\n\n" , k_000, coordNodeX, coordNodeY, coordNodeZ, f_M00, f_000, f_P00,f_MP0, f_0P0, f_PP0, f_MM0, f_0M0, f_PM0, f_M0P, f_00P, f_P0P, f_M0M, f_00M, f_P0M, f_MPP, f_0PP, f_PPP, f_MPM, f_0PM, f_PPM, f_MMP, f_0MP, f_PMP, f_MMM, f_0MM, f_PMM);
+
+                                }
+
+                        }
+
+
+
+
+void DistributionDebugInspector::inspect(std::shared_ptr<Parameter> para, uint level, uint t){
+    
+    if(this->inspectionLevel!=level)
+        return;
+
+    std::cout << tag << ": distributions on level " << level << " at t " << t <<  std::endl;
+
+    vf::cuda::CudaGrid cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+
+    printFs <<< cudaGrid.grid, cudaGrid.threads >>>(    para->getParD(level)->distributions.f[0],
+                                                        para->getParD(level)->isEvenTimestep,
+                                                        (unsigned long)para->getParD(level)->numberOfNodes,
+                                                        para->getParD(level)->neighborX,
+                                                        para->getParD(level)->neighborY,
+                                                        para->getParD(level)->neighborZ,
+                                                        para->getParD(level)->typeOfGridNode,
+                                                        para->getParD(level)->coordinateX,
+                                                        para->getParD(level)->coordinateY,
+                                                        para->getParD(level)->coordinateZ,
+                                                        minX,
+                                                        maxX,
+                                                        minY,
+                                                        maxY,
+                                                        minZ,
+                                                        maxZ);
+
+}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h
new file mode 100644
index 0000000000000000000000000000000000000000..73c6b9fa21999d577349401b31a6183e7b6d7f3b
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.h
@@ -0,0 +1,38 @@
+#ifndef FILE_WRITER_H
+#define FILE_WRITER_H
+
+#include "Parameter/Parameter.h"
+
+
+class DistributionDebugInspector
+{
+public:
+	DistributionDebugInspector(uint _inspectionLevel, real _minX, real _maxX, real _minY, real _maxY, real _minZ, real _maxZ, std::string _tag):
+    inspectionLevel(_inspectionLevel),
+    minX(_minX),
+    maxX(_maxX),
+    minY(_minY),
+    maxY(_maxY),
+    minZ(_minZ),
+    maxZ(_maxZ),
+    tag(_tag)
+    {};
+	
+    ~DistributionDebugInspector(){}
+
+    void inspect(std::shared_ptr<Parameter> para, uint level, uint t);
+
+
+private:
+uint inspectionLevel;
+real minX;
+real maxX;
+real minY;
+real maxY;
+real minZ;
+real maxZ;
+std::string tag;
+
+};
+
+#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu
index 7b4fe0d7a76145edd93abce2c5e0c9c43c27809d..5b1b5ab29d9da3174901a088cc3fd7eb95f82d3f 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu
@@ -129,8 +129,9 @@ void PrecursorWriter::init(Parameter* para, GridProvider* gridProvider, CudaMemo
             real pointCoordX = para->getParH(level)->coordinateX[j];
             real pointCoordY = para->getParH(level)->coordinateY[j];
             real pointCoordZ = para->getParH(level)->coordinateZ[j];
-            if( pointCoordX < (dx+xPos) && pointCoordX >= xPos &&
-                pointCoordY<=yMax && pointCoordY>=yMin && 
+            if( para->getParH(level)->typeOfGridNode[j] == GEO_FLUID &&
+                pointCoordX < (dx+xPos) && pointCoordX >= xPos       &&
+                pointCoordY<=yMax && pointCoordY>=yMin               && 
                 pointCoordZ<=zMax && pointCoordZ>=zMin)
             {
                 highestY = max(highestY, pointCoordY);
@@ -140,7 +141,7 @@ void PrecursorWriter::init(Parameter* para, GridProvider* gridProvider, CudaMemo
                 lowestZ = min(lowestZ, pointCoordZ);
                 indicesOnGrid.push_back(j);    
                 coordY.push_back(pointCoordY);            
-                coordZ.push_back(pointCoordZ);            
+                coordZ.push_back(pointCoordZ);    
             }
         }
         assert("PrecursorWriter did not find any points on the grid"&& indicesOnGrid.size()==0);
@@ -154,13 +155,12 @@ void PrecursorWriter::init(Parameter* para, GridProvider* gridProvider, CudaMemo
                 int idx;
                 index1d(idx, idxY, idxZ, ny, nz);
                 indicesOnPlane.push_back(idx);
-                // printf("idx %d, idy %d, idz %d, ny %d, nz %d\n", idx, idxY, idxZ, ny, nz);
         }
 
         precursorStructs[level] = SPtr<PrecursorStruct>(new PrecursorStruct);
         precursorStructs[level]->nPoints = (uint)indicesOnGrid.size();
         precursorStructs[level]->indicesOnPlane = (int*) malloc(precursorStructs[level]->nPoints*sizeof(int));
-        precursorStructs[level]->spacing = makeUbTuple(dx, dx, tSave*para->getTimeRatio());
+        precursorStructs[level]->spacing = makeUbTuple(dx, dx, tSave*para->getTimeRatio()*pow(2,-level));
         precursorStructs[level]->origin = makeUbTuple(lowestY, lowestZ);
         precursorStructs[level]->extent = makeUbTuple(0, ny-1, 0, nz-1);
         precursorStructs[level]->nPointsInPlane = ny*nz;
@@ -181,8 +181,6 @@ void PrecursorWriter::init(Parameter* para, GridProvider* gridProvider, CudaMemo
             break;
         }
 
-        // printf("points %zu points on plane %zu \n",  indicesOnGrid.size(),  indicesOnPlane.size());
-
         cudaManager->cudaAllocPrecursorWriter(this, level);
     
         std::copy(indicesOnGrid.begin(), indicesOnGrid.end(), precursorStructs[level]->indicesH);
@@ -195,7 +193,11 @@ void PrecursorWriter::init(Parameter* para, GridProvider* gridProvider, CudaMemo
 
 void PrecursorWriter::interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t)
 {
-    if(t>tStartOut ? ((t-tStartOut) % tSave)==0 : false)
+    uint t_level         = para->getTimeStep(level, t, true);
+    uint tStartOut_level = tStartOut*pow(2, level);
+    uint tEnd_level      = para->getTimestepEnd()*pow(2, level);
+
+    if(t_level>tStartOut_level && ((t_level-tStartOut_level) % tSave)==0)
     {
         vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, precursorStructs[level]->nPoints);
 
@@ -225,7 +227,7 @@ void PrecursorWriter::interact(Parameter* para, CudaMemoryManager* cudaManager,
 
         precursorStructs[level]->timestepsBuffered++;
 
-        if(precursorStructs[level]->timestepsBuffered >= precursorStructs[level]->timestepsPerFile)
+        if(precursorStructs[level]->timestepsBuffered >= precursorStructs[level]->timestepsPerFile || t == para->getTimestepEnd())
         {
         // switch host buffer and data pointer so precursor data is copied in buffer and written from data
 
@@ -263,8 +265,6 @@ void PrecursorWriter::write(Parameter* para, int level, uint timestepsBuffered)
 
     int startTime = precursorStructs[level]->filesWritten*precursorStructs[level]->timestepsPerFile;
 
-    // printf("points in plane %d, total timesteps %d, ntimesteps %d \n", nPointsInPlane, nTotalTimesteps, nTimesteps);
-
     UbTupleInt6 extent = makeUbTuple(   val<1>(precursorStructs[level]->extent),    val<2>(precursorStructs[level]->extent), 
                                         val<3>(precursorStructs[level]->extent),    val<4>(precursorStructs[level]->extent), 
                                         startTime,                          startTime+(int)timestepsBuffered-1);