diff --git a/apps/gpu/RotatingGrid/RotatingGrid.cpp b/apps/gpu/RotatingGrid/RotatingGrid.cpp
index c3db2a289d39f37f3a8190ad3c55d93ed4fc3a5c..2d95dd3fdadcede90487073dbd4f3cece13a8e94 100644
--- a/apps/gpu/RotatingGrid/RotatingGrid.cpp
+++ b/apps/gpu/RotatingGrid/RotatingGrid.cpp
@@ -109,9 +109,9 @@ int main()
         //////////////////////////////////////////////////////////////////////////
         auto gridBuilder = std::make_shared<MultipleGridBuilder>();
 
-        gridBuilder->addCoarseGrid(-0.5 * L, -0.5 * L, -0.5 * L, 2.0 * L, 0.5 * L, 0.5 * L, dx);
+        gridBuilder->addCoarseGrid(-0.5 * L, -0.5 * L, -0.5 * L, 0.5 * L, 0.5 * L, 0.5 * L, dx);
 
-        if (rotOrInt == Rot) gridBuilder->addGridRotatingGrid(std::make_shared<Cylinder>(0.2, 0.0, 0.0, 0.25 * L, 1. * L, Axis::x));
+        if (rotOrInt == Rot) gridBuilder->addGridRotatingGrid(std::make_shared<Cylinder>(0.0, 0.0, 0.0, 0.25 * L, 0.75 * L, Axis::z));
         if (rotOrInt == Int) gridBuilder->addGrid(std::make_shared<Cylinder>(0.2, 0.1, 0.1, 0.25 * L, 0.8 * L, Axis::x), 1);
 
         GridScalingFactory scalingFactory = GridScalingFactory();
@@ -152,15 +152,13 @@ int main()
         // gridBuilder->setSlipBoundaryCondition(SideType::MZ, 0.0, 0.0, 0.0);
         // gridBuilder->setSlipBoundaryCondition(SideType::PZ, 0.0, 0.0, 0.0);
 
-        gridBuilder->setNoSlipBoundaryCondition(SideType::MX);
-        gridBuilder->setNoSlipBoundaryCondition(SideType::PX);
+        gridBuilder->setNoSlipBoundaryCondition(SideType::MY);
+        gridBuilder->setNoSlipBoundaryCondition(SideType::PY);
         gridBuilder->setNoSlipBoundaryCondition(SideType::MZ);
         gridBuilder->setNoSlipBoundaryCondition(SideType::PZ);
 
-
-        gridBuilder->setVelocityBoundaryCondition(SideType::PY, 0.0, -velocityLB, 0.0);
-        // gridBuilder->setVelocityBoundaryCondition(SideType::MX, velocityLB, 0.0, 0.0);
-        gridBuilder->setPressureBoundaryCondition(SideType::MY, 0.0);
+        gridBuilder->setVelocityBoundaryCondition(SideType::PX, -velocityLB, 0.0, 0.0);
+        gridBuilder->setPressureBoundaryCondition(SideType::MX, 0.0);
 
         BoundaryConditionFactory bcFactory;
 
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index 2425184d8a59390e343e603d5f775ccf88c491ca..e04c0ae72b1b9e08979972ecfb9bdc54146b7bb6 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -332,8 +332,6 @@ void Simulation::init(GridProvider &gridProvider, BoundaryConditionFactory *bcFa
         }
     }
 
-    // Allocate host memory for distribution debug writer
-    cudaMemoryManager->cudaAllocFsForAllLevelsOnHost();
 
     //////////////////////////////////////////////////////////////////////////
     // Restart
@@ -379,12 +377,7 @@ void Simulation::init(GridProvider &gridProvider, BoundaryConditionFactory *bcFa
 
     VF_LOG_INFO("... done.");
 
-    //////////////////////////////////////////////////////////////////////////
-    VF_LOG_INFO("used Device Memory: {} MB", cudaMemoryManager->getMemsizeGPU() / 1000000.0);
-    // std::cout << "Process " << communicator.getPID() <<": used device memory" << cudaMemoryManager->getMemsizeGPU() /
-    // 1000000.0 << " MB\n" << std::endl;
-    //////////////////////////////////////////////////////////////////////////
-
+    VF_LOG_INFO("Write vtk files for debugging...");
     // NeighborDebugWriter::writeNeighborLinkLinesDebug(para.get());
 
     InterfaceDebugWriter::writeInterfaceLinesDebugCF(para.get(), 0);
@@ -399,6 +392,16 @@ void Simulation::init(GridProvider &gridProvider, BoundaryConditionFactory *bcFa
     //        EdgeNodeDebugWriter::writeEdgeNodesXZ_Send(para);
     //        EdgeNodeDebugWriter::writeEdgeNodesXZ_Recv(para);
     //    }
+
+    // Allocate host memory for DistributionDebugWriter
+    cudaMemoryManager->cudaAllocFsForAllLevelsOnHost();
+    VF_LOG_INFO("...done");
+
+    //////////////////////////////////////////////////////////////////////////
+    VF_LOG_INFO("used Device Memory: {} MB", cudaMemoryManager->getMemsizeGPU() / 1000000.0);
+    // std::cout << "Process " << communicator.getPID() <<": used device memory" << cudaMemoryManager->getMemsizeGPU() /
+    // 1000000.0 << " MB\n" << std::endl;
+    //////////////////////////////////////////////////////////////////////////
 }
 
 void Simulation::addKineticEnergyAnalyzer(uint tAnalyse)