diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 1b8ce0027cc34042ed2507b883898832f4eaa9b8..64c0cfd85cae1b82b32b47de7ac95fbc42567578 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -7,7 +7,8 @@
     "ms-vscode.cpptools",
     "ms-vscode.cpptools-extension-pack",
     "xaver.clang-format",  
-    "notskm.clang-tidy"  
+    "notskm.clang-tidy",
+    "streetsidesoftware.code-spell-checker"
     ],  
     "runArgs": ["--gpus","all"],  
     "image": "git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu20_04:1.2",
diff --git a/src/gpu/GridGenerator/geometries/Vertex/Vertex.h b/src/gpu/GridGenerator/geometries/Vertex/Vertex.h
index cabbc21c92113b490d31b6e6ae9ad834b41fd44b..f1610b7e274e1c12c34772ef8a8d74da49ee9a81 100644
--- a/src/gpu/GridGenerator/geometries/Vertex/Vertex.h
+++ b/src/gpu/GridGenerator/geometries/Vertex/Vertex.h
@@ -6,7 +6,7 @@
 #include <memory>
 #include <ostream>
 
-#include "global.h"
+#include "gpu/GridGenerator/global.h"
 
 class VertexMemento;
 
diff --git a/src/gpu/GridGenerator/grid/Cell.h b/src/gpu/GridGenerator/grid/Cell.h
index 845e02eaa66a5b2327b5a2ba63b1227962ab8f61..0d6c3e13d391a451201131eb27216102b3545077 100644
--- a/src/gpu/GridGenerator/grid/Cell.h
+++ b/src/gpu/GridGenerator/grid/Cell.h
@@ -1,9 +1,9 @@
 #ifndef CELL_H
 #define CELL_H
 
-#include "global.h"
+#include "gpu/GridGenerator/global.h"
 
-#include "utilities/cuda/cudaDefines.h"
+#include "gpu/GridGenerator/utilities/cuda/cudaDefines.h"
 
 struct Point
 {
diff --git a/src/gpu/GridGenerator/grid/Field.h b/src/gpu/GridGenerator/grid/Field.h
index 9e7513108fa039cc6b14ba519fce6acf667ed2f6..d2ad5ca782ff68f1983108609c7bb23e729985b0 100644
--- a/src/gpu/GridGenerator/grid/Field.h
+++ b/src/gpu/GridGenerator/grid/Field.h
@@ -1,7 +1,7 @@
 #ifndef FIELD_H
 #define FIELD_H
 
-#include "global.h"
+#include "gpu/GridGenerator/global.h"
 
 struct Vertex;
 class GridStrategy;
diff --git a/src/gpu/GridGenerator/grid/Grid.h b/src/gpu/GridGenerator/grid/Grid.h
index 21cfea189e58067fabc753d27fb13803abdcb9aa..36d607bc06c759b3dc7a7d27dfacbe383283a24e 100644
--- a/src/gpu/GridGenerator/grid/Grid.h
+++ b/src/gpu/GridGenerator/grid/Grid.h
@@ -3,11 +3,11 @@
 
 #include "Core/LbmOrGks.h"
 
-#include "global.h"
+#include "gpu/GridGenerator/global.h"
 
-#include "geometries/Vertex/Vertex.h"
+#include "gpu/GridGenerator/geometries/Vertex/Vertex.h"
 
-#include "grid/Cell.h"
+#include "gpu/GridGenerator/grid/Cell.h"
 
 class TriangularMesh;
 struct Vertex;
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
index 9ff99fad4e2577536f50ce112acdc0e2e6bf40d5..3d516c6710d44d5c8da45c0f71e36ea21e6e9655 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
@@ -5,7 +5,7 @@
 #include <string>
 #include <memory>
 
-#include "global.h"
+#include "gpu/GridGenerator/global.h"
 
 #define GEOMQS 6
 #define INLETQS 0
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
index f212f2c02ee21ab8d1f944863b6e1b59f0615f47..f90bbe231a64a2d1c51536fd96fa69792c1348da 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
@@ -6,11 +6,11 @@
 #include <memory>
 #include <array>
 
-#include "global.h"
+#include "gpu/GridGenerator/global.h"
 
-#include "grid/GridBuilder/GridBuilder.h"
-#include "grid/Grid.h"
-#include "grid/GridInterface.h"
+#include "gpu/GridGenerator/grid/GridBuilder/GridBuilder.h"
+#include "gpu/GridGenerator/grid/Grid.h"
+#include "gpu/GridGenerator/grid/GridInterface.h"
 
 struct Vertex;
 class  Grid;
diff --git a/src/gpu/GridGenerator/grid/GridImp.h b/src/gpu/GridGenerator/grid/GridImp.h
index 09f278a4d41bba346fd9ca3b38d028ef5d9fca4d..08c4f795b320cdd425e7d2c10c4d3173ecb6e83c 100644
--- a/src/gpu/GridGenerator/grid/GridImp.h
+++ b/src/gpu/GridGenerator/grid/GridImp.h
@@ -5,12 +5,12 @@
 
 #include "Core/LbmOrGks.h"
 
-#include "global.h"
+#include "gpu/GridGenerator/global.h"
 
-#include "grid/distributions/Distribution.h"
-#include "grid/Grid.h"
-#include "grid/Cell.h"
-#include "grid/Field.h" 
+#include "gpu/GridGenerator/grid/distributions/Distribution.h"
+#include "gpu/GridGenerator/grid/Grid.h"
+#include "gpu/GridGenerator/grid/Cell.h"
+#include "gpu/GridGenerator/grid/Field.h"
 
 class TriangularMesh;
 struct Vertex;
@@ -39,7 +39,7 @@ extern CONSTANT int DIRECTIONS[DIR_END_MAX][DIMENSION];
 
 class GRIDGENERATOR_EXPORT GridImp : public enableSharedFromThis<GridImp>, public Grid
 {
-private:
+protected:
     CUDA_HOST GridImp();
     CUDA_HOST GridImp(Object* object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta, SPtr<GridStrategy> gridStrategy, Distribution d, uint level);
 
diff --git a/src/gpu/GridGenerator/grid/GridInterface.h b/src/gpu/GridGenerator/grid/GridInterface.h
index d0f04ea3451b3044c349aa0e27d2f7c6e567128c..2044fb494924046ac255ad1a1e7cc74496adae6e 100644
--- a/src/gpu/GridGenerator/grid/GridInterface.h
+++ b/src/gpu/GridGenerator/grid/GridInterface.h
@@ -1,7 +1,7 @@
 #ifndef GRID_INTERFACE_H
 #define GRID_INTERFACE_H
 
-#include "global.h"
+#include "gpu/GridGenerator/global.h"
 
 class GridImp;
 
diff --git a/src/gpu/GridGenerator/grid/distributions/Distribution.h b/src/gpu/GridGenerator/grid/distributions/Distribution.h
index b05b5db3652ee952ff083db560ed8316688819c9..04b7093aadd16cc755358d123a415fd5bb032703 100644
--- a/src/gpu/GridGenerator/grid/distributions/Distribution.h
+++ b/src/gpu/GridGenerator/grid/distributions/Distribution.h
@@ -4,7 +4,7 @@
 #include <vector>
 #include <string>
 
-#include "global.h"
+#include "gpu/GridGenerator/global.h"
 
 #define DIR_END_MAX 27
 
diff --git a/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h b/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h
index eb10c9c107bb8e777e6f9a5d7bb4a57d021266fe..116c84b12365e8932dadd82180711bcaa9706928 100644
--- a/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h
+++ b/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h
@@ -10,7 +10,7 @@
 
 #include "Core/NonCreatable.h"
 
-#include "global.h"
+#include "gpu/GridGenerator/global.h"
 
 class UnstructuredGridBuilder;
 class GridBuilder;
diff --git a/src/gpu/VirtualFluids_GPU/CMakeLists.txt b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
index f64b0e676f3f76aac601a372c34b5ad2559df6fe..14bf4f6641f9b874788d3674b11fa6ca135f7564 100644
--- a/src/gpu/VirtualFluids_GPU/CMakeLists.txt
+++ b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
@@ -20,6 +20,7 @@ vf_add_tests()
 if(BUILD_VF_UNIT_TESTS)
     set_target_properties(VirtualFluids_GPUTests PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
     set_source_files_properties(Kernel/Utilities/DistributionHelperTests.cpp PROPERTIES LANGUAGE CUDA)
+	set_source_files_properties(DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp PROPERTIES LANGUAGE CUDA)
     set_source_files_properties(Communication/ExchangeData27Test.cpp PROPERTIES LANGUAGE CUDA)
     target_include_directories(VirtualFluids_GPUTests PRIVATE "${VF_THIRD_DIR}/cuda_samples/")
 endif()
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
index d12a25733539c319cdfd6ead1d6aa169fe6ae52d..7ffe404cf50f2fdddecf7594da75d05595db19d4 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
@@ -9,7 +9,7 @@
 #include "PointerDefinitions.h"
 #include "VirtualFluids_GPU_export.h"
 
-#include <GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h>
+#include <gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h>
 
 class Parameter;
 class GridBuilder;
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
index 5edae460afafff5cb68c9d98fc13fcbe48067fce..fe1ae5ba66c23e7fd2311f085327eb3b0a57a717 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
@@ -339,7 +339,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                         builder->getReceiveIndices(para->getParH(level)->recvProcessNeighborX[j].index, direction,
                                                    level);
                         if (level != builder->getNumberOfGridLevels() - 1 && para->useReducedCommunicationAfterFtoC)
-                            indexRearrangement->initCommunicationArraysForCommAfterFinetoCoarseX(level, j, direction);                        
+                            indexRearrangement->initCommunicationArraysForCommAfterFinetoCoarseX(level, j, direction);
                         ////////////////////////////////////////////////////////////////////////////////////////
                         cudaMemoryManager->cudaCopyProcessNeighborXIndex(level, j);
                         ////////////////////////////////////////////////////////////////////////////////////////
@@ -408,7 +408,7 @@ void GridGenerator::initalValuesDomainDecompostion()
                         builder->getReceiveIndices(para->getParH(level)->recvProcessNeighborY[j].index, direction,
                                                    level);
                         if (level != builder->getNumberOfGridLevels() - 1 && para->useReducedCommunicationAfterFtoC)
-                            indexRearrangement->initCommunicationArraysForCommAfterFinetoCoarseY(level, j, direction);                       
+                            indexRearrangement->initCommunicationArraysForCommAfterFinetoCoarseY(level, j, direction);
                         ////////////////////////////////////////////////////////////////////////////////////////
                         cudaMemoryManager->cudaCopyProcessNeighborYIndex(level, j);
                         ////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp
index 70a5505d95ad16e9df3a80545a23b90d83f87d3b..1bdf32f281dc4f2d22cf6bf3b3ff43ca62cd592c 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp
@@ -1,221 +1,274 @@
 #include "IndexRearrangementForStreams.h"
 
+#include "Communication/Communicator.h"
 #include "Parameter/Parameter.h"
-#include <GridGenerator/grid/GridBuilder/GridBuilder.h>
 #include <GridGenerator/grid/Grid.h>
-#include "Communication/Communicator.h"
+#include <GridGenerator/grid/GridBuilder/GridBuilder.h>
 
-#include <iostream>
 #include <algorithm>
+#include <iostream>
 
 IndexRearrangementForStreams::IndexRearrangementForStreams(std::shared_ptr<Parameter> para,
                                                            std::shared_ptr<GridBuilder> builder)
     : para(para), builder(builder)
-{ }
+{
+}
 
-void IndexRearrangementForStreams::initCommunicationArraysForCommAfterFinetoCoarseX(const uint &level, int j,
+void IndexRearrangementForStreams::initCommunicationArraysForCommAfterFinetoCoarseX(const uint &level,
+                                                                                    int indexOfProcessNeighbor,
                                                                                     int direction)
 {
     // init send indices for communication after coarse to fine
     std::cout << "communication: reorder send indices X ";
-    para->initNumberOfProcessNeighborsAfterFtoCX(level);
+    para->initProcessNeighborsAfterFtoCX(level);
     std::vector<uint> sendIndicesForCommAfterFtoCPositions;
-    reorderSendIndicesForCommAfterFtoCX(direction, level, j, sendIndicesForCommAfterFtoCPositions);
-    para->setSendProcessNeighborsAfterFtoCX(para->getParH(level)->sendProcessNeighborsAfterFtoCX[j].numberOfNodes,
-                                            level, j);
+    reorderSendIndicesForCommAfterFtoCX(direction, level, indexOfProcessNeighbor, sendIndicesForCommAfterFtoCPositions);
+    para->setSendProcessNeighborsAfterFtoCX(
+        para->getParH(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].numberOfNodes, level,
+        indexOfProcessNeighbor);
 
-    // send sendIndicesForCommAfterFtoCPositions to receiving process and receive recvIndicesForCommAfterFtoCPositions from sending process
+    // send sendIndicesForCommAfterFtoCPositions to receiving process and receive recvIndicesForCommAfterFtoCPositions
+    // from sending process
     std::cout << "mpi send and receive ";
     std::vector<uint> recvIndicesForCommAfterFtoCPositions;
     recvIndicesForCommAfterFtoCPositions.resize(
-        (size_t)para->getParH(level)->sendProcessNeighborsAfterFtoCX[j].numberOfNodes *
+        (size_t)para->getParH(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].numberOfNodes *
         2); // give vector an arbitraty size (larger than needed) // TODO: Find a better way
     auto comm = vf::gpu::Communicator::getInstanz();
     comm->exchangeIndices(recvIndicesForCommAfterFtoCPositions.data(), (int)recvIndicesForCommAfterFtoCPositions.size(),
-                          para->getParH(level)->recvProcessNeighborX[j].rankNeighbor,
+                          para->getParH(level)->recvProcessNeighborX[indexOfProcessNeighbor].rankNeighbor,
                           sendIndicesForCommAfterFtoCPositions.data(), (int)sendIndicesForCommAfterFtoCPositions.size(),
-                          para->getParH(level)->sendProcessNeighborX[j].rankNeighbor);
-    
+                          para->getParH(level)->sendProcessNeighborX[indexOfProcessNeighbor].rankNeighbor);
+
     // resize receiving vector to correct size
     auto it = std::unique(recvIndicesForCommAfterFtoCPositions.begin(), recvIndicesForCommAfterFtoCPositions.end());
-    recvIndicesForCommAfterFtoCPositions.erase(std::prev(it, 1), recvIndicesForCommAfterFtoCPositions.end()); // TODO: Find a better way
+    recvIndicesForCommAfterFtoCPositions.erase(std::prev(it, 1),
+                                               recvIndicesForCommAfterFtoCPositions.end()); // TODO: Find a better way
 
     // init receive indices for communication after coarse to fine
     std::cout << "reorder receive indices ";
-    reorderRecvIndicesForCommAfterFtoCX(direction, level, j, recvIndicesForCommAfterFtoCPositions);
-    para->setRecvProcessNeighborsAfterFtoCX(para->getParH(level)->recvProcessNeighborsAfterFtoCX[j].numberOfNodes,
-                                            level, j);
-    copyProcessNeighborToCommAfterFtoCX(level, j);
+    reorderRecvIndicesForCommAfterFtoCX(direction, level, indexOfProcessNeighbor, recvIndicesForCommAfterFtoCPositions);
+    para->setRecvProcessNeighborsAfterFtoCX(
+        para->getParH(level)->recvProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].numberOfNodes, level,
+        indexOfProcessNeighbor);
+    copyProcessNeighborToCommAfterFtoCX(level, indexOfProcessNeighbor);
 
     std::cout << "done." << std::endl;
 }
 
-void IndexRearrangementForStreams::initCommunicationArraysForCommAfterFinetoCoarseY(const uint &level, int j, int direction)
+void IndexRearrangementForStreams::initCommunicationArraysForCommAfterFinetoCoarseY(const uint &level,
+                                                                                    int indexOfProcessNeighbor,
+                                                                                    int direction)
 {
     // init send indices for communication after coarse to fine
     std::cout << "communication: reorder send indices Y ";
-    para->initNumberOfProcessNeighborsAfterFtoCY(level);
+    para->initProcessNeighborsAfterFtoCY(level);
     std::vector<uint> sendIndicesForCommAfterFtoCPositions;
-    reorderSendIndicesForCommAfterFtoCY(direction, level, j, sendIndicesForCommAfterFtoCPositions);
-    para->setSendProcessNeighborsAfterFtoCY(para->getParH(level)->sendProcessNeighborsAfterFtoCY[j].numberOfNodes,
-                                            level, j);
+    reorderSendIndicesForCommAfterFtoCY(direction, level, indexOfProcessNeighbor, sendIndicesForCommAfterFtoCPositions);
+    para->setSendProcessNeighborsAfterFtoCY(
+        para->getParH(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].numberOfNodes, level,
+        indexOfProcessNeighbor);
 
-    // send sendIndicesForCommAfterFtoCPositions to receiving process and receive recvIndicesForCommAfterFtoCPositions from sending process
+    // send sendIndicesForCommAfterFtoCPositions to receiving process and receive recvIndicesForCommAfterFtoCPositions
+    // from sending process
     std::cout << "mpi send and receive ";
-    std::vector<uint> recvIndicesForCommAfterFtoCPositions; 
-    recvIndicesForCommAfterFtoCPositions.resize((size_t) para->getParH(level)->sendProcessNeighborsAfterFtoCY[j].numberOfNodes *
-                                                2); // give vector an arbitraty size (larger than needed) // TODO: Find a better way
+    std::vector<uint> recvIndicesForCommAfterFtoCPositions;
+    recvIndicesForCommAfterFtoCPositions.resize(
+        (size_t)para->getParH(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].numberOfNodes *
+        2); // give vector an arbitrary size (larger than needed) // TODO: Find a better way
     auto comm = vf::gpu::Communicator::getInstanz();
     comm->exchangeIndices(recvIndicesForCommAfterFtoCPositions.data(), (int)recvIndicesForCommAfterFtoCPositions.size(),
-                          para->getParH(level)->recvProcessNeighborY[j].rankNeighbor,
+                          para->getParH(level)->recvProcessNeighborY[indexOfProcessNeighbor].rankNeighbor,
                           sendIndicesForCommAfterFtoCPositions.data(), (int)sendIndicesForCommAfterFtoCPositions.size(),
-                          para->getParH(level)->sendProcessNeighborY[j].rankNeighbor);
-    
+                          para->getParH(level)->sendProcessNeighborY[indexOfProcessNeighbor].rankNeighbor);
+
     // resize receiving vector to correct size
     auto it = std::unique(recvIndicesForCommAfterFtoCPositions.begin(), recvIndicesForCommAfterFtoCPositions.end());
-    recvIndicesForCommAfterFtoCPositions.erase(std::prev(it, 1), recvIndicesForCommAfterFtoCPositions.end()); // TODO: Find a better way
+    recvIndicesForCommAfterFtoCPositions.erase(std::prev(it, 1),
+                                               recvIndicesForCommAfterFtoCPositions.end()); // TODO: Find a better way
 
     // init receive indices for communication after coarse to fine
     std::cout << "reorder receive indices ";
-    reorderRecvIndicesForCommAfterFtoCY(direction, level, j, recvIndicesForCommAfterFtoCPositions);
-    para->setRecvProcessNeighborsAfterFtoCY(para->getParH(level)->recvProcessNeighborsAfterFtoCY[j].numberOfNodes,
-                                            level, j);
+    reorderRecvIndicesForCommAfterFtoCY(direction, level, indexOfProcessNeighbor, recvIndicesForCommAfterFtoCPositions);
+    para->setRecvProcessNeighborsAfterFtoCY(
+        para->getParH(level)->recvProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].numberOfNodes, level,
+        indexOfProcessNeighbor);
 
-    copyProcessNeighborToCommAfterFtoCY(level, j);
+    copyProcessNeighborToCommAfterFtoCY(level, indexOfProcessNeighbor);
 
     std::cout << "done." << std::endl;
 }
 
-void IndexRearrangementForStreams::initCommunicationArraysForCommAfterFinetoCoarseZ(const uint &level, int j, int direction)
+void IndexRearrangementForStreams::initCommunicationArraysForCommAfterFinetoCoarseZ(const uint &level,
+                                                                                    int indexOfProcessNeighbor,
+                                                                                    int direction)
 {
     // init send indices for communication after coarse to fine
     std::cout << "communication: reorder send indices Z ";
-    para->initNumberOfProcessNeighborsAfterFtoCZ(level);
+    para->initProcessNeighborsAfterFtoCZ(level);
     std::vector<uint> sendIndicesForCommAfterFtoCPositions;
-    reorderSendIndicesForCommAfterFtoCZ(direction, level, j, sendIndicesForCommAfterFtoCPositions);
-    para->setSendProcessNeighborsAfterFtoCZ(para->getParH(level)->sendProcessNeighborsAfterFtoCZ[j].numberOfNodes,
-                                            level, j);
+    reorderSendIndicesForCommAfterFtoCZ(direction, level, indexOfProcessNeighbor, sendIndicesForCommAfterFtoCPositions);
+    para->setSendProcessNeighborsAfterFtoCZ(
+        para->getParH(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].numberOfNodes, level,
+        indexOfProcessNeighbor);
 
-    // send sendIndicesForCommAfterFtoCPositions to receiving process and receive recvIndicesForCommAfterFtoCPositions from sending process
+    // send sendIndicesForCommAfterFtoCPositions to receiving process and receive recvIndicesForCommAfterFtoCPositions
+    // from sending process
     std::cout << "mpi send and receive ";
-    std::vector<uint> recvIndicesForCommAfterFtoCPositions; 
-    recvIndicesForCommAfterFtoCPositions.resize((size_t) para->getParH(level)->sendProcessNeighborsAfterFtoCZ[j].numberOfNodes *
-                                                2); // give vector an arbitraty size (larger than needed) // TODO: Find a better way
+    std::vector<uint> recvIndicesForCommAfterFtoCPositions;
+    recvIndicesForCommAfterFtoCPositions.resize(
+        (size_t)para->getParH(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].numberOfNodes *
+        2); // give vector an arbitrary size (larger than needed) // TODO: Find a better way
     auto comm = vf::gpu::Communicator::getInstanz();
     comm->exchangeIndices(recvIndicesForCommAfterFtoCPositions.data(), (int)recvIndicesForCommAfterFtoCPositions.size(),
-                          para->getParH(level)->recvProcessNeighborZ[j].rankNeighbor,
+                          para->getParH(level)->recvProcessNeighborZ[indexOfProcessNeighbor].rankNeighbor,
                           sendIndicesForCommAfterFtoCPositions.data(), (int)sendIndicesForCommAfterFtoCPositions.size(),
-                          para->getParH(level)->sendProcessNeighborZ[j].rankNeighbor);
-    
+                          para->getParH(level)->sendProcessNeighborZ[indexOfProcessNeighbor].rankNeighbor);
+
     // resize receiving vector to correct size
     auto it = std::unique(recvIndicesForCommAfterFtoCPositions.begin(), recvIndicesForCommAfterFtoCPositions.end());
-    recvIndicesForCommAfterFtoCPositions.erase(std::prev(it, 1), recvIndicesForCommAfterFtoCPositions.end()); // TODO: Find a better way
+    recvIndicesForCommAfterFtoCPositions.erase(std::prev(it, 1),
+                                               recvIndicesForCommAfterFtoCPositions.end()); // TODO: Find a better way
 
     // init receive indices for communication after coarse to fine
     std::cout << "reorder receive indices ";
-    reorderRecvIndicesForCommAfterFtoCZ(direction, level, j, recvIndicesForCommAfterFtoCPositions);
-    para->setRecvProcessNeighborsAfterFtoCZ(para->getParH(level)->recvProcessNeighborsAfterFtoCZ[j].numberOfNodes,
-                                            level, j);
+    reorderRecvIndicesForCommAfterFtoCZ(direction, level, indexOfProcessNeighbor, recvIndicesForCommAfterFtoCPositions);
+    para->setRecvProcessNeighborsAfterFtoCZ(
+        para->getParH(level)->recvProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].numberOfNodes, level,
+        indexOfProcessNeighbor);
 
-    copyProcessNeighborToCommAfterFtoCZ(level, j);
+    copyProcessNeighborToCommAfterFtoCZ(level, indexOfProcessNeighbor);
 
     std::cout << "done." << std::endl;
 }
 
-void IndexRearrangementForStreams::copyProcessNeighborToCommAfterFtoCX(const uint &level, int j)
+void IndexRearrangementForStreams::copyProcessNeighborToCommAfterFtoCX(const uint &level, int indexOfProcessNeighbor)
 {
     // init f[0]*
-    para->getParD(level)->sendProcessNeighborsAfterFtoCX[j].f[0] = para->getParD(level)->sendProcessNeighborX[j].f[0];
-    para->getParH(level)->sendProcessNeighborsAfterFtoCX[j].f[0] = para->getParH(level)->sendProcessNeighborX[j].f[0];
-    para->getParD(level)->recvProcessNeighborsAfterFtoCX[j].f[0] = para->getParD(level)->recvProcessNeighborX[j].f[0];
-    para->getParH(level)->recvProcessNeighborsAfterFtoCX[j].f[0] = para->getParH(level)->recvProcessNeighborX[j].f[0];
+    para->getParD(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].f[0] =
+        para->getParD(level)->sendProcessNeighborX[indexOfProcessNeighbor].f[0];
+    para->getParH(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].f[0] =
+        para->getParH(level)->sendProcessNeighborX[indexOfProcessNeighbor].f[0];
+    para->getParD(level)->recvProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].f[0] =
+        para->getParD(level)->recvProcessNeighborX[indexOfProcessNeighbor].f[0];
+    para->getParH(level)->recvProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].f[0] =
+        para->getParH(level)->recvProcessNeighborX[indexOfProcessNeighbor].f[0];
 
     // init index*
-    para->getParD(level)->sendProcessNeighborsAfterFtoCX[j].index = para->getParD(level)->sendProcessNeighborX[j].index;
-    para->getParH(level)->sendProcessNeighborsAfterFtoCX[j].index = para->getParH(level)->sendProcessNeighborX[j].index;
-    para->getParD(level)->recvProcessNeighborsAfterFtoCX[j].index = para->getParD(level)->recvProcessNeighborX[j].index;
-    para->getParH(level)->recvProcessNeighborsAfterFtoCX[j].index = para->getParH(level)->recvProcessNeighborX[j].index;
+    para->getParD(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].index =
+        para->getParD(level)->sendProcessNeighborX[indexOfProcessNeighbor].index;
+    para->getParH(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].index =
+        para->getParH(level)->sendProcessNeighborX[indexOfProcessNeighbor].index;
+    para->getParD(level)->recvProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].index =
+        para->getParD(level)->recvProcessNeighborX[indexOfProcessNeighbor].index;
+    para->getParH(level)->recvProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].index =
+        para->getParH(level)->recvProcessNeighborX[indexOfProcessNeighbor].index;
 
     // rank neighbor
-    para->getParH(level)->sendProcessNeighborsAfterFtoCX[j].rankNeighbor = para->getParH(level)->sendProcessNeighborX[j].rankNeighbor;
-    para->getParH(level)->recvProcessNeighborsAfterFtoCX[j].rankNeighbor = para->getParH(level)->recvProcessNeighborX[j].rankNeighbor;
+    para->getParH(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].rankNeighbor =
+        para->getParH(level)->sendProcessNeighborX[indexOfProcessNeighbor].rankNeighbor;
+    para->getParH(level)->recvProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].rankNeighbor =
+        para->getParH(level)->recvProcessNeighborX[indexOfProcessNeighbor].rankNeighbor;
 }
 
-void IndexRearrangementForStreams::copyProcessNeighborToCommAfterFtoCY(const uint &level, int j)
+void IndexRearrangementForStreams::copyProcessNeighborToCommAfterFtoCY(const uint &level, int indexOfProcessNeighbor)
 {
     // init f[0]*
-    para->getParD(level)->sendProcessNeighborsAfterFtoCY[j].f[0] = para->getParD(level)->sendProcessNeighborY[j].f[0];
-    para->getParH(level)->sendProcessNeighborsAfterFtoCY[j].f[0] = para->getParH(level)->sendProcessNeighborY[j].f[0];
-    para->getParD(level)->recvProcessNeighborsAfterFtoCY[j].f[0] = para->getParD(level)->recvProcessNeighborY[j].f[0];
-    para->getParH(level)->recvProcessNeighborsAfterFtoCY[j].f[0] = para->getParH(level)->recvProcessNeighborY[j].f[0];
+    para->getParD(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].f[0] =
+        para->getParD(level)->sendProcessNeighborY[indexOfProcessNeighbor].f[0];
+    para->getParH(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].f[0] =
+        para->getParH(level)->sendProcessNeighborY[indexOfProcessNeighbor].f[0];
+    para->getParD(level)->recvProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].f[0] =
+        para->getParD(level)->recvProcessNeighborY[indexOfProcessNeighbor].f[0];
+    para->getParH(level)->recvProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].f[0] =
+        para->getParH(level)->recvProcessNeighborY[indexOfProcessNeighbor].f[0];
 
     // init index*
-    para->getParD(level)->sendProcessNeighborsAfterFtoCY[j].index = para->getParD(level)->sendProcessNeighborY[j].index;
-    para->getParH(level)->sendProcessNeighborsAfterFtoCY[j].index = para->getParH(level)->sendProcessNeighborY[j].index;
-    para->getParD(level)->recvProcessNeighborsAfterFtoCY[j].index = para->getParD(level)->recvProcessNeighborY[j].index;
-    para->getParH(level)->recvProcessNeighborsAfterFtoCY[j].index = para->getParH(level)->recvProcessNeighborY[j].index;
+    para->getParD(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].index =
+        para->getParD(level)->sendProcessNeighborY[indexOfProcessNeighbor].index;
+    para->getParH(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].index =
+        para->getParH(level)->sendProcessNeighborY[indexOfProcessNeighbor].index;
+    para->getParD(level)->recvProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].index =
+        para->getParD(level)->recvProcessNeighborY[indexOfProcessNeighbor].index;
+    para->getParH(level)->recvProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].index =
+        para->getParH(level)->recvProcessNeighborY[indexOfProcessNeighbor].index;
 
     // rank neighbor
-    para->getParH(level)->sendProcessNeighborsAfterFtoCY[j].rankNeighbor = para->getParH(level)->sendProcessNeighborY[j].rankNeighbor;
-    para->getParH(level)->recvProcessNeighborsAfterFtoCY[j].rankNeighbor = para->getParH(level)->recvProcessNeighborY[j].rankNeighbor;
+    para->getParH(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].rankNeighbor =
+        para->getParH(level)->sendProcessNeighborY[indexOfProcessNeighbor].rankNeighbor;
+    para->getParH(level)->recvProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].rankNeighbor =
+        para->getParH(level)->recvProcessNeighborY[indexOfProcessNeighbor].rankNeighbor;
 }
 
-void IndexRearrangementForStreams::copyProcessNeighborToCommAfterFtoCZ(const uint &level, int j)
+void IndexRearrangementForStreams::copyProcessNeighborToCommAfterFtoCZ(const uint &level, int indexOfProcessNeighbor)
 {
     // init f[0]*
-    para->getParD(level)->sendProcessNeighborsAfterFtoCZ[j].f[0] = para->getParD(level)->sendProcessNeighborZ[j].f[0];
-    para->getParH(level)->sendProcessNeighborsAfterFtoCZ[j].f[0] = para->getParH(level)->sendProcessNeighborZ[j].f[0];
-    para->getParD(level)->recvProcessNeighborsAfterFtoCZ[j].f[0] = para->getParD(level)->recvProcessNeighborZ[j].f[0];
-    para->getParH(level)->recvProcessNeighborsAfterFtoCZ[j].f[0] = para->getParH(level)->recvProcessNeighborZ[j].f[0];
+    para->getParD(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].f[0] =
+        para->getParD(level)->sendProcessNeighborZ[indexOfProcessNeighbor].f[0];
+    para->getParH(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].f[0] =
+        para->getParH(level)->sendProcessNeighborZ[indexOfProcessNeighbor].f[0];
+    para->getParD(level)->recvProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].f[0] =
+        para->getParD(level)->recvProcessNeighborZ[indexOfProcessNeighbor].f[0];
+    para->getParH(level)->recvProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].f[0] =
+        para->getParH(level)->recvProcessNeighborZ[indexOfProcessNeighbor].f[0];
 
     // init index*
-    para->getParD(level)->sendProcessNeighborsAfterFtoCZ[j].index = para->getParD(level)->sendProcessNeighborZ[j].index;
-    para->getParH(level)->sendProcessNeighborsAfterFtoCZ[j].index = para->getParH(level)->sendProcessNeighborZ[j].index;
-    para->getParD(level)->recvProcessNeighborsAfterFtoCZ[j].index = para->getParD(level)->recvProcessNeighborZ[j].index;
-    para->getParH(level)->recvProcessNeighborsAfterFtoCZ[j].index = para->getParH(level)->recvProcessNeighborZ[j].index;
+    para->getParD(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].index =
+        para->getParD(level)->sendProcessNeighborZ[indexOfProcessNeighbor].index;
+    para->getParH(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].index =
+        para->getParH(level)->sendProcessNeighborZ[indexOfProcessNeighbor].index;
+    para->getParD(level)->recvProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].index =
+        para->getParD(level)->recvProcessNeighborZ[indexOfProcessNeighbor].index;
+    para->getParH(level)->recvProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].index =
+        para->getParH(level)->recvProcessNeighborZ[indexOfProcessNeighbor].index;
 
     // rank neighbor
-    para->getParH(level)->sendProcessNeighborsAfterFtoCZ[j].rankNeighbor = para->getParH(level)->sendProcessNeighborZ[j].rankNeighbor;
-    para->getParH(level)->recvProcessNeighborsAfterFtoCZ[j].rankNeighbor = para->getParH(level)->recvProcessNeighborZ[j].rankNeighbor;
+    para->getParH(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].rankNeighbor =
+        para->getParH(level)->sendProcessNeighborZ[indexOfProcessNeighbor].rankNeighbor;
+    para->getParH(level)->recvProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].rankNeighbor =
+        para->getParH(level)->recvProcessNeighborZ[indexOfProcessNeighbor].rankNeighbor;
 }
 
-void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoCX(int direction, int level, int j,
-                                                        std::vector<uint> &sendIndicesForCommAfterFtoCPositions)
+void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoCX(
+    int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions)
 {
-    int *sendIndices                    = para->getParH(level)->sendProcessNeighborX[j].index;
-    int &numberOfSendNeighborsAfterFtoC = para->getParH(level)->sendProcessNeighborsAfterFtoCX[j].numberOfNodes;
-    reorderSendIndicesForCommAfterFtoC(sendIndices, numberOfSendNeighborsAfterFtoC, direction, level, j,
+    int *sendIndices = para->getParH(level)->sendProcessNeighborX[indexOfProcessNeighbor].index;
+    int &numberOfSendNodesAfterFtoC =
+        para->getParH(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].numberOfNodes;
+    reorderSendIndicesForCommAfterFtoC(sendIndices, numberOfSendNodesAfterFtoC, direction, level,
                                        sendIndicesForCommAfterFtoCPositions);
 }
 
-void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoCY(int direction, int level, int j,
-                                                        std::vector<uint> &sendIndicesForCommAfterFtoCPositions)
+void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoCY(
+    int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions)
 {
-    int *sendIndices                    = para->getParH(level)->sendProcessNeighborY[j].index;
-    int &numberOfSendNeighborsAfterFtoC = para->getParH(level)->sendProcessNeighborsAfterFtoCY[j].numberOfNodes;
-    reorderSendIndicesForCommAfterFtoC(sendIndices, numberOfSendNeighborsAfterFtoC, direction, level, j,
+    int *sendIndices = para->getParH(level)->sendProcessNeighborY[indexOfProcessNeighbor].index;
+    int &numberOfSendNodesAfterFtoC =
+        para->getParH(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].numberOfNodes;
+    reorderSendIndicesForCommAfterFtoC(sendIndices, numberOfSendNodesAfterFtoC, direction, level,
                                        sendIndicesForCommAfterFtoCPositions);
 }
 
-void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoCZ(int direction, int level, int j,
-                                                        std::vector<uint> &sendIndicesForCommAfterFtoCPositions) 
+void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoCZ(
+    int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions)
 {
-    int *sendIndices                    = para->getParH(level)->sendProcessNeighborZ[j].index;
-    int &numberOfSendNeighborsAfterFtoC = para->getParH(level)->sendProcessNeighborsAfterFtoCZ[j].numberOfNodes;
-    reorderSendIndicesForCommAfterFtoC(sendIndices, numberOfSendNeighborsAfterFtoC, direction, level, j,
+    int *sendIndices = para->getParH(level)->sendProcessNeighborZ[indexOfProcessNeighbor].index;
+    int &numberOfSendNodesAfterFtoC =
+        para->getParH(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].numberOfNodes;
+    reorderSendIndicesForCommAfterFtoC(sendIndices, numberOfSendNodesAfterFtoC, direction, level,
                                        sendIndicesForCommAfterFtoCPositions);
 }
 
-void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoC(int *sendIndices, int &numberOfSendNeighborsAfterFtoC,
-                                                       int direction, int level, int j,
-                                                       std::vector<uint> &sendIndicesForCommAfterFtoCPositions)
+void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoC(
+    int *sendIndices, int &numberOfSendNodesAfterFtoC, int direction, int level,
+    std::vector<uint> &sendIndicesForCommAfterFtoCPositions)
 {
     *logging::out << logging::Logger::INFO_INTERMEDIATE
                   << "reorder send indices for communication after fine to coarse: level: " << level
                   << " direction: " << direction;
     if (para->getParH(level)->intCF.kCF == 0 || para->getParH(level)->intFC.kFC == 0)
         *logging::out << logging::Logger::LOGGER_ERROR
-                      << "reorderSendIndicesForCommAfterFtoC(): iCellFCC needs to be inititalized before calling "
+                      << "reorderSendIndicesForCommAfterFtoC(): para->getParH(level)->intCF needs to be inititalized before calling "
                          "this function "
                       << "\n";
 
@@ -224,10 +277,10 @@ void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoC(int *sendI
     std::vector<int> sendIndicesOther;
     uint numberOfSendIndices = builder->getNumberOfSendIndices(direction, level);
 
-    //iCellFCC
+    // iCellFCC
     for (uint posInSendIndices = 0; posInSendIndices < numberOfSendIndices; posInSendIndices++) {
         sparseIndexSend = sendIndices[posInSendIndices];
-        if (isSparseIndexInICellFCC(para->getParH(level)->intFC.kFC, sparseIndexSend, level)){
+        if (isSparseIndexInICellFCC(para->getParH(level)->intFC.kFC, sparseIndexSend, level)) {
             addUniqueIndexToCommunicationVectors(sendIndicesAfterFtoC, sparseIndexSend,
                                                  sendIndicesForCommAfterFtoCPositions, posInSendIndices);
         }
@@ -240,25 +293,25 @@ void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoC(int *sendI
         findIfSparseIndexIsInSendIndicesAndAddToCommVectors(sparseIndex, sendIndices, numberOfSendIndices,
                                                             sendIndicesAfterFtoC, sendIndicesForCommAfterFtoCPositions);
 
-    numberOfSendNeighborsAfterFtoC = (int)sendIndicesAfterFtoC.size();
+    numberOfSendNodesAfterFtoC = (int)sendIndicesAfterFtoC.size();
 
     findIndicesNotInCommAfterFtoC(numberOfSendIndices, sendIndices, sendIndicesAfterFtoC, sendIndicesOther);
 
     // copy new vectors back to sendIndices array
-    for (int i = 0; i < numberOfSendNeighborsAfterFtoC; i++)
+    for (int i = 0; i < numberOfSendNodesAfterFtoC; i++)
         sendIndices[i] = sendIndicesAfterFtoC[i];
     for (uint i = 0; i < (uint)sendIndicesOther.size(); i++)
-        sendIndices[i + numberOfSendNeighborsAfterFtoC] = sendIndicesOther[i];
+        sendIndices[i + numberOfSendNodesAfterFtoC] = sendIndicesOther[i];
 
     *logging::out << logging::Logger::INFO_INTERMEDIATE << "... Process "
                   << " " << vf::gpu::Communicator::getInstanz()->getPID()
-                  << " numberOfSendNeighborsAfterFtoC: " << numberOfSendNeighborsAfterFtoC << "\n ";
+                  << " numberOfSendNodesAfterFtoC: " << numberOfSendNodesAfterFtoC << "\n ";
 
-    if (numberOfSendNeighborsAfterFtoC + sendIndicesOther.size() != numberOfSendIndices) {
+    if (numberOfSendNodesAfterFtoC + sendIndicesOther.size() != numberOfSendIndices) {
         *logging::out << logging::Logger::LOGGER_ERROR
                       << "reorderSendIndicesForCommAfterFtoC(): incorrect number of nodes"
                       << "\n";
-        std::cout << "numberOfSendNeighborsAfterFtoC = " << numberOfSendNeighborsAfterFtoC
+        std::cout << "numberOfSendNodesAfterFtoC = " << numberOfSendNodesAfterFtoC
                   << ", sendOrIndicesOther.size() = " << sendIndicesOther.size()
                   << ", numberOfSendOrRecvIndices = " << numberOfSendIndices << std::endl;
     }
@@ -292,7 +345,7 @@ void IndexRearrangementForStreams::aggregateNodesInICellCFC(int level, std::vect
         nodesCFC.push_back(neighborY[neighborX[sparseIndex]]);
         nodesCFC.push_back(neighborZ[neighborX[sparseIndex]]);
         nodesCFC.push_back(neighborZ[neighborY[sparseIndex]]);
-        nodesCFC.push_back(neighborZ[neighborY[neighborX[sparseIndex]]]);           
+        nodesCFC.push_back(neighborZ[neighborY[neighborX[sparseIndex]]]);
     }
 
     // remove duplicate nodes
@@ -305,8 +358,9 @@ void IndexRearrangementForStreams::addUniqueIndexToCommunicationVectors(
     std::vector<int> &sendIndicesAfterFtoC, int &sparseIndexSend,
     std::vector<unsigned int> &sendIndicesForCommAfterFtoCPositions, uint &posInSendIndices) const
 {
-    // add index to corresponding vectors but omit indices which are already in sendIndicesAfterFtoC
-    if (std::find(sendIndicesAfterFtoC.begin(), sendIndicesAfterFtoC.end(), sparseIndexSend) == sendIndicesAfterFtoC.end()) {
+    // add index to corresponding vectors, but omit indices which are already in sendIndicesAfterFtoC
+    if (std::find(sendIndicesAfterFtoC.begin(), sendIndicesAfterFtoC.end(), sparseIndexSend) ==
+        sendIndicesAfterFtoC.end()) {
         sendIndicesAfterFtoC.push_back(sparseIndexSend);
         sendIndicesForCommAfterFtoCPositions.push_back(posInSendIndices);
     }
@@ -341,37 +395,37 @@ void IndexRearrangementForStreams::findIndicesNotInCommAfterFtoC(const uint &num
     }
 }
 
-void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoCX(int direction, int level, int j,
-                                                        std::vector<uint> &sendIndicesForCommAfterFtoCPositions)
+void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoCX(
+    int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions)
 {
-    int *recvIndices                    = para->getParH(level)->recvProcessNeighborX[j].index;
-    int &numberOfRecvNeighborsAfterFtoC = para->getParH(level)->recvProcessNeighborsAfterFtoCX[j].numberOfNodes;
-    reorderRecvIndicesForCommAfterFtoC(recvIndices, numberOfRecvNeighborsAfterFtoC, direction, level, j,
+    int *recvIndices                    = para->getParH(level)->recvProcessNeighborX[indexOfProcessNeighbor].index;
+    int &numberOfRecvNodesAfterFtoC = para->getParH(level)->recvProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].numberOfNodes;
+    reorderRecvIndicesForCommAfterFtoC(recvIndices, numberOfRecvNodesAfterFtoC, direction, level,
                                        sendIndicesForCommAfterFtoCPositions);
 }
 
-void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoCY(int direction, int level, int j,
-                                                       std::vector<uint> &sendIndicesForCommAfterFtoCPositions)
+void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoCY(
+    int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions)
 {
-    int *recvIndices                    = para->getParH(level)->recvProcessNeighborY[j].index;
-    int &numberOfRecvNeighborsAfterFtoC = para->getParH(level)->recvProcessNeighborsAfterFtoCY[j].numberOfNodes;
-    reorderRecvIndicesForCommAfterFtoC(recvIndices, numberOfRecvNeighborsAfterFtoC, direction, level, j,
+    int *recvIndices                    = para->getParH(level)->recvProcessNeighborY[indexOfProcessNeighbor].index;
+    int &numberOfRecvNodesAfterFtoC = para->getParH(level)->recvProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].numberOfNodes;
+    reorderRecvIndicesForCommAfterFtoC(recvIndices, numberOfRecvNodesAfterFtoC, direction, level,
                                        sendIndicesForCommAfterFtoCPositions);
 }
 
-void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoCZ(int direction, int level, int j,
-                                                        std::vector<uint> &sendIndicesForCommAfterFtoCPositions)
+void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoCZ(
+    int direction, int level, int indexOfProcessNeighbor, std::vector<uint> &sendIndicesForCommAfterFtoCPositions)
 {
-    int *recvIndices                    = para->getParH(level)->recvProcessNeighborZ[j].index;
-    int &numberOfRecvNeighborsAfterFtoC = para->getParH(level)->recvProcessNeighborsAfterFtoCZ[j].numberOfNodes;
-    reorderRecvIndicesForCommAfterFtoC(recvIndices, numberOfRecvNeighborsAfterFtoC, direction, level, j,
+    int *recvIndices = para->getParH(level)->recvProcessNeighborZ[indexOfProcessNeighbor].index;
+    int &numberOfRecvNodesAfterFtoC =
+        para->getParH(level)->recvProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].numberOfNodes;
+    reorderRecvIndicesForCommAfterFtoC(recvIndices, numberOfRecvNodesAfterFtoC, direction, level,
                                        sendIndicesForCommAfterFtoCPositions);
 }
 
-void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoC(int *recvIndices,
-                                                       int &numberOfRecvNeighborsAfterFtoC, int direction, int level,
-                                                       int j,
-                                                       std::vector<uint> &sendIndicesForCommAfterFtoCPositions)
+void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoC(
+    int *recvIndices, int &numberOfRecvNodesAfterFtoC, int direction, int level,
+    std::vector<uint> &sendIndicesForCommAfterFtoCPositions)
 {
     *logging::out << logging::Logger::INFO_INTERMEDIATE
                   << "reorder receive indices for communication after fine to coarse: level: " << level
@@ -391,23 +445,23 @@ void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoC(int *recvI
 
     findIndicesNotInCommAfterFtoC(numberOfRecvIndices, recvIndices, recvIndicesAfterFtoC, recvIndicesOther);
 
-    numberOfRecvNeighborsAfterFtoC = (int)recvIndicesAfterFtoC.size();
+    numberOfRecvNodesAfterFtoC = (int)recvIndicesAfterFtoC.size();
 
     // copy new vectors back to sendIndices array
-    for (int i = 0; i < numberOfRecvNeighborsAfterFtoC; i++)
+    for (int i = 0; i < numberOfRecvNodesAfterFtoC; i++)
         recvIndices[i] = recvIndicesAfterFtoC[i];
     for (uint i = 0; i < (uint)recvIndicesOther.size(); i++)
-        recvIndices[i + numberOfRecvNeighborsAfterFtoC] = recvIndicesOther[i];
+        recvIndices[i + numberOfRecvNodesAfterFtoC] = recvIndicesOther[i];
 
     *logging::out << logging::Logger::INFO_INTERMEDIATE << "... Process "
                   << " " << vf::gpu::Communicator::getInstanz()->getPID()
-                  << " numberOfRecvNeighborsAfterFtoC: " << numberOfRecvNeighborsAfterFtoC << "\n ";
+                  << " numberOfRecvNodesAfterFtoC: " << numberOfRecvNodesAfterFtoC << "\n ";
 
-    if (numberOfRecvNeighborsAfterFtoC + recvIndicesOther.size() != numberOfRecvIndices) {
+    if (numberOfRecvNodesAfterFtoC + recvIndicesOther.size() != numberOfRecvIndices) {
         *logging::out << logging::Logger::LOGGER_ERROR
                       << "reorderRecvIndicesForCommAfterFtoC(): incorrect number of nodes"
                       << "\n";
-        std::cout << "numberOfRecvNeighborsAfterFtoC = " << numberOfRecvNeighborsAfterFtoC
+        std::cout << "numberOfRecvNodesAfterFtoC = " << numberOfRecvNodesAfterFtoC
                   << ", recvIndicesOther.size() = " << recvIndicesOther.size()
                   << ", numberOfRecvIndices = " << numberOfRecvIndices << std::endl;
     }
@@ -429,8 +483,6 @@ void IndexRearrangementForStreams::splitFineToCoarseIntoBorderAndBulk(const uint
 
 void IndexRearrangementForStreams::getGridInterfaceIndicesBorderBulkFC(int level)
 {
-    // this function reorders the arrays of FCC/FCF indices and return pointers and sizes of the new subarrays
-
     // create some local variables for better readability
     uint *iCellFccAll = para->getParH(level)->intFC.ICellFCC;
     uint *iCellFcfAll = para->getParH(level)->intFC.ICellFCF;
@@ -460,6 +512,7 @@ void IndexRearrangementForStreams::getGridInterfaceIndicesBorderBulkFC(int level
     para->getParH(level)->intFCBulk.ICellFCF   = iCellFcfAll + para->getParH(level)->intFCBorder.kFC;
 
     // copy the created vectors to the memory addresses of the old arrays
+    // this is inefficient :(
     for (uint i = 0; i < (uint)iCellFccBorderVector.size(); i++) {
         iCellFccAll[i] = iCellFccBorderVector[i];
         iCellFcfAll[i] = iCellFcfBorderVector[i];
@@ -477,25 +530,25 @@ void IndexRearrangementForStreams::splitCoarseToFineIntoBorderAndBulk(const uint
     para->getParD(level)->intCFBorder.kCF      = para->getParH(level)->intCFBorder.kCF;
     para->getParD(level)->intCFBulk.kCF        = para->getParH(level)->intCFBulk.kCF;
     para->getParD(level)->intCFBorder.ICellCFC = para->getParD(level)->intCF.ICellCFC;
-    para->getParD(level)->intCFBulk.ICellCFC   = para->getParD(level)->intCFBorder.ICellCFC + para->getParD(level)->intCFBorder.kCF;
+    para->getParD(level)->intCFBulk.ICellCFC =
+        para->getParD(level)->intCFBorder.ICellCFC + para->getParD(level)->intCFBorder.kCF;
     para->getParD(level)->intCFBorder.ICellCFF = para->getParD(level)->intCF.ICellCFF;
-    para->getParD(level)->intCFBulk.ICellCFF   = para->getParD(level)->intCFBorder.ICellCFF + para->getParD(level)->intCFBorder.kCF;
-    para->getParD(level)->offCFBulk.xOffCF     = para->getParD(level)->offCF.xOffCF + para->getParD(level)->intCFBorder.kCF;
-    para->getParD(level)->offCFBulk.yOffCF     = para->getParD(level)->offCF.yOffCF + para->getParD(level)->intCFBorder.kCF;
-    para->getParD(level)->offCFBulk.zOffCF     = para->getParD(level)->offCF.zOffCF + para->getParD(level)->intCFBorder.kCF;
+    para->getParD(level)->intCFBulk.ICellCFF =
+        para->getParD(level)->intCFBorder.ICellCFF + para->getParD(level)->intCFBorder.kCF;
+    para->getParD(level)->offCFBulk.xOffCF = para->getParD(level)->offCF.xOffCF + para->getParD(level)->intCFBorder.kCF;
+    para->getParD(level)->offCFBulk.yOffCF = para->getParD(level)->offCF.yOffCF + para->getParD(level)->intCFBorder.kCF;
+    para->getParD(level)->offCFBulk.zOffCF = para->getParD(level)->offCF.zOffCF + para->getParD(level)->intCFBorder.kCF;
 }
 
-void IndexRearrangementForStreams::getGridInterfaceIndicesBorderBulkCF(int level) 
-{ 
-    // this function reorders the arrays of CFC/CFF indices and sets pointers and sizes of the new subarrays
-     
+void IndexRearrangementForStreams::getGridInterfaceIndicesBorderBulkCF(int level)
+{
     // create some local variables for better readability
-    uint *iCellCfcAll    = para->getParH(level)->intCF.ICellCFC;
-    uint *iCellCffAll    = para->getParH(level)->intCF.ICellCFF;
-    uint *neighborX_SP   = this->para->getParH(level)->neighborX_SP;
-    uint *neighborY_SP   = this->para->getParH(level)->neighborY_SP;
-    uint *neighborZ_SP   = this->para->getParH(level)->neighborZ_SP;
-    auto grid            = this->builder->getGrid((uint)level);
+    uint *iCellCfcAll  = para->getParH(level)->intCF.ICellCFC;
+    uint *iCellCffAll  = para->getParH(level)->intCF.ICellCFF;
+    uint *neighborX_SP = this->para->getParH(level)->neighborX_SP;
+    uint *neighborY_SP = this->para->getParH(level)->neighborY_SP;
+    uint *neighborZ_SP = this->para->getParH(level)->neighborZ_SP;
+    auto grid          = this->builder->getGrid((uint)level);
 
     std::vector<uint> iCellCfcBorderVector;
     std::vector<uint> iCellCfcBulkVector;
@@ -520,7 +573,8 @@ void IndexRearrangementForStreams::getGridInterfaceIndicesBorderBulkCF(int level
             grid->isSparseIndexInFluidNodeIndicesBorder(neighborY_SP[neighborX_SP[sparseIndexOfICellBSW]]) ||
             grid->isSparseIndexInFluidNodeIndicesBorder(neighborZ_SP[neighborX_SP[sparseIndexOfICellBSW]]) ||
             grid->isSparseIndexInFluidNodeIndicesBorder(neighborZ_SP[neighborY_SP[sparseIndexOfICellBSW]]) ||
-            grid->isSparseIndexInFluidNodeIndicesBorder(neighborZ_SP[neighborY_SP[neighborX_SP[sparseIndexOfICellBSW]]])) {
+            grid->isSparseIndexInFluidNodeIndicesBorder(
+                neighborZ_SP[neighborY_SP[neighborX_SP[sparseIndexOfICellBSW]]])) {
 
             iCellCfcBorderVector.push_back(iCellCfcAll[i]);
             iCellCffBorderVector.push_back(iCellCffAll[i]);
@@ -541,13 +595,16 @@ void IndexRearrangementForStreams::getGridInterfaceIndicesBorderBulkCF(int level
     para->getParH(level)->intCFBorder.ICellCFF = para->getParH(level)->intCF.ICellCFF;
     para->getParH(level)->intCFBorder.kCF      = (uint)iCellCfcBorderVector.size();
     para->getParH(level)->intCFBulk.kCF        = (uint)iCellCfcBulkVector.size();
-    para->getParH(level)->intCFBulk.ICellCFC   = para->getParH(level)->intCF.ICellCFC + para->getParH(level)->intCFBorder.kCF;
-    para->getParH(level)->intCFBulk.ICellCFF   = para->getParH(level)->intCF.ICellCFF + para->getParH(level)->intCFBorder.kCF;
-    para->getParH(level)->offCFBulk.xOffCF     = para->getParH(level)->offCF.xOffCF + para->getParH(level)->intCFBorder.kCF;
-    para->getParH(level)->offCFBulk.yOffCF     = para->getParH(level)->offCF.yOffCF + para->getParH(level)->intCFBorder.kCF;
-    para->getParH(level)->offCFBulk.zOffCF     = para->getParH(level)->offCF.zOffCF + para->getParH(level)->intCFBorder.kCF;
+    para->getParH(level)->intCFBulk.ICellCFC =
+        para->getParH(level)->intCF.ICellCFC + para->getParH(level)->intCFBorder.kCF;
+    para->getParH(level)->intCFBulk.ICellCFF =
+        para->getParH(level)->intCF.ICellCFF + para->getParH(level)->intCFBorder.kCF;
+    para->getParH(level)->offCFBulk.xOffCF = para->getParH(level)->offCF.xOffCF + para->getParH(level)->intCFBorder.kCF;
+    para->getParH(level)->offCFBulk.yOffCF = para->getParH(level)->offCF.yOffCF + para->getParH(level)->intCFBorder.kCF;
+    para->getParH(level)->offCFBulk.zOffCF = para->getParH(level)->offCF.zOffCF + para->getParH(level)->intCFBorder.kCF;
 
     // copy the created vectors to the memory addresses of the old arrays
+    // this is inefficient :(
     for (uint i = 0; i < (uint)iCellCfcBorderVector.size(); i++) {
         para->getParH(level)->intCFBorder.ICellCFC[i] = iCellCfcBorderVector[i];
         para->getParH(level)->intCFBorder.ICellCFF[i] = iCellCffBorderVector[i];
@@ -556,10 +613,10 @@ void IndexRearrangementForStreams::getGridInterfaceIndicesBorderBulkCF(int level
         para->getParH(level)->offCF.zOffCF[i]         = zOffCFBorderVector[i];
     }
     for (uint i = 0; i < (uint)iCellCfcBulkVector.size(); i++) {
-        para->getParH(level)->intCFBulk.ICellCFC[i]                       = iCellCfcBulkVector[i];
-        para->getParH(level)->intCFBulk.ICellCFF[i]                       = iCellCffBulkVector[i];
-        para->getParH(level)->offCF.xOffCF[i + xOffCFBorderVector.size()] = xOffCFBulkVector[i];
-        para->getParH(level)->offCF.yOffCF[i + yOffCFBorderVector.size()] = yOffCFBulkVector[i];
-        para->getParH(level)->offCF.zOffCF[i + zOffCFBorderVector.size()] = zOffCFBulkVector[i];
+        para->getParH(level)->intCFBulk.ICellCFC[i] = iCellCfcBulkVector[i];
+        para->getParH(level)->intCFBulk.ICellCFF[i] = iCellCffBulkVector[i];
+        para->getParH(level)->offCFBulk.xOffCF[i]   = xOffCFBulkVector[i];
+        para->getParH(level)->offCFBulk.yOffCF[i]   = yOffCFBulkVector[i];
+        para->getParH(level)->offCFBulk.zOffCF[i]   = zOffCFBulkVector[i];
     }
 }
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h
index fb1e0695071fff38d52730b3c94fef0b1d67a93e..65ee08666247308c3cdf1e533106189b441a325e 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h
@@ -1,11 +1,16 @@
+//! \file IndexRearrangementForStreams.h
+//! \ingroup GPU
+//! \author Anna Wellmann
+//! \ref master thesis of Anna Wellmann
+
 #ifndef IndexRearrangementForStreams_H
 #define IndexRearrangementForStreams_H
 
-#include "../GridProvider.h"
+#include <gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h>
 
-#include <vector>
-#include <string>
 #include <memory>
+#include <string>
+#include <vector>
 
 #include "LBM/LB.h"
 
@@ -13,69 +18,144 @@ class Parameter;
 class GridBuilder;
 namespace vf
 {
-	namespace gpu
-	{
-		class Communicator;
-	}
+namespace gpu
+{
+class Communicator;
 }
+} // namespace vf
 
 class IndexRearrangementForStreams
 {
 private:
-	std::shared_ptr<GridBuilder> builder;
+    std::shared_ptr<GridBuilder> builder;
     std::shared_ptr<Parameter> para;
 
 public:
+    //! \brief construct IndexRearrangementForStreams object
     IndexRearrangementForStreams(std::shared_ptr<Parameter> para, std::shared_ptr<GridBuilder> builder);
-    
+
+    //////////////////////////////////////////////////////////////////////////
     // communication after coarse to fine
+    //////////////////////////////////////////////////////////////////////////
+
+    //! \brief initialize the arrays for the communication after the interpolation from fine to coarse in x direction
+    //! \details Only the nodes involved in the interpolation need to be exchanged. Therefore in this method all nodes,
+    //! which are part of the interpolation as well as the communication, are identified.
+    //!
+    //! \ref see master thesis of Anna
+    //! Wellmann (p. 59-62: "Reduzieren der auszutauschenden Knoten")
     void initCommunicationArraysForCommAfterFinetoCoarseX(const uint &level, int j, int direction);
+    //! \brief initialize the arrays for the communication after the interpolation from fine to coarse in y direction
+    //! \details --> see x direction
     void initCommunicationArraysForCommAfterFinetoCoarseY(const uint &level, int j, int direction);
+    //! \brief initialize the arrays for the communication after the interpolation from fine to coarse in z direction
+    //! \details --> see x direction
     void initCommunicationArraysForCommAfterFinetoCoarseZ(const uint &level, int j, int direction);
 
+public:
+    //////////////////////////////////////////////////////////////////////////
     // split interpolation cells
+    //////////////////////////////////////////////////////////////////////////
+
+    //! \brief split the interpolation cells from coarse to fine into border and bulk
+    //! \details For communication hiding, the interpolation cells from the coarse to the fine grid need to be split
+    //! into two groups:
+    //!
+    //! - cells which are at the border between two gpus --> "border"
+    //!
+    //! - the other cells which are not directly related to the communication between the two gpus --> "bulk"
+    //!
+    //! \ref see master thesis of Anna Wellmann (p. 62-68: "Ãœberdeckung der reduzierten Kommunikation")
     void splitCoarseToFineIntoBorderAndBulk(const uint &level);
-    void splitFineToCoarseIntoBorderAndBulk(const uint &level);
 
+    //! \brief split the interpolation cells from fine to coarse into border and bulk
+    //! \details For communication hiding, the interpolation cells from the fine to the coarse grid need to be split
+    //! into two groups:
+    //!
+    //! - cells which are at the border between two gpus --> "border"
+    //!
+    //! - the other cells which are not directly related to the communication between the two gpus --> "bulk"
+    //!
+    //! \ref see master thesis of Anna Wellmann (p. 62-68: "Ãœberdeckung der reduzierten Kommunikation")
+    void splitFineToCoarseIntoBorderAndBulk(const uint &level);
 
 private:
+    //////////////////////////////////////////////////////////////////////////
     // communication after coarse to fine
-    void copyProcessNeighborToCommAfterFtoCX(const uint &level, int j);
-    void copyProcessNeighborToCommAfterFtoCY(const uint &level, int j);
-    void copyProcessNeighborToCommAfterFtoCZ(const uint &level, int j);
+    //////////////////////////////////////////////////////////////////////////
+
+    //! \brief inits pointers for reduced communication after interpolation fine to coarse by copying them from "normal"
+    //! communication
+    void copyProcessNeighborToCommAfterFtoCX(const uint &level, int indexOfProcessNeighbor);
+    void copyProcessNeighborToCommAfterFtoCY(const uint &level, int indexOfProcessNeighbor);
+    void copyProcessNeighborToCommAfterFtoCZ(const uint &level, int indexOfProcessNeighbor);
 
-    void reorderSendIndicesForCommAfterFtoCX(int direction, int level, int j,
+    void reorderSendIndicesForCommAfterFtoCX(int direction, int level, int indexOfProcessNeighbor,
                                              std::vector<uint> &sendIndicesForCommAfterFtoCPositions);
-    void reorderSendIndicesForCommAfterFtoCY(int direction, int level, int j,
+    void reorderSendIndicesForCommAfterFtoCY(int direction, int level, int indexOfProcessNeighbor,
                                              std::vector<uint> &sendIndicesForCommAfterFtoCPositions);
-    void reorderSendIndicesForCommAfterFtoCZ(int direction, int level, int j,
+    void reorderSendIndicesForCommAfterFtoCZ(int direction, int level, int indexOfProcessNeighbor,
                                              std::vector<uint> &sendIndicesForCommAfterFtoCPositions);
-    void reorderSendIndicesForCommAfterFtoC(int *sendIndices, int &numberOfSendNeighborsAfterFtoC, int direction,
-                                            int level, int j, std::vector<uint> &sendIndicesForCommAfterFtoCPositions);
 
+    //! \brief the send indices are reordered for the communication after the interpolation from fine to coarse
+    //! \details The indices of nodes which are part of the interpolation are moved to the front of vector with the send
+    //! indices. 
+    //! \pre para->getParH(level)->intCF needs to be initialized 
+    //! \param sendIndices is the pointer to the vector with the send indices, which will be reordered in this function
+    //! \param numberOfSendNodesAfterFtoC will be set in this method 
+    //! \param sendIndicesForCommAfterFtoCPositions stores each sendIndex's positions before reordering
+    void reorderSendIndicesForCommAfterFtoC(int *sendIndices, int &numberOfSendNodesAfterFtoC, int direction,
+                                            int level, std::vector<uint> &sendIndicesForCommAfterFtoCPositions);
+    //! \brief check if a sparse index occurs in the ICellFCC
     bool isSparseIndexInICellFCC(uint sizeOfICellFCC, int sparseIndexSend, int level);
+    //! \brief aggregate all nodes in the coarse cells for the interpolation in coarse to fine
+    //! \details For the coarse cells in the interpolation from coarse to fine only one node is stored. This method
+    //! looks for the other nodes of each cell and puts them into a vector. Duplicate nodes are only stored once.
     void aggregateNodesInICellCFC(int level, std::vector<uint> &nodesCFC);
+    //! \brief add index to sendIndicesAfterFtoC and sendIndicesForCommAfterFtoCPositions, but omit indices which are already in sendIndicesAfterFtoC
     void addUniqueIndexToCommunicationVectors(std::vector<int> &sendIndicesAfterFtoC, int &sparseIndexSend,
                                               std::vector<unsigned int> &sendIndicesForCommAfterFtoCPositions,
                                               uint &posInSendIndices) const;
-    void findIfSparseIndexIsInSendIndicesAndAddToCommVectors(int sparseIndex, int *sendIndices, uint numberOfSendIndices,
-                                                             std::vector<int> &sendIndicesAfterFtoC,
-                                                             std::vector<uint> &sendIndicesForCommAfterFtoCPositions) const;
+    //! \brief find if a sparse index is a send index. If true, call addUniqueIndexToCommunicationVectors()
+    void
+    findIfSparseIndexIsInSendIndicesAndAddToCommVectors(int sparseIndex, int *sendIndices, uint numberOfSendIndices,
+                                                        std::vector<int> &sendIndicesAfterFtoC,
+                                                        std::vector<uint> &sendIndicesForCommAfterFtoCPositions) const;
+    //! \brief find all indices which are not part of the communication after the interpolation from fine to coarse
     void findIndicesNotInCommAfterFtoC(const uint &numberOfSendOrRecvIndices, int *sendOrReceiveIndices,
                                        std::vector<int> &sendOrReceiveIndicesAfterFtoC,
                                        std::vector<int> &sendOrIndicesOther);
 
-    void reorderRecvIndicesForCommAfterFtoCX(int direction, int level, int j,
+    void reorderRecvIndicesForCommAfterFtoCX(int direction, int level, int indexOfProcessNeighbor,
                                              std::vector<uint> &sendIndicesForCommAfterFtoCPositions);
-    void reorderRecvIndicesForCommAfterFtoCY(int direction, int level, int j,
+    void reorderRecvIndicesForCommAfterFtoCY(int direction, int level, int indexOfProcessNeighbor,
                                              std::vector<uint> &sendIndicesForCommAfterFtoCPositions);
-    void reorderRecvIndicesForCommAfterFtoCZ(int direction, int level, int j,
+    void reorderRecvIndicesForCommAfterFtoCZ(int direction, int level, int indexOfProcessNeighbor,
                                              std::vector<uint> &sendIndicesForCommAfterFtoCPositions);
-    void reorderRecvIndicesForCommAfterFtoC(int *recvIndices, int &numberOfRecvNeighborsAfterFtoC, int direction,
-                                            int level, int j, std::vector<uint> &sendIndicesForCommAfterFtoCPositions);
-    
+                                             
+    //! \brief reorder the receive indices in the same way that the send indices were reordered.
+    //! \details When the send indices are reordered, the receive indices need to be reordered accordingly.
+    //! \pre sendIndicesForCommAfterFtoCPositions should not be empty
+    //! \param recvIndices is the pointer to the vector with the receive indices, which will be reordered in this function
+    //! \param numberOfRecvNodesAfterFtoC will be set in this function
+    //! \param sendIndicesForCommAfterFtoCPositions stores each sendIndex's positions before reordering and is used to reorder the receive indices in the same way
+    void reorderRecvIndicesForCommAfterFtoC(int *recvIndices, int &numberOfRecvNodesAfterFtoC, int direction,
+                                            int level, std::vector<uint> &sendIndicesForCommAfterFtoCPositions);
+
+private:
+    //////////////////////////////////////////////////////////////////////////
     // split interpolation cells
+    //////////////////////////////////////////////////////////////////////////
+
+    //! \brief This function reorders the arrays of CFC/CFF indices and sets the pointers and sizes of the new
+    //! subarrays. \details The coarse cells for interpolation from coarse to fine (iCellCFC) are divided into two
+    //! subgroups: border and bulk. The fine cells (iCellCFF) are reordered accordingly. The offset cells (xOffCF,
+    //! yOffCF, zOffCF) must be reordered in the same way.
     void getGridInterfaceIndicesBorderBulkCF(int level);
+
+    //! \brief This function reorders the arrays of FCC/FCF indices and sets the pointers and sizes of the new subarrays.
+    //! \details The coarse cells for interpolation from fine to coarse (iCellFCC) are divided into two subgroups:
+    //! border and bulk. The fine cells (iCellFCF) are reordered accordingly.
     void getGridInterfaceIndicesBorderBulkFC(int level);
 };
 
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cfg b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..e414d4f3173e555b8944fa9637ebbd2023ce393c
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cfg
@@ -0,0 +1,3 @@
+# these two parameters need to be defined in each config file
+Path = /output/path
+GridPath = /path/to/grid
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2982aa5c4eddbaba53473c57b87a6a1860d76f2e
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp
@@ -0,0 +1,248 @@
+#include <gmock/gmock.h>
+
+#include <algorithm>
+#include <filesystem>
+#include <iostream>
+
+#include <Parameter/Parameter.h>
+#include <basics/config/ConfigurationFile.h>
+
+#include <DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h>
+#include <gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h>
+#include <gpu/GridGenerator/grid/GridImp.h>
+
+template <typename T>
+bool vectorsAreEqual(T *vector1, std::vector<T> vectorExpected)
+{
+    for (uint i = 0; i < vectorExpected.size(); i++) {
+        if (vector1[i] != vectorExpected[i])
+            return false;
+    }
+    return true;
+}
+
+class LevelGridBuilderDouble : public LevelGridBuilder
+{
+private:
+    SPtr<Grid> grid;
+    LevelGridBuilderDouble() = default;
+
+public:
+    LevelGridBuilderDouble(SPtr<Grid> grid) : LevelGridBuilder(Device(), ""), grid(grid){};
+    SPtr<Grid> getGrid(uint level) override { return grid; };
+    std::shared_ptr<Grid> getGrid(int level, int box) override { return grid; };
+};
+
+class GridImpDouble : public GridImp
+{
+private:
+    std::vector<uint> fluidNodeIndicesBorder;
+
+public:
+    GridImpDouble(Object *object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta,
+                  SPtr<GridStrategy> gridStrategy, Distribution d, uint level)
+        : GridImp(object, startX, startY, startZ, endX, endY, endZ, delta, gridStrategy, d, level)
+    {
+    }
+
+    static SPtr<GridImpDouble> makeShared(Object *object, real startX, real startY, real startZ, real endX, real endY,
+                                          real endZ, real delta, SPtr<GridStrategy> gridStrategy, Distribution d,
+                                          uint level)
+    {
+        SPtr<GridImpDouble> grid(
+            new GridImpDouble(object, startX, startY, startZ, endX, endY, endZ, delta, gridStrategy, d, level));
+        return grid;
+    }
+
+    void setFluidNodeIndicesBorder(std::vector<uint> fluidNodeIndicesBorder)
+    {
+        this->fluidNodeIndicesBorder = fluidNodeIndicesBorder;
+    }
+
+    bool isSparseIndexInFluidNodeIndicesBorder(uint &sparseIndex) const override
+    {
+        return std::find(this->fluidNodeIndicesBorder.begin(), this->fluidNodeIndicesBorder.end(), sparseIndex) !=
+               this->fluidNodeIndicesBorder.end();
+    }
+};
+
+struct CFBorderBulk {
+    // data to work on
+    std::vector<uint> fluidNodeIndicesBorder = { 10, 11, 12, 13, 14, 15, 16 };
+    std::vector<uint> iCellCFC               = { 1, 11, 3, 13, 5, 15, 7 };
+    std::vector<uint> iCellCFF               = { 2, 12, 4, 14, 6, 16, 8 };
+    uint sizeOfICellCf                       = (uint)iCellCFC.size();
+    uint neighborX_SP[17]                    = { 0u };
+    uint neighborY_SP[17]                    = { 0u };
+    uint neighborZ_SP[17]                    = { 0u };
+    int level                                = 0;
+    std::vector<real> offsetCFx              = { 1, 11, 3, 13, 5, 15, 7 };
+    std::vector<real> offsetCFy              = { 101, 111, 103, 113, 105, 115, 107 };
+    std::vector<real> offsetCFz              = { 1001, 1011, 1003, 1013, 1005, 1015, 1007 };
+
+    // expected data
+    std::vector<uint> iCellCfcBorder_expected   = { 11, 13, 15 };
+    std::vector<uint> iCellCfcBulk_expected     = { 1, 3, 5, 7 };
+    std::vector<uint> iCellCffBorder_expected   = { 12, 14, 16 };
+    std::vector<uint> iCellCffBulk_expected     = { 2, 4, 6, 8 };
+    std::vector<real> offsetCFx_Border_expected = { 11, 13, 15 };
+    std::vector<real> offsetCFx_Bulk_expected   = { 1, 3, 5, 7 };
+    std::vector<real> offsetCFy_Border_expected = { 111, 113, 115 };
+    std::vector<real> offsetCFy_Bulk_expected   = { 101, 103, 105, 107 };
+    std::vector<real> offsetCFz_Border_expected = { 1011, 1013, 1015 };
+    std::vector<real> offsetCFz_Bulk_expected   = { 1001, 1003, 1005, 1007 };
+};
+
+struct FCBorderBulk {
+    // data to work on
+    std::vector<uint> fluidNodeIndicesBorder = { 110, 111, 112, 113, 114, 115, 116 };
+    std::vector<uint> iCellFCC               = { 11, 111, 13, 113, 15, 115, 17 };
+    std::vector<uint> iCellFCF               = { 12, 112, 14, 114, 16, 116, 18 };
+    uint sizeOfICellFC                       = (uint)iCellFCC.size();
+    int level                                = 1;
+
+    // expected data
+    std::vector<uint> iCellFccBorder_expected = { 111, 113, 115 };
+    std::vector<uint> iCellFccBulk_expected   = { 11, 13, 15, 17 };
+    std::vector<uint> iCellFcfBorder_expected = { 112, 114, 116 };
+    std::vector<uint> iCellFcfBulk_expected   = { 12, 14, 16, 18 };
+};
+
+static SPtr<Parameter> initParameterClass()
+{
+    std::filesystem::path filePath = __FILE__; //  assuming that the config file is stored parallel to this file.
+    filePath.replace_filename("IndexRearrangementForStreamsTest.cfg");
+    vf::basics::ConfigurationFile config;
+    config.load(filePath.string());
+    return std::make_shared<Parameter>(config, 1, 0);
+}
+
+class IndexRearrangementForStreamsTest_IndicesCFBorderBulkTest : public testing::Test
+{
+public:
+    CFBorderBulk cf;
+    SPtr<Parameter> para;
+
+private:
+    static std::unique_ptr<IndexRearrangementForStreams> createTestSubjectCFBorderBulk(CFBorderBulk &cf,
+                                                                                       std::shared_ptr<Parameter> para)
+    {
+        SPtr<GridImpDouble> grid =
+            GridImpDouble::makeShared(nullptr, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, nullptr, Distribution(), 1);
+        grid->setFluidNodeIndicesBorder(cf.fluidNodeIndicesBorder);
+        std::shared_ptr<LevelGridBuilderDouble> builder = std::make_shared<LevelGridBuilderDouble>(grid);
+
+        para->setMaxLevel(cf.level + 1); // setMaxLevel resizes parH and parD
+        para->parH[cf.level]                    = std::make_shared<LBMSimulationParameter>();
+        para->parD[cf.level]                    = std::make_shared<LBMSimulationParameter>();
+        para->getParH(cf.level)->intCF.ICellCFC = &(cf.iCellCFC.front());
+        para->getParH(cf.level)->intCF.ICellCFF = &(cf.iCellCFF.front());
+        para->getParH(cf.level)->neighborX_SP   = cf.neighborX_SP;
+        para->getParH(cf.level)->neighborY_SP   = cf.neighborY_SP;
+        para->getParH(cf.level)->neighborZ_SP   = cf.neighborZ_SP;
+        para->getParH(cf.level)->intCF.kCF      = cf.sizeOfICellCf;
+        para->getParH(cf.level)->offCF.xOffCF   = &(cf.offsetCFx.front());
+        para->getParH(cf.level)->offCF.yOffCF   = &(cf.offsetCFy.front());
+        para->getParH(cf.level)->offCF.zOffCF   = &(cf.offsetCFz.front());
+
+        return std::make_unique<IndexRearrangementForStreams>(para, builder);
+    };
+
+    void SetUp() override
+    {
+        para             = initParameterClass();
+        auto testSubject = createTestSubjectCFBorderBulk(cf, para);
+        testSubject->splitCoarseToFineIntoBorderAndBulk(cf.level);
+    }
+};
+
+class IndexRearrangementForStreamsTest_IndicesFCBorderBulkTest : public testing::Test
+{
+public:
+    FCBorderBulk fc;
+    SPtr<Parameter> para;
+
+private:
+    static std::unique_ptr<IndexRearrangementForStreams> createTestSubjectFCBorderBulk(FCBorderBulk &fc,
+                                                                                       std::shared_ptr<Parameter> para)
+    {
+        SPtr<GridImpDouble> grid =
+            GridImpDouble::makeShared(nullptr, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, nullptr, Distribution(), 1);
+        grid->setFluidNodeIndicesBorder(fc.fluidNodeIndicesBorder);
+        std::shared_ptr<LevelGridBuilderDouble> builder = std::make_shared<LevelGridBuilderDouble>(grid);
+
+        para->setMaxLevel(fc.level + 1); // setMaxLevel resizes parH and parD
+        para->parH[fc.level]                    = std::make_shared<LBMSimulationParameter>();
+        para->parD[fc.level]                    = std::make_shared<LBMSimulationParameter>();
+        para->getParH(fc.level)->intFC.ICellFCC = &(fc.iCellFCC.front());
+        para->getParH(fc.level)->intFC.ICellFCF = &(fc.iCellFCF.front());
+        para->getParH(fc.level)->intFC.kFC      = fc.sizeOfICellFC;
+
+        return std::make_unique<IndexRearrangementForStreams>(para, builder);
+    };
+
+    void SetUp() override
+    {
+        para             = initParameterClass();
+        auto testSubject = createTestSubjectFCBorderBulk(fc, para);
+        testSubject->splitFineToCoarseIntoBorderAndBulk(fc.level);
+    }
+};
+
+TEST_F(IndexRearrangementForStreamsTest_IndicesCFBorderBulkTest, splitCoarseToFineIntoBorderAndBulk)
+{
+    EXPECT_THAT(para->getParH(cf.level)->intCFBorder.kCF + para->getParH(cf.level)->intCFBulk.kCF,
+                testing::Eq(cf.sizeOfICellCf))
+        << "The number of interpolation cells from coarse to fine changed during reordering.";
+
+    // check coarse to fine border (coarse nodes)
+    EXPECT_THAT(para->getParH(cf.level)->intCFBorder.kCF, testing::Eq((uint)cf.iCellCfcBorder_expected.size()));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->intCFBorder.ICellCFC, cf.iCellCfcBorder_expected))
+        << "intCFBorder.ICellCFC does not match the expected border vector";
+    // check coarse to fine border (fine nodes)
+    EXPECT_THAT(para->getParH(cf.level)->intCFBorder.kCF, testing::Eq((uint)cf.iCellCffBorder_expected.size()));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->intCFBorder.ICellCFF, cf.iCellCffBorder_expected))
+        << "intCFBorder.ICellCFF does not match the expected border vector";
+
+    // check coarse to fine bulk (coarse nodes)
+    EXPECT_THAT(para->getParH(cf.level)->intCFBulk.kCF, testing::Eq((uint)cf.iCellCfcBulk_expected.size()));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->intCFBulk.ICellCFC, cf.iCellCfcBulk_expected))
+        << "intCFBulk.ICellCFC does not match the expected bulk vector";
+    // check coarse to fine bulk (fine nodes)
+    EXPECT_THAT(para->getParH(cf.level)->intCFBulk.kCF, testing::Eq((uint)cf.iCellCffBulk_expected.size()));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->intCFBulk.ICellCFF, cf.iCellCffBulk_expected))
+        << "intCFBulk.ICellCFF does not match the expected bulk vector";
+
+    // check offset cells
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCF.xOffCF, cf.offsetCFx_Border_expected));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCFBulk.xOffCF, cf.offsetCFx_Bulk_expected));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCF.yOffCF, cf.offsetCFy_Border_expected));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCFBulk.yOffCF, cf.offsetCFy_Bulk_expected));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCF.zOffCF, cf.offsetCFz_Border_expected));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(cf.level)->offCFBulk.zOffCF, cf.offsetCFz_Bulk_expected));
+}
+
+TEST_F(IndexRearrangementForStreamsTest_IndicesFCBorderBulkTest, splitFineToCoarseIntoBorderAndBulk)
+{
+    EXPECT_THAT(para->getParH(fc.level)->intFCBorder.kFC + para->getParH(fc.level)->intFCBulk.kFC,
+                testing::Eq(fc.sizeOfICellFC))
+        << "The number of interpolation cells from fine to coarse changed during reordering.";
+
+    // check fine to coarse border (coarse nodes)
+    EXPECT_THAT(para->getParH(fc.level)->intFCBorder.kFC, testing::Eq((uint)fc.iCellFccBorder_expected.size()));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->intFCBorder.ICellFCC, fc.iCellFccBorder_expected))
+        << "intFCBorder.ICellFCC does not match the expected border vector";
+    // check fine to coarse border (fine nodes)
+    EXPECT_THAT(para->getParH(fc.level)->intFCBorder.kFC, testing::Eq((uint)fc.iCellFcfBorder_expected.size()));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->intFCBorder.ICellFCF, fc.iCellFcfBorder_expected))
+        << "intFCBorder.ICellFCF does not match the expected border vector";
+
+    // check fine to coarse bulk (coarse nodes)
+    EXPECT_THAT(para->getParH(fc.level)->intFCBulk.kFC, testing::Eq((uint)fc.iCellFccBulk_expected.size()));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->intFCBulk.ICellFCC, fc.iCellFccBulk_expected))
+        << "intFCBulk.ICellFCC does not match the expected bulk vector";
+    // check fine to coarse bulk (fine nodes)
+    EXPECT_THAT(para->getParH(fc.level)->intFCBulk.kFC, testing::Eq((uint)fc.iCellFcfBulk_expected.size()));
+    EXPECT_TRUE(vectorsAreEqual(para->getParH(fc.level)->intFCBulk.ICellFCF, fc.iCellFcfBulk_expected))
+        << "intFCBulk.ICellFCF does not match the expected bulk vector";
+}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu
index da37bb1e2fee675cb07410d483dac21dc84215dc..f57fd9dd9bc2a372c7790bf8f3837e69d1d52beb 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu
@@ -26,9 +26,9 @@
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file Cumulant27chim.cu
+//! \file Cumulant27chimStream.cu
 //! \ingroup GPU
-//! \author Martin Schoenherr
+//! \author Martin Schoenherr, Anna Wellmann
 //=======================================================================================
 /* Device code */
 #include "LBM/LB.h" 
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
index 7019eb6cc56a93f21bb6559de74d74a84fcb24ea..bf31cc6d8c95453d1dd05d355a793d49e6864c4f 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
@@ -1486,7 +1486,7 @@ bool Parameter::findIndexInSendNodesYZ(int level, int index, int &indexOfProcess
     return false;
 }
 
-void Parameter::initNumberOfProcessNeighborsAfterFtoCX(int level)
+void Parameter::initProcessNeighborsAfterFtoCX(int level)
 {
     this->getParH(level)->sendProcessNeighborsAfterFtoCX.resize(this->getParH(level)->sendProcessNeighborX.size());
     this->getParH(level)->recvProcessNeighborsAfterFtoCX.resize(this->getParH(level)->recvProcessNeighborX.size());
@@ -1496,7 +1496,7 @@ void Parameter::initNumberOfProcessNeighborsAfterFtoCX(int level)
         this->getParH(level)->recvProcessNeighborsAfterFtoCX.size());
 }
 
-void Parameter::initNumberOfProcessNeighborsAfterFtoCY(int level)
+void Parameter::initProcessNeighborsAfterFtoCY(int level)
 {
     this->getParH(level)->sendProcessNeighborsAfterFtoCY.resize(this->getParH(level)->sendProcessNeighborY.size());
     this->getParH(level)->recvProcessNeighborsAfterFtoCY.resize(this->getParH(level)->recvProcessNeighborY.size());
@@ -1506,7 +1506,7 @@ void Parameter::initNumberOfProcessNeighborsAfterFtoCY(int level)
         this->getParH(level)->recvProcessNeighborsAfterFtoCY.size());
 }
 
-void Parameter::initNumberOfProcessNeighborsAfterFtoCZ(int level)
+void Parameter::initProcessNeighborsAfterFtoCZ(int level)
 {
     this->getParH(level)->sendProcessNeighborsAfterFtoCZ.resize(this->getParH(level)->sendProcessNeighborZ.size());
     this->getParH(level)->recvProcessNeighborsAfterFtoCZ.resize(this->getParH(level)->recvProcessNeighborZ.size());
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
index cdedfa1e2b18d4751bef34141108528c64a61bd3..940b2493f96a8eab73d07d6a19031d708bc865dd 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
@@ -899,16 +899,16 @@ private:
     std::unique_ptr<CudaStreamManager> cudaStreamManager;
 
 public:
-    //! sets whether streams and thus communication hiding should be used
-    /*! This function is only useful for simulations on multiple GPUs. If there is only one MPI process, the passed value is automatically overwritten with false. */ 
+    //! \brief sets whether streams and thus communication hiding should be used        
+    //! \details This function is only useful for simulations on multiple GPUs. If there is only one MPI process, the passed value is automatically overwritten with false.
     void setUseStreams(bool useStreams);
     bool getUseStreams();
     std::unique_ptr<CudaStreamManager> &getStreamManager();
     bool getKernelNeedsFluidNodeIndicesToRun();
 
-    void initNumberOfProcessNeighborsAfterFtoCX(int level);
-    void initNumberOfProcessNeighborsAfterFtoCY(int level);
-    void initNumberOfProcessNeighborsAfterFtoCZ(int level);
+    void initProcessNeighborsAfterFtoCX(int level);
+    void initProcessNeighborsAfterFtoCY(int level);
+    void initProcessNeighborsAfterFtoCZ(int level);
 
     void findEdgeNodesCommMultiGPU();
     bool useReducedCommunicationAfterFtoC{ true };
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp b/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp
index aa0551632e566768aaa9b087c072f665d6f7bc3d..df77d0fd4b668ccf745c8c1a04ce7beaa4133860 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp
@@ -155,4 +155,221 @@ TEST(ParameterTest, check_all_Parameter_CanBePassedToConstructor)
 }
 
 
+TEST(ParameterTest, findEdgeNodesXY_shouldReturnCorrectVector)
+{
+
+    std::filesystem::path filePath = __FILE__;
+    filePath.replace_filename("parameterTest.cfg");
+    vf::basics::ConfigurationFile config;
+    config.load(filePath.string());
+    Parameter para(config, 1, 0);
+
+    para.initLBMSimulationParameter();
+
+    int level = 0;
+    para.parH[level]->recvProcessNeighborX.push_back(ProcessNeighbor27());
+    para.parH[level]->sendProcessNeighborY.push_back(ProcessNeighbor27());
+    para.parH[level]->sendProcessNeighborY.push_back(ProcessNeighbor27());
+
+    int numRecvNeighbor = (int)para.parH[level]->recvProcessNeighborX.size() - 1;
+    int numSendNeighbor = (int)para.parH[level]->sendProcessNeighborY.size() - 1;
+
+    const int sizeRecv                                                    = 6;
+    const int sizeSend                                                    = 10;
+    para.parH[level]->recvProcessNeighborX[numRecvNeighbor].numberOfNodes = sizeRecv;
+    para.parH[level]->sendProcessNeighborY[numSendNeighbor].numberOfNodes = sizeSend;
+
+    int recvNeighbors[sizeRecv]                                   = { 1, 2, 3, 4, 5, 6 };
+    para.parH[level]->recvProcessNeighborX[numRecvNeighbor].index = recvNeighbors;
+
+    int sendNeighbors[sizeSend]                                   = { 20, 1, 21, 22, 6, 23, 5, 24, 25, 26 };
+    para.parH[level]->sendProcessNeighborY[numSendNeighbor].index = sendNeighbors;
+
+
+    para.findEdgeNodesCommMultiGPU();
+
+
+    std::vector<std::pair<int, int>> expectedEdgeNodesXtoYRecv = { std::pair(numRecvNeighbor, 0),
+                                                                     std::pair(numRecvNeighbor, 4),
+                                                                     std::pair(numRecvNeighbor, 5) };
+
+    std::vector<std::pair<int, int>> expectedEdgeNodesXtoYSend = { std::pair(numSendNeighbor, 1),
+                                                                     std::pair(numSendNeighbor, 6),
+                                                                     std::pair(numSendNeighbor, 4) };
+
+    EXPECT_THAT(para.parH[level]->edgeNodesXtoY.size(), testing::Eq(expectedEdgeNodesXtoYRecv.size()));
+
+    bool vectorsAreIdentical = true;
+    for (int i = 0; i < (int)expectedEdgeNodesXtoYRecv.size(); i++) {
+        if (para.parH[level]->edgeNodesXtoY[i].indexOfProcessNeighborRecv != expectedEdgeNodesXtoYRecv[i].first) {
+            vectorsAreIdentical = false;
+            break;
+        }
+        if (para.parH[level]->edgeNodesXtoY[i].indexInRecvBuffer != expectedEdgeNodesXtoYRecv[i].second) {
+            vectorsAreIdentical = false;
+            break;
+        }
+    }
+
+    EXPECT_TRUE(vectorsAreIdentical);
+
+    vectorsAreIdentical = true;
+    for (int i = 0; i < (int)expectedEdgeNodesXtoYSend.size(); i++) {
+        if (para.parH[level]->edgeNodesXtoY[i].indexOfProcessNeighborSend != expectedEdgeNodesXtoYSend[i].first) {
+            vectorsAreIdentical = false;
+            break;
+        }
+        if (para.parH[level]->edgeNodesXtoY[i].indexInSendBuffer != expectedEdgeNodesXtoYSend[i].second) {
+            vectorsAreIdentical = false;
+            break;
+        }
+    }
+
+    EXPECT_TRUE(vectorsAreIdentical);
+}
+
+TEST(ParameterTest, findEdgeNodesXZ_shouldReturnCorrectVector)
+{
+
+    std::filesystem::path filePath = __FILE__;
+    filePath.replace_filename("parameterTest.cfg");
+    vf::basics::ConfigurationFile config;
+    config.load(filePath.string());
+    Parameter para(config, 1, 0);
+
+    para.initLBMSimulationParameter();
+
+    int level = 0;
+    para.parH[level]->recvProcessNeighborX.push_back(ProcessNeighbor27());
+    para.parH[level]->sendProcessNeighborZ.push_back(ProcessNeighbor27());
+    para.parH[level]->sendProcessNeighborZ.push_back(ProcessNeighbor27());
+
+    int numRecvNeighbor = (int)para.parH[level]->recvProcessNeighborX.size() - 1;
+    int numSendNeighbor = (int)para.parH[level]->sendProcessNeighborZ.size() - 1;
+
+    const int sizeRecv = 10;
+    const int sizeSend = 6;
+
+    para.parH[level]->recvProcessNeighborX[numRecvNeighbor].numberOfNodes = sizeRecv;
+    para.parH[level]->sendProcessNeighborZ[numSendNeighbor].numberOfNodes = sizeSend;
+
+    int recvNeighbors[sizeRecv]                                   = { 20, 1, 21, 22, 6, 23, 5, 24, 25, 26 };
+    para.parH[level]->recvProcessNeighborX[numRecvNeighbor].index = recvNeighbors;
+
+    int sendNeighbors[sizeSend]                                   = { 1, 2, 3, 4, 5, 6 };
+    para.parH[level]->sendProcessNeighborZ[numSendNeighbor].index = sendNeighbors;
+
+
+    para.findEdgeNodesCommMultiGPU();
+
+
+    std::vector<std::pair<int, int>> expectedEdgeNodesXtoZRecv = { std::pair(numRecvNeighbor, 1),
+                                                                     std::pair(numRecvNeighbor, 4),
+                                                                     std::pair(numRecvNeighbor, 6) };
+    std::vector<std::pair<int, int>> expectedEdgeNodesXtoZSend = { std::pair(numSendNeighbor, 0),
+                                                                     std::pair(numSendNeighbor, 5),
+                                                                     std::pair(numSendNeighbor, 4) };
+
+    EXPECT_THAT(para.parH[level]->edgeNodesXtoZ.size(), testing::Eq(expectedEdgeNodesXtoZRecv.size()));
+
+    bool vectorsAreIdentical = true;
+    for (int i = 0; i < (int)expectedEdgeNodesXtoZRecv.size(); i++) {
+        if (para.parH[level]->edgeNodesXtoZ[i].indexOfProcessNeighborRecv != expectedEdgeNodesXtoZRecv[i].first) {
+            vectorsAreIdentical = false;
+            break;
+        }
+        if (para.parH[level]->edgeNodesXtoZ[i].indexInRecvBuffer != expectedEdgeNodesXtoZRecv[i].second) {
+            vectorsAreIdentical = false;
+            break;
+        }
+    }
+
+    EXPECT_TRUE(vectorsAreIdentical);
+
+    vectorsAreIdentical = true;
+    for (int i = 0; i < (int)expectedEdgeNodesXtoZRecv.size(); i++) {
+        if (para.parH[level]->edgeNodesXtoZ[i].indexOfProcessNeighborSend != expectedEdgeNodesXtoZSend[i].first){
+            vectorsAreIdentical = false;
+            break;
+        }
+        if (para.parH[level]->edgeNodesXtoZ[i].indexInSendBuffer != expectedEdgeNodesXtoZSend[i].second) {
+            vectorsAreIdentical = false;
+            break;
+        }
+    }
+
+    EXPECT_TRUE(vectorsAreIdentical);
+}
+
+TEST(ParameterTest, findEdgeNodesYZ_shouldReturnCorrectVector)
+{
+
+    std::filesystem::path filePath = __FILE__;
+    filePath.replace_filename("parameterTest.cfg");
+    vf::basics::ConfigurationFile config;
+    config.load(filePath.string());
+    Parameter para(config, 1, 0);
+
+    para.initLBMSimulationParameter();
+
+    int level = 0;
+
+    para.parH[level]->recvProcessNeighborY.push_back(ProcessNeighbor27());
+    para.parH[level]->sendProcessNeighborZ.push_back(ProcessNeighbor27());
+    para.parH[level]->sendProcessNeighborZ.push_back(ProcessNeighbor27());
+
+    const int sizeRecv  = 10;
+    const int sizeSend1 = 6;
+    const int sizeSend2 = 5;
+
+    para.parH[level]->recvProcessNeighborY[0].numberOfNodes = sizeRecv;
+    para.parH[level]->sendProcessNeighborZ[0].numberOfNodes = sizeSend1;
+    para.parH[level]->sendProcessNeighborZ[1].numberOfNodes = sizeSend2;
+
+    int recvNeighbors[sizeRecv]                     = { 20, 1, 9, 22, 6, 23, 5, 24, 11, 26 };
+    para.parH[level]->recvProcessNeighborY[0].index = recvNeighbors;
+
+    int sendNeighbors1[sizeSend1]                   = { 1, 2, 3, 4, 5, 6 };
+    int sendNeighbors2[sizeSend2]                   = { 7, 8, 9, 10, 11 };
+    para.parH[level]->sendProcessNeighborZ[0].index = sendNeighbors1;
+    para.parH[level]->sendProcessNeighborZ[1].index = sendNeighbors2;
+
+
+    para.findEdgeNodesCommMultiGPU();
+
+
+    std::vector<std::pair<int, int>> expectedEdgeNodesXtoZRecv = { std::pair(0, 1), std::pair(0, 2), std::pair(0, 4),
+                                                                     std::pair(0, 6), std::pair(0, 8) };
+    std::vector<std::pair<int, int>> expectedEdgeNodesXtoZSend = { std::pair(0, 0), std::pair(1, 2), std::pair(0, 5),
+                                                                     std::pair(0, 4), std::pair(1, 4) };
+
+    EXPECT_THAT(para.parH[level]->edgeNodesYtoZ.size(), testing::Eq(expectedEdgeNodesXtoZRecv.size()));
+
+    bool vectorsAreIdentical = true;
+    for (int i = 0; i < (int)expectedEdgeNodesXtoZRecv.size(); i++) {
+        if (para.parH[level]->edgeNodesYtoZ[i].indexOfProcessNeighborRecv != expectedEdgeNodesXtoZRecv[i].first) {
+            vectorsAreIdentical = false;
+            break;
+        }
+        if (para.parH[level]->edgeNodesYtoZ[i].indexInRecvBuffer != expectedEdgeNodesXtoZRecv[i].second) {
+            vectorsAreIdentical = false;
+            break;
+        }
+    }
+
+    EXPECT_TRUE(vectorsAreIdentical);
+
+    vectorsAreIdentical = true;
+    for (int i = 0; i < (int)expectedEdgeNodesXtoZRecv.size(); i++) {
+        if (para.parH[level]->edgeNodesYtoZ[i].indexOfProcessNeighborSend != expectedEdgeNodesXtoZSend[i].first) {
+            vectorsAreIdentical = false;
+            break;
+        }
+        if (para.parH[level]->edgeNodesYtoZ[i].indexInSendBuffer != expectedEdgeNodesXtoZSend[i].second) {
+            vectorsAreIdentical = false;
+            break;
+        }
+    }
 
+    EXPECT_TRUE(vectorsAreIdentical);
+}
\ No newline at end of file