diff --git a/CMake/3rd/vtk.cmake b/CMake/3rd/vtk.cmake
index 83cf22e8849298d2b42909664cacd5fd9044903e..7bba91a5a25da622ef752d670845754a6349815c 100644
--- a/CMake/3rd/vtk.cmake
+++ b/CMake/3rd/vtk.cmake
@@ -3,9 +3,8 @@
 # VTK_DIR needs to bet set to the VTK build directory in the config file.
 #########################################################################
 find_package(VTK REQUIRED)
-    vf_get_library_name(library_name)
+vf_get_library_name(library_name)
 
-    include(${VTK_USE_FILE})
-    target_include_directories(${library_name} PRIVATE ${VTK_INCLUDE_DIRS})
+target_include_directories(${library_name} PRIVATE ${VTK_INCLUDE_DIRS})
 
-    target_link_libraries(${library_name} PRIVATE ${VTK_LIBRARIES})
+target_link_libraries(${library_name} PRIVATE ${VTK_LIBRARIES})
diff --git a/CMake/Cache.cmake b/CMake/Cache.cmake
index 23f5e83e17b67802b4886cd76b468127047f230c..5cd6b500bb9d38afa095fc0eb1087b228ee48ab2 100644
--- a/CMake/Cache.cmake
+++ b/CMake/Cache.cmake
@@ -27,5 +27,5 @@ if(CACHE_BINARY)
   set(CMAKE_CUDA_COMPILER_LAUNCHER ${CACHE_BINARY})
   set(CMAKE_C_COMPILER_LAUNCHER ${CACHE_BINARY})
 else()
-  message(WARNING "${CACHE_OPTION} is enabled but was not found. Not using it")
+  message(STATUS "${CACHE_OPTION} is enabled but was not found. Not using it")
 endif()
diff --git a/CMake/cmake_config_files/GITLAB-RUNNER03.config.cmake b/CMake/cmake_config_files/GITLAB-RUNNER03.config.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..edaf7669b171518c8aa6b2ec9786147a6ffc48f5
--- /dev/null
+++ b/CMake/cmake_config_files/GITLAB-RUNNER03.config.cmake
@@ -0,0 +1,10 @@
+#################################################################################
+# VirtualFluids MACHINE FILE
+# Responsible: Soeren Peters
+# OS:          Windows 10
+#################################################################################
+
+
+#SET(PATH_NUMERICAL_TESTS "E:/temp/numericalTests/")
+#LIST(APPEND VF_COMPILER_DEFINITION "PATH_NUMERICAL_TESTS=${PATH_NUMERICAL_TESTS}")
+SET(CMAKE_CUDA_ARCHITECTURES 61)
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 39eb35289dc49bf6aebade13ea24937c0cbacb59..163ec4f05ee8b12d7641f3856ae4640565101851 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -47,9 +47,9 @@ option(BUILD_VF_GPU "Build VirtualFluids gpu variant" OFF)
 
 option(BUILD_USE_OPENMP "Build VirtualFluids with openmp" ON)
 option(BUILD_USE_BOOST "Build VirtualFluids with boost" OFF)
+option(BUILD_USE_MPI "include MPI library support" ON)
 
 # vf gpu
-option(BUILD_VF_GPU          "Build VirtualFluids GPU"     ON )
 option(BUILD_VF_GKS          "Build VirtualFluids GKS"     OFF )
 option(BUILD_VF_TRAFFIC      "Build VirtualFluids Traffic" OFF)
 option(BUILD_JSONCPP         "Builds json cpp "            OFF)
@@ -97,7 +97,7 @@ IF( BUILD_VF_DOUBLE_ACCURACY )
     list(APPEND VF_COMPILER_DEFINITION VF_DOUBLE_ACCURACY)
 ENDIF()
 
-if(BUILD_VF_GPU)
+if(BUILD_VF_GPU OR BUILD_VF_GKS)
     include(CheckLanguage)
     check_language(CUDA)
 
@@ -133,6 +133,7 @@ if(BUILD_VF_GPU)
 
     message(STATUS "CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}")
     message(STATUS "CUDA Architecture: ${CMAKE_CUDA_ARCHITECTURES}")
+    set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" CACHE STRING "Cuda Architecture (compute capability)")
 endif()
 
 
@@ -164,7 +165,10 @@ if(BUILD_USE_OPENMP)
     find_package(OpenMP REQUIRED)
 endif()
 
-find_package(MPI REQUIRED)
+if(BUILD_USE_MPI)
+    find_package(MPI REQUIRED)
+    list(APPEND VF_COMPILER_DEFINITION VF_MPI)
+endif()
 
 # boost
 IF(BUILD_USE_BOOST)
@@ -181,8 +185,7 @@ ENDIF()
 
 add_subdirectory(src/logger)
 add_subdirectory(src/basics)
-#add_subdirectory(src/mpi)
-#add_subdirectory(src/cuda)
+add_subdirectory(src/mpi)
 add_subdirectory(src/lbm)
 
 
@@ -192,7 +195,8 @@ add_subdirectory(src/lbm)
 if (BUILD_VF_CPU)
     include (cpu.cmake)
 endif()
-if(BUILD_VF_GPU)
+if(BUILD_VF_GPU OR BUILD_VF_GKS)
+    add_subdirectory(src/cuda)
     include (gpu.cmake)
 endif()
 
diff --git a/apps/cpu/AcousticPulse/ap.cpp b/apps/cpu/AcousticPulse/ap.cpp
index 3ce1b39e558f3dadc0cd024e05180103d87b2f21..77ddfbf626e25b8c5d17d438255e37b4977dbddd 100644
--- a/apps/cpu/AcousticPulse/ap.cpp
+++ b/apps/cpu/AcousticPulse/ap.cpp
@@ -10,7 +10,7 @@ void run()
 {
    try
    {
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       int    numOfThreads = 4;
diff --git a/apps/cpu/BeadPack/beadpack.cpp b/apps/cpu/BeadPack/beadpack.cpp
index 09980876882a1ceb2b555256ddf891cd00f08384..d683fc445359e6e2d19a7d6f72c59158d6bf7f98 100644
--- a/apps/cpu/BeadPack/beadpack.cpp
+++ b/apps/cpu/BeadPack/beadpack.cpp
@@ -23,7 +23,7 @@ void sbonepd(const char *configname)
          throw exceptionText;
       }
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (machine == "BOMBADIL")
@@ -73,8 +73,8 @@ void sbonepd(const char *configname)
       LBMReal rho_LB = 0.0;
       //nueWasser = 1e-6 m^2/s
       double nu_real = 1e-6;
-      LBMReal dt = 5e-8; // s (frei gewählt)
-      //dx - frei gewählt
+      LBMReal dt = 5e-8; // s (frei gewählt)
+      //dx - frei gewählt
       //
       LBMReal nu_LB = nu_real / (dx*dx / dt);
 
diff --git a/apps/cpu/BoxBenchmark/bb.cpp b/apps/cpu/BoxBenchmark/bb.cpp
index 719df7738df87e2b310aa03332fb2a3ef8f2ffe6..7d5c0ec2f4fdf4627b87a32727925f62a3bc89e9 100644
--- a/apps/cpu/BoxBenchmark/bb.cpp
+++ b/apps/cpu/BoxBenchmark/bb.cpp
@@ -37,7 +37,7 @@ void run(string configname)
 
       //UbLog::reportingLevel() = UbLog::logLevelFromString("DEBUG3");
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/CheckpointConverter/cpc.cpp b/apps/cpu/CheckpointConverter/cpc.cpp
index c26e7df4c682e1c5214d15644c57d52ac7d8413a..4eb526cc75be39153f61cbc4d599a21bcc5394b4 100644
--- a/apps/cpu/CheckpointConverter/cpc.cpp
+++ b/apps/cpu/CheckpointConverter/cpc.cpp
@@ -17,7 +17,7 @@ void run(string configname)
       int    step = config.getValue<int>("step");
       int    numberOfProcesses = config.getValue<int>("numberOfProcesses");
       
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       SPtr<Grid3D> grid(new Grid3D(comm));
diff --git a/apps/cpu/ConvectionOfVortex/cov.cpp b/apps/cpu/ConvectionOfVortex/cov.cpp
index 2782060b9d68f2673e840ec4b882b0a66544c564..4e1f592b896c21da5bc6bc59f3b2bfb584650374 100644
--- a/apps/cpu/ConvectionOfVortex/cov.cpp
+++ b/apps/cpu/ConvectionOfVortex/cov.cpp
@@ -10,7 +10,7 @@ void run()
 {
    try
    {
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       int    numOfThreads = 4;
diff --git a/apps/cpu/CouetteFlow/cflow.cpp b/apps/cpu/CouetteFlow/cflow.cpp
index e37bafade742c42601fdeb66cfa45ca516097ff3..276fbe125b5b3cd0271542f7e9d8ab9f9abca518 100644
--- a/apps/cpu/CouetteFlow/cflow.cpp
+++ b/apps/cpu/CouetteFlow/cflow.cpp
@@ -37,7 +37,7 @@ void bflow(string configname)
 //      double          Re = config.getValue<double>("Re");
 //      double          Bn = config.getValue<double>("Bn");
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/DHIT/dhit.cpp b/apps/cpu/DHIT/dhit.cpp
index 957c823ef664e4dd19df45fa322e93a8c8164bb7..850a531a9ac490ed195ad17ce298ca5cf027151a 100644
--- a/apps/cpu/DHIT/dhit.cpp
+++ b/apps/cpu/DHIT/dhit.cpp
@@ -29,7 +29,7 @@ void run(string configname)
       double          lambda = config.getDouble("lambda");
       double          initTime = config.getDouble("initTime");
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/DLR-F16-Porous/f16.cpp b/apps/cpu/DLR-F16-Porous/f16.cpp
index 9197aca45bbfed9be817fbcb83db4973e4a58f87..7ddd135b2996c794e2802e235fdedc2a3013cdf3 100644
--- a/apps/cpu/DLR-F16-Porous/f16.cpp
+++ b/apps/cpu/DLR-F16-Porous/f16.cpp
@@ -95,7 +95,7 @@ void run(string configname)
       
 
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/DLR-F16-Solid/f16.cpp b/apps/cpu/DLR-F16-Solid/f16.cpp
index d88b650b143793b67388f920bd2e23c380af0e5a..328da5dc54b5a2db751eb71ad1ce8810cb471470 100644
--- a/apps/cpu/DLR-F16-Solid/f16.cpp
+++ b/apps/cpu/DLR-F16-Solid/f16.cpp
@@ -47,7 +47,7 @@ void run(string configname)
       double          timeAvStart       = config.getValue<double>("timeAvStart");
       double          timeAvStop        = config.getValue<double>("timeAvStop");
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/DLR-F16/f16.cpp b/apps/cpu/DLR-F16/f16.cpp
index 523a9c7c09795627d43de8b119597e52caf4d5b0..2bbc1b8ed63be110d4b6ccc98cdcdb88337d4760 100644
--- a/apps/cpu/DLR-F16/f16.cpp
+++ b/apps/cpu/DLR-F16/f16.cpp
@@ -13,7 +13,7 @@ double rangeRandom1()
 
 void setBC(SPtr<Grid3D> grid, string pathGeo, string fngFileWhole, string zigZagTape, vector<double>  boundingBox, double uLB, double rhoLB, double blockLength, SPtr<BCProcessor> bcProcessor)
 {
-   SPtr<Communicator> comm = MPICommunicator::getInstance();
+   SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
    int myid = comm->getProcessID();
    
    std::vector<std::vector<SPtr<Block3D>> > blockVector;
@@ -205,7 +205,7 @@ void run(string configname)
       int             chunk = config.getValue<int>("chunk");
 
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/FlowAroundCylinder/cylinder.cpp b/apps/cpu/FlowAroundCylinder/cylinder.cpp
index 81bfd5ff3658d41e29ab6f56590119de7db3fe82..2f470d17dbac3600b2c42acede2bba9e6c45f872 100644
--- a/apps/cpu/FlowAroundCylinder/cylinder.cpp
+++ b/apps/cpu/FlowAroundCylinder/cylinder.cpp
@@ -33,7 +33,7 @@ void run(string configname)
       vector<int>     blockNx = config.getVector<int>("blockNx");
       double          dx = config.getValue<double>("dx");
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/FlowAroundCylinder/cylinder.cpp.old b/apps/cpu/FlowAroundCylinder/cylinder.cpp.old
index 7293ab62fcbe0e43511df29b995aa82c2be37ca5..f251ee63514c67cca6df0e998cc196d3cc5a9ec8 100644
--- a/apps/cpu/FlowAroundCylinder/cylinder.cpp.old
+++ b/apps/cpu/FlowAroundCylinder/cylinder.cpp.old
@@ -15,7 +15,7 @@ void run(const char *cstr)
       int numOfThreads = 1;
       double availMem = 0;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if(machine == "BOMBADIL") 
@@ -385,7 +385,7 @@ void run2(const char *cstr)
       int numOfThreads = 1;
       double availMem = 0;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if(machine == "BOMBADIL") 
diff --git a/apps/cpu/Hagen_Poiseuille_flow/pflow.cpp b/apps/cpu/Hagen_Poiseuille_flow/pflow.cpp
index 05f6c625be6ffe4cd56a673eb44a62e6c59d5ce0..074e8c3aa9b338fa00db0718862d20c7e99f5e55 100644
--- a/apps/cpu/Hagen_Poiseuille_flow/pflow.cpp
+++ b/apps/cpu/Hagen_Poiseuille_flow/pflow.cpp
@@ -29,7 +29,7 @@ using namespace std;
 //      double          deltax = config.getDouble("deltax");
 //
 //
-//      SPtr<Communicator> comm = MPICommunicator::getInstance();
+//      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
 //      int myid = comm->getProcessID();
 //
 //      if (logToFile)
@@ -322,7 +322,7 @@ void pflowdp(string configname)
       double          cpStepStart = config.getValue<double>("cpStepStart");
       bool            newStart = config.getValue<bool>("newStart");
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       LBMReal rhoLB = 0.0;
diff --git a/apps/cpu/Hagen_Poiseuille_flow2/pflow2.cpp b/apps/cpu/Hagen_Poiseuille_flow2/pflow2.cpp
index 5bcf21ed9596ce6d8d6bce9974b98627c007168d..40bf20ecabe23637b6271edd6ac6c4fd951f4760 100644
--- a/apps/cpu/Hagen_Poiseuille_flow2/pflow2.cpp
+++ b/apps/cpu/Hagen_Poiseuille_flow2/pflow2.cpp
@@ -29,7 +29,7 @@ void pflowdp(string configname)
       double          deltax = config.getValue<double>("deltax");
 
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       LBMReal rhoLB = 0.0;
diff --git a/apps/cpu/HerschelBulkleyModel/hbflow.cpp b/apps/cpu/HerschelBulkleyModel/hbflow.cpp
index 976635bdea0823f6e4cdef208f06d98499e0eabf..8483883aca772693758b9f52fcee53c54b84a1d5 100644
--- a/apps/cpu/HerschelBulkleyModel/hbflow.cpp
+++ b/apps/cpu/HerschelBulkleyModel/hbflow.cpp
@@ -38,7 +38,7 @@ void bflow(string configname)
 //      double          Bn = config.getValue<double>("Bn");
       double          scaleFactor = config.getValue<double>("scaleFactor");
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/HerschelBulkleySphere/hbsphere.cpp b/apps/cpu/HerschelBulkleySphere/hbsphere.cpp
index eff2c2c628995b34f00650d310275d186d38f2fc..d86424869d5598adb2dfa61c1da2f6590bd79401 100644
--- a/apps/cpu/HerschelBulkleySphere/hbsphere.cpp
+++ b/apps/cpu/HerschelBulkleySphere/hbsphere.cpp
@@ -35,7 +35,7 @@ void bflow(string configname)
       double          Bn = config.getValue<double>("Bn");
       vector<double>  sphereCenter = config.getVector<double>("sphereCenter");
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/InterfaceTest/itest.cpp b/apps/cpu/InterfaceTest/itest.cpp
index 104305be9e8f0fde44bb8d5f129fb3a55786b70a..face949bd7d68c8d4fe94d7e32dc728b7818f42f 100644
--- a/apps/cpu/InterfaceTest/itest.cpp
+++ b/apps/cpu/InterfaceTest/itest.cpp
@@ -11,7 +11,7 @@ void run()
 {
    try
    {
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       int    numOfThreads = 4;
diff --git a/apps/cpu/JetBreakup/JetBreakup.cpp b/apps/cpu/JetBreakup/JetBreakup.cpp
index d6cff6366a00a65983ee67075f99d8ca2c9e1ff9..eb7d705537e4307e4ca1066ac9d06dafb72449f4 100644
--- a/apps/cpu/JetBreakup/JetBreakup.cpp
+++ b/apps/cpu/JetBreakup/JetBreakup.cpp
@@ -51,7 +51,7 @@ void run(string configname)
       double beta = 12 * sigma / interfaceThickness;
       double kappa = 1.5 * interfaceThickness * sigma;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/LaminarTubeFlow/ltf.cpp b/apps/cpu/LaminarTubeFlow/ltf.cpp
index 3cffab61c644e4b0cfab2795e2a8c7698555262a..c8951903084ccacf208718f25948206b1c98c1ab 100644
--- a/apps/cpu/LaminarTubeFlow/ltf.cpp
+++ b/apps/cpu/LaminarTubeFlow/ltf.cpp
@@ -31,7 +31,7 @@ void run(string configname)
       double          cpStep = config.getValue<double>("cpStep");
       bool            newStart = config.getValue<bool>("newStart");
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/LaminarTubeFlowConv/ltf.cpp b/apps/cpu/LaminarTubeFlowConv/ltf.cpp
index 6c20cdea96b775b9ae3314daa628c26774c30788..53cd7c1ac7900118f47e483f867d22de2e3e7974 100644
--- a/apps/cpu/LaminarTubeFlowConv/ltf.cpp
+++ b/apps/cpu/LaminarTubeFlowConv/ltf.cpp
@@ -30,7 +30,7 @@ void run(int tn)
       int numOfThreads = 1;
       double availMem = 0;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if(machine == "BOMBADIL") 
diff --git a/apps/cpu/LidDrivenCavity/LidDrivenCavity.cpp b/apps/cpu/LidDrivenCavity/LidDrivenCavity.cpp
index 104d487b905c03901ffa3ecc24f2f34b44cffe5e..1819ee0f6fe00191f28ddfcab8cce93466047289 100644
--- a/apps/cpu/LidDrivenCavity/LidDrivenCavity.cpp
+++ b/apps/cpu/LidDrivenCavity/LidDrivenCavity.cpp
@@ -80,7 +80,7 @@ int main(int  /*argc*/, char*  /*argv*/[])
       double g_maxX3 = 0.5;
 
       // NullCommunicator is a place-holder for interprocess communication
-      SPtr<Communicator> comm = NullCommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = NullCommunicator::getInstance();
       // new grid object
       SPtr<Grid3D> grid(new Grid3D(comm));
       // set grid spacing
diff --git a/apps/cpu/Multiphase/Multiphase (Droplet Test).cpp.backup b/apps/cpu/Multiphase/Multiphase (Droplet Test).cpp.backup
index 389b754f848e8886140113310572686a9efd908b..533fb619c2bb82783b99110894079594b5ddba47 100644
--- a/apps/cpu/Multiphase/Multiphase (Droplet Test).cpp.backup	
+++ b/apps/cpu/Multiphase/Multiphase (Droplet Test).cpp.backup	
@@ -51,7 +51,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/Multiphase (Jet breakup on Phoenix).cpp.backup b/apps/cpu/Multiphase/Multiphase (Jet breakup on Phoenix).cpp.backup
index 1adc07f8b293327e9bf814d82ebcca8b8aa91d44..954d4b539411adb36ea47724ab612fcd8d70be87 100644
--- a/apps/cpu/Multiphase/Multiphase (Jet breakup on Phoenix).cpp.backup	
+++ b/apps/cpu/Multiphase/Multiphase (Jet breakup on Phoenix).cpp.backup	
@@ -51,7 +51,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/Multiphase.cpp b/apps/cpu/Multiphase/Multiphase.cpp
index 77bcfd0c00a4f2d6050de7dfb58a2c923b5d2ce4..9fd7110f25bddb2f23ef550bbb9a7c0754fab9d8 100644
--- a/apps/cpu/Multiphase/Multiphase.cpp
+++ b/apps/cpu/Multiphase/Multiphase.cpp
@@ -52,7 +52,7 @@ void run(string configname)
         double beta  = 12 * sigma / interfaceThickness;
         double kappa = 1.5 * interfaceThickness * sigma;
 
-        SPtr<Communicator> comm = MPICommunicator::getInstance();
+        SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
         int myid                = comm->getProcessID();
 
         if (myid == 0)
diff --git a/apps/cpu/Multiphase/backup/Multiphase (Droplet Test).cpp b/apps/cpu/Multiphase/backup/Multiphase (Droplet Test).cpp
index 18989d3601729dcc663f707bc21e3a7f91a57dfd..3362f66d5b07d1b94fd6c2623af58011ae425905 100644
--- a/apps/cpu/Multiphase/backup/Multiphase (Droplet Test).cpp	
+++ b/apps/cpu/Multiphase/backup/Multiphase (Droplet Test).cpp	
@@ -51,7 +51,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/backup/Multiphase (Final before automation).cpp b/apps/cpu/Multiphase/backup/Multiphase (Final before automation).cpp
index b6160314eb87c785443602a4c5eb35ca16ebc02b..5ed66edace35c61fa3244c445fa479db13ccec8f 100644
--- a/apps/cpu/Multiphase/backup/Multiphase (Final before automation).cpp	
+++ b/apps/cpu/Multiphase/backup/Multiphase (Final before automation).cpp	
@@ -67,7 +67,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/backup/Multiphase (Flow Focusing).cpp b/apps/cpu/Multiphase/backup/Multiphase (Flow Focusing).cpp
index 11e064e30a210485aabd40c40ee925e3acf56922..a6355a2aa56e0bd85d919250e750427df7662284 100644
--- a/apps/cpu/Multiphase/backup/Multiphase (Flow Focusing).cpp	
+++ b/apps/cpu/Multiphase/backup/Multiphase (Flow Focusing).cpp	
@@ -51,7 +51,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/backup/Multiphase (Jet breakup on Phoenix).cpp b/apps/cpu/Multiphase/backup/Multiphase (Jet breakup on Phoenix).cpp
index 1adc07f8b293327e9bf814d82ebcca8b8aa91d44..954d4b539411adb36ea47724ab612fcd8d70be87 100644
--- a/apps/cpu/Multiphase/backup/Multiphase (Jet breakup on Phoenix).cpp	
+++ b/apps/cpu/Multiphase/backup/Multiphase (Jet breakup on Phoenix).cpp	
@@ -51,7 +51,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/backup/Multiphase (T-Junction).cpp b/apps/cpu/Multiphase/backup/Multiphase (T-Junction).cpp
index 2f3e94e7acf42de01826e0aac1daff2689d04acb..1af6f9a19754035d1e4cc1466d80fa0d77de52c7 100644
--- a/apps/cpu/Multiphase/backup/Multiphase (T-Junction).cpp	
+++ b/apps/cpu/Multiphase/backup/Multiphase (T-Junction).cpp	
@@ -51,7 +51,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/backup/Multiphase (Thermal).cpp b/apps/cpu/Multiphase/backup/Multiphase (Thermal).cpp
index a61d7b7541f05e66eab79f0e5bf1f4e91325632f..51ba4463a32740f8b2e07391d1ab174ccb9e7095 100644
--- a/apps/cpu/Multiphase/backup/Multiphase (Thermal).cpp	
+++ b/apps/cpu/Multiphase/backup/Multiphase (Thermal).cpp	
@@ -51,7 +51,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/backup/Multiphase (Tube).cpp b/apps/cpu/Multiphase/backup/Multiphase (Tube).cpp
index 7febd0cf18d80039af1fc284612789321b21e6b6..7e6dfd24a1ba90420891de57a7ba399d5c860e7d 100644
--- a/apps/cpu/Multiphase/backup/Multiphase (Tube).cpp	
+++ b/apps/cpu/Multiphase/backup/Multiphase (Tube).cpp	
@@ -42,7 +42,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/backup/Multiphase.cpp b/apps/cpu/Multiphase/backup/Multiphase.cpp
index df4c311d8de62ce79aa337af19561b510b859d79..8b09c15f4f5eb829ca37fce160bbb86ebb92f19d 100644
--- a/apps/cpu/Multiphase/backup/Multiphase.cpp
+++ b/apps/cpu/Multiphase/backup/Multiphase.cpp
@@ -78,7 +78,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/MultiphaseDropletTest/droplet.cpp b/apps/cpu/MultiphaseDropletTest/droplet.cpp
index 829ad38628665cbe006e7f9b88cb63b8ea362472..fe27ecce81cd97215600b54d613e7123cb80f261 100644
--- a/apps/cpu/MultiphaseDropletTest/droplet.cpp
+++ b/apps/cpu/MultiphaseDropletTest/droplet.cpp
@@ -45,7 +45,7 @@ void run(string configname)
         double beta  = 12 * sigma / interfaceThickness;
         double kappa = 1.5 * interfaceThickness * sigma;
 
-        SPtr<Communicator> comm = MPICommunicator::getInstance();
+        SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
         int myid                = comm->getProcessID();
 
         if (myid == 0)
diff --git a/apps/cpu/OrganPipe/OrganPipe.cpp b/apps/cpu/OrganPipe/OrganPipe.cpp
index 8ad094bc9cc8db392eac1f7926365a9dca2586c6..ad6ec5a1d0892b60f699b74ee4101a9f4ad047e3 100644
--- a/apps/cpu/OrganPipe/OrganPipe.cpp
+++ b/apps/cpu/OrganPipe/OrganPipe.cpp
@@ -8,7 +8,7 @@ void run(string configname)
 {
    try
    {
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (myid == 0) UBLOG(logINFO, "Testcase organ pipe");
diff --git a/apps/cpu/PlateWithPorousInlay/plate.cpp b/apps/cpu/PlateWithPorousInlay/plate.cpp
index 60531c9ae960eec48264d3876395a5edc69bc499..315bacfa954640c8963ef46c3a7c840280a69e06 100644
--- a/apps/cpu/PlateWithPorousInlay/plate.cpp
+++ b/apps/cpu/PlateWithPorousInlay/plate.cpp
@@ -52,7 +52,7 @@ void run(const char *cstr)
       stringstream logFilename;
       double availMem = 0;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr);
@@ -118,9 +118,9 @@ void run(const char *cstr)
 
       ///////////////Knotenabmessungen:
       int nx[3], blocknx[3];
-      nx[0] = 90;//240;//120;//60;//86;//43;//65;//50;  //länge
+      nx[0] = 90;//240;//120;//60;//86;//43;//65;//50;  //länge
       nx[1] = 2;//2;//6;///1;//5;// //breite
-      nx[2] = 30;//64;//32;//18;//5;//15;//15; //höhe gebiet
+      nx[2] = 30;//64;//32;//18;//5;//15;//15; //höhe gebiet
       blocknx[0] = 16;//10;//6;
       blocknx[1] = 16;//10;//6;
       blocknx[2] = 16;//10;//6;
@@ -128,11 +128,11 @@ void run(const char *cstr)
       int baseLevel = 0;
       int refineLevel = 5;
 
-      double H = 600.0; // Kanalhöhe [mm]
+      double H = 600.0; // Kanalhöhe [mm]
       double cdx = H / (double)(nx[2] * blocknx[2]);
       double fdx = cdx / double(1 << refineLevel);
 
-      //double h = 200.0; // gewünschte Plattenhöhe in Gitterpunkten
+      //double h = 200.0; // gewünschte Plattenhöhe in Gitterpunkten
       //double fdx = plate->getLengthX3()/h;
       //double cdx = fdx*double(1<<refineLevel);
 
@@ -147,7 +147,7 @@ void run(const char *cstr)
       // Re = 1000000
       // V = 16.05  # m / s
       // p = 994.7  #hPa(manuell abgelesen von MUB)
-      // T = 21.78  #°C
+      // T = 21.78  #°C
       // Luftfeuchte = 50.5   # %
       //////////////////////////////////////////////////////////////////////////
       // Simulation Parametr
@@ -155,7 +155,7 @@ void run(const char *cstr)
       double Re = 1e6; // 1133333.3333333335;
       double rhoLB = 0.0;
       double uLB = 0.1;
-      double lReal = 1000; //Plattenlänge in mm
+      double lReal = 1000; //Plattenlänge in mm
       double nuLB = (uLB*(lReal / cdx)) / Re;
 
       int sizeSP = 4;
diff --git a/apps/cpu/PoiseuilleFlow/pf1.cpp b/apps/cpu/PoiseuilleFlow/pf1.cpp
index 3880e9583dd07bdad7fcd11272f0a372155ef654..4e4d87ecc797db7545b2dae84e1f76220a02cc33 100644
--- a/apps/cpu/PoiseuilleFlow/pf1.cpp
+++ b/apps/cpu/PoiseuilleFlow/pf1.cpp
@@ -7,7 +7,7 @@ using namespace std;
 //pipe flow with forcing
 void pf1()
 {
-   SPtr<Communicator> comm = MPICommunicator::getInstance();
+   SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
    int myid = comm->getProcessID();
 
    //parameters
diff --git a/apps/cpu/PoiseuilleFlow/pf2.cpp b/apps/cpu/PoiseuilleFlow/pf2.cpp
index addb56c279c8fbddd51b6c03ac514014c9c33423..f312bc37dacc3caadd9935f450cdcf808c945b4f 100644
--- a/apps/cpu/PoiseuilleFlow/pf2.cpp
+++ b/apps/cpu/PoiseuilleFlow/pf2.cpp
@@ -6,7 +6,7 @@
 ////pipe flow with pressure drop
 //void pf2()
 //{
-//   SPtr<Communicator> comm = MPICommunicator::getInstance();
+//   SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
 //   int myid = comm->getProcessID();
 //
 //   //parameters
diff --git a/apps/cpu/PoiseuilleFlow/pf3.cpp b/apps/cpu/PoiseuilleFlow/pf3.cpp
index a2bcd1edcf49cd0245853feed5e16e102932ca95..a2eca67d593fd9e21641e99bd3910e4c775f00a9 100644
--- a/apps/cpu/PoiseuilleFlow/pf3.cpp
+++ b/apps/cpu/PoiseuilleFlow/pf3.cpp
@@ -6,7 +6,7 @@
 ////two plates flow with forcing
 //void pf3()
 //{
-//   SPtr<Communicator> comm = MPICommunicator::getInstance();
+//   SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
 //   int myid = comm->getProcessID();
 //
 //   //parameters
diff --git a/apps/cpu/PoiseuilleFlow/pf4.cpp b/apps/cpu/PoiseuilleFlow/pf4.cpp
index 28e81e76a6f2ad1e47b6589a826ca5139a265547..9b249f94e19fd6c53b6410702406c498c101ed42 100644
--- a/apps/cpu/PoiseuilleFlow/pf4.cpp
+++ b/apps/cpu/PoiseuilleFlow/pf4.cpp
@@ -6,7 +6,7 @@
 ////two plates flow with pressure drop
 //void pf4()
 //{
-//   SPtr<Communicator> comm = MPICommunicator::getInstance();
+//   SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
 //   int myid = comm->getProcessID();
 //
 //   //parameters
diff --git a/apps/cpu/Thermoplast/thermoplast.cpp b/apps/cpu/Thermoplast/thermoplast.cpp
index dc77db67a844004fe2ecc63a15df125c03ad1903..b004543254e4788ffa6a7314536b1811c217700b 100644
--- a/apps/cpu/Thermoplast/thermoplast.cpp
+++ b/apps/cpu/Thermoplast/thermoplast.cpp
@@ -52,7 +52,7 @@ vector<double> peMaxOffset;
 string          pathOut;// = "d:/temp/thermoplastCluster";
 string          pathGeo;// = "d:/Projects/ThermoPlast/Geometrie";
 
-void addNozzle(SPtr<Grid3D> grid, SPtr<Communicator> comm, SPtr<BCAdapter> noSlipBCAdapter/*, InteractorsHelper& intHelper*/)
+void addNozzle(SPtr<Grid3D> grid, SPtr<vf::mpi::Communicator> comm, SPtr<BCAdapter> noSlipBCAdapter/*, InteractorsHelper& intHelper*/)
 {
    int myid = comm->getProcessID();
    if (myid==0) UBLOG(logINFO, "Add nozzles:start");
@@ -126,7 +126,7 @@ void addNozzle(SPtr<Grid3D> grid, SPtr<Communicator> comm, SPtr<BCAdapter> noSli
    if (myid==0) UBLOG(logINFO, "Add nozzles:end");
 }
 
-std::shared_ptr<DemCoProcessor> makePeCoProcessor(SPtr<Grid3D> grid, SPtr<Communicator> comm, const SPtr<UbScheduler> peScheduler, const std::shared_ptr<LBMUnitConverter> lbmUnitConverter, int maxpeIterations)
+std::shared_ptr<DemCoProcessor> makePeCoProcessor(SPtr<Grid3D> grid, SPtr<vf::mpi::Communicator> comm, const SPtr<UbScheduler> peScheduler, const std::shared_ptr<LBMUnitConverter> lbmUnitConverter, int maxpeIterations)
 {
    double peRelaxtion = 0.7;
    //int maxpeIterations = 10000;
@@ -194,7 +194,7 @@ void createSpheres(double radius, Vector3D origin, int maxX2, int maxX3, double
 
 void thermoplast(string configname)
 {
-   SPtr<Communicator> comm = MPICommunicator::getInstance();
+   SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
    int myid = comm->getProcessID();
 
    vf::basics::ConfigurationFile   config;
diff --git a/apps/cpu/ViskomatXL/viskomat.cpp b/apps/cpu/ViskomatXL/viskomat.cpp
index 12eea28c32990f38393bc3d3478cd0833c30c5ef..be7d3e850b633f7f40b24eaffd024487447d0c12 100644
--- a/apps/cpu/ViskomatXL/viskomat.cpp
+++ b/apps/cpu/ViskomatXL/viskomat.cpp
@@ -41,7 +41,7 @@ void bflow(string configname)
 
       //outputPath = outputPath + "/rheometerBingham_" + config.getValue<string>("resolution") + "_" + config.getValue<string>("OmegaLB");
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Wing/wing.cpp b/apps/cpu/Wing/wing.cpp
index 34f75cebaa8f23c913e2bf19130299e307a707f2..ff6cbcfcab3b60669aea19ca6a56077034f0e7dc 100644
--- a/apps/cpu/Wing/wing.cpp
+++ b/apps/cpu/Wing/wing.cpp
@@ -30,7 +30,7 @@ void setup(const char *cstr1, const char *cstr2)
       int refineLevel = UbSystem::stringTo<int>(cf.getValue("refineLevel"));
       int blocknx = UbSystem::stringTo<int>(cf.getValue("blocknx"));
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if(machine == "Bombadil") int dumy=0; 
diff --git a/apps/cpu/aperm/aperm.cpp b/apps/cpu/aperm/aperm.cpp
index 0172975c677e0072d3ead6ab13127cca98ca7161..ecff1a453276444b706e31729012dea10597ac61 100644
--- a/apps/cpu/aperm/aperm.cpp
+++ b/apps/cpu/aperm/aperm.cpp
@@ -59,7 +59,7 @@ void run(string configname)
       double          cpStepStart = config.getDouble("cpStepStart");
       bool            newStart = config.getValue<bool>("newStart");
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/aperm/aperm.cpp.old b/apps/cpu/aperm/aperm.cpp.old
index 87f05527469d44c5ea565108e515bea2a6b50a29..54dfe45fd86ff791d3965632a21acac0a3284aea 100644
--- a/apps/cpu/aperm/aperm.cpp.old
+++ b/apps/cpu/aperm/aperm.cpp.old
@@ -58,7 +58,7 @@ void run(string configname)
       bool            yDir = config.getBool("yDir");
       bool            zDir = config.getBool("zDir");
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/aperm/aperm.cpp.old2 b/apps/cpu/aperm/aperm.cpp.old2
index fd2b40d89a9479ba50843028a3d61e9422e759b7..bd49f895a277a34608393a7cb53c0b6466526a95 100644
--- a/apps/cpu/aperm/aperm.cpp.old2
+++ b/apps/cpu/aperm/aperm.cpp.old2
@@ -55,7 +55,7 @@ void run(string configname)
       bool            yDir = config.getBool("yDir");
       bool            zDir = config.getBool("zDir");
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/bChannelA/bChannelA.cpp b/apps/cpu/bChannelA/bChannelA.cpp
index 75e263bbe125871598fdd1cbd38749e41132a224..d50e87437b5fa17353b0f8adb298ec91ecc9d964 100644
--- a/apps/cpu/bChannelA/bChannelA.cpp
+++ b/apps/cpu/bChannelA/bChannelA.cpp
@@ -111,7 +111,7 @@ void run(string configname)
       vector<double>  nupsStep          = config.getVector<double>("nupsStep");
       vector<double>  boundingBox       = config.getVector<double>("boundingBox");
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/bChannelVA/bChannelVA.cpp b/apps/cpu/bChannelVA/bChannelVA.cpp
index dce429bd17250e90d3cd2ac753e77f720a184c70..6cfe5dac2557f167864495599074cd3c94da6517 100644
--- a/apps/cpu/bChannelVA/bChannelVA.cpp
+++ b/apps/cpu/bChannelVA/bChannelVA.cpp
@@ -13,7 +13,7 @@ int main(int argc, char* argv[])
    try
    {
       //Sleep(20000);
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       //Pheonix
diff --git a/apps/cpu/bKanal/HLRNb/bKanal.cpp b/apps/cpu/bKanal/HLRNb/bKanal.cpp
index d7028dba969d01b409a3fc84b4fc3b83556da69a..0c5c46a0cb78354563425685c8346ff81258ccd2 100644
--- a/apps/cpu/bKanal/HLRNb/bKanal.cpp
+++ b/apps/cpu/bKanal/HLRNb/bKanal.cpp
@@ -27,7 +27,7 @@ void run(const char *cstr)
 
       UbLog::reportingLevel() = logINFO;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
 
@@ -77,9 +77,9 @@ void run(const char *cstr)
 
       int H=200;//200;//392;
 
-      //  nx[0]      =8;//ok mit 8// (int)(3.0*(double)H/8.0/8.0);//2;// (int)(0.3*(double)H/6.0/4.0);//das "/4" hier ist wegen der verfeinerung da! //länge
+      //  nx[0]      =8;//ok mit 8// (int)(3.0*(double)H/8.0/8.0);//2;// (int)(0.3*(double)H/6.0/4.0);//das "/4" hier ist wegen der verfeinerung da! //länge
       //  nx[1]      =8;//ok mit 8// (int)(2.0*(double)H/8.0/8.0);//2;// (int)(0.2*(double)H/6.0/4.0);//  //breite
-      nx[2]      = (int)(2.0*(double)H/5.0/8.0);// //höhe gebiet
+      nx[2]      = (int)(2.0*(double)H/5.0/8.0);// //höhe gebiet
 
       //(3/2/2)-ratio:
       nx[1]=nx[2];
@@ -289,7 +289,7 @@ void run(const char *cstr)
          //   , originX1+geoLength[0]+geoOverlap, originX2+geoOverlap+geoLength[1], kanalhoeheSI*0.25));
          //RefineCrossAndInsideGbObjectBlockVisitor refineVisitorminl3(wallsX1X2minRefl3, 0,refineLevel-3);
          //grid->accept(refineVisitorminl3);
-         /////würfel unten version
+         /////würfel unten version
          //      GbCuboid3DPtr wallsX1X2minRef2(new GbCuboid3D(  originX1-3.0*geoOverlap   , originX2-3.0*geoOverlap  , originX3-3.0*geoOverlap
          //   , originX1+geoLength[0]+geoOverlap, originX2+geoOverlap+geoLength[1], kanalhoeheSI*0.2));
          //RefineCrossAndInsideGbObjectBlockVisitor refineVisitormin2(wallsX1X2minRef2, 0,refineLevel-2);
@@ -308,7 +308,7 @@ void run(const char *cstr)
 
 
 
-         /////würfel anfang version
+         /////würfel anfang version
          //       GbCuboid3DPtr wallsX1X2minRef2(new GbCuboid3D(  originX1-3.0*geoOverlap   , originX2-3.0*geoOverlap  , originX3-3.0*geoOverlap
          //   , originX1+geoLength[0]+geoOverlap, originX2+geoOverlap+geoLength[1], kanalhoeheSI*0.56));
          //RefineCrossAndInsideGbObjectBlockVisitor refineVisitormin2(wallsX1X2minRef2, 0,refineLevel-2);
@@ -389,10 +389,10 @@ void run(const char *cstr)
          ///interactoren
          //int bbOption1 = 0; //0=simple Bounce Back, 1=quadr. BB
          //D3Q27BoundaryConditionAdapterPtr bcObst(new D3Q27NoSlipBCAdapter(bbOption1));
-         ///////würfel unten version ende
+         ///////würfel unten version ende
          ////////////////////////////////////////////////////////////////////////////////
          ////////PM grid
-         //Temporär:
+         //Temporär:
          //double  H=1.0;
 
          vector<D3Q27InteractorPtr> D3Q27InteractorPtrarray;
diff --git a/apps/cpu/bKanal/bKanal.cpp b/apps/cpu/bKanal/bKanal.cpp
index b6ee71f79963d0c2e6336cbe2455babba6b3cea2..94af8f6aa46ddf5f398747805836ba95ce1dbbaf 100644
--- a/apps/cpu/bKanal/bKanal.cpp
+++ b/apps/cpu/bKanal/bKanal.cpp
@@ -24,7 +24,7 @@ void run(const char *cstr)
 
       UbLog::reportingLevel() = logINFO;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr);
@@ -81,9 +81,9 @@ void run(const char *cstr)
 
       int H=200;//200;//392;
 
-      //  nx[0]      =8;//ok mit 8// (int)(3.0*(double)H/8.0/8.0);//2;// (int)(0.3*(double)H/6.0/4.0);//das "/4" hier ist wegen der verfeinerung da! //länge
+      //  nx[0]      =8;//ok mit 8// (int)(3.0*(double)H/8.0/8.0);//2;// (int)(0.3*(double)H/6.0/4.0);//das "/4" hier ist wegen der verfeinerung da! //länge
       //  nx[1]      =8;//ok mit 8// (int)(2.0*(double)H/8.0/8.0);//2;// (int)(0.2*(double)H/6.0/4.0);//  //breite
-      nx[2]      = (int)(2.0*(double)H/5.0/8.0);// //höhe gebiet
+      nx[2]      = (int)(2.0*(double)H/5.0/8.0);// //höhe gebiet
 
       //(3/2/2)-ratio:
       nx[1]=nx[2];
@@ -267,10 +267,10 @@ void run(const char *cstr)
          ///interactoren
          //int bbOption1 = 0; //0=simple Bounce Back, 1=quadr. BB
          //D3Q27BoundaryConditionAdapterPtr bcObst(new D3Q27NoSlipBCAdapter(bbOption1));
-         ///////würfel unten version ende
+         ///////würfel unten version ende
          ////////////////////////////////////////////////////////////////////////////////
          ////////PM grid
-         //Temporär:
+         //Temporär:
          //double  H=1.0;
 
          vector<D3Q27InteractorPtr> D3Q27InteractorPtrarray;
diff --git a/apps/cpu/bKanal/sKanal/bKanal.cpp b/apps/cpu/bKanal/sKanal/bKanal.cpp
index cabea74871a6da507b1c9c9ddf23820a936bdb10..6a9d3c2c697b04c176bc9c11aa38f7f719e07785 100644
--- a/apps/cpu/bKanal/sKanal/bKanal.cpp
+++ b/apps/cpu/bKanal/sKanal/bKanal.cpp
@@ -27,7 +27,7 @@ void run(const char *cstr)
 
       UbLog::reportingLevel() = logINFO;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if(machine == "PIPPINNEU") 
@@ -89,9 +89,9 @@ void run(const char *cstr)
 
       int H=200;//200;//392;
 
-      //  nx[0]      =8;//ok mit 8// (int)(3.0*(double)H/8.0/8.0);//2;// (int)(0.3*(double)H/6.0/4.0);//das "/4" hier ist wegen der verfeinerung da! //länge
+      //  nx[0]      =8;//ok mit 8// (int)(3.0*(double)H/8.0/8.0);//2;// (int)(0.3*(double)H/6.0/4.0);//das "/4" hier ist wegen der verfeinerung da! //länge
       //  nx[1]      =8;//ok mit 8// (int)(2.0*(double)H/8.0/8.0);//2;// (int)(0.2*(double)H/6.0/4.0);//  //breite
-      nx[2]      = (int)(2.0*(double)H/5.0/8.0);// //höhe gebiet
+      nx[2]      = (int)(2.0*(double)H/5.0/8.0);// //höhe gebiet
 
       //(3/2/2)-ratio:
       nx[1]=nx[2];
@@ -300,7 +300,7 @@ void run(const char *cstr)
          //   , originX1+geoLength[0]+geoOverlap, originX2+geoOverlap+geoLength[1], kanalhoeheSI*0.25));
          //RefineCrossAndInsideGbObjectBlockVisitor refineVisitorminl3(wallsX1X2minRefl3, 0,refineLevel-3);
          //grid->accept(refineVisitorminl3);
-         /////würfel unten version
+         /////würfel unten version
          //      GbCuboid3DPtr wallsX1X2minRef2(new GbCuboid3D(  originX1-3.0*geoOverlap   , originX2-3.0*geoOverlap  , originX3-3.0*geoOverlap
          //   , originX1+geoLength[0]+geoOverlap, originX2+geoOverlap+geoLength[1], kanalhoeheSI*0.2));
          //RefineCrossAndInsideGbObjectBlockVisitor refineVisitormin2(wallsX1X2minRef2, 0,refineLevel-2);
@@ -319,7 +319,7 @@ void run(const char *cstr)
 
 
 
-         /////würfel anfang version
+         /////würfel anfang version
          //       GbCuboid3DPtr wallsX1X2minRef2(new GbCuboid3D(  originX1-3.0*geoOverlap   , originX2-3.0*geoOverlap  , originX3-3.0*geoOverlap
          //   , originX1+geoLength[0]+geoOverlap, originX2+geoOverlap+geoLength[1], kanalhoeheSI*0.56));
          //RefineCrossAndInsideGbObjectBlockVisitor refineVisitormin2(wallsX1X2minRef2, 0,refineLevel-2);
@@ -400,10 +400,10 @@ void run(const char *cstr)
          ///interactoren
          //int bbOption1 = 0; //0=simple Bounce Back, 1=quadr. BB
          //D3Q27BoundaryConditionAdapterPtr bcObst(new D3Q27NoSlipBCAdapter(bbOption1));
-         ///////würfel unten version ende
+         ///////würfel unten version ende
          ////////////////////////////////////////////////////////////////////////////////
          ////////PM grid
-         //Temporär:
+         //Temporär:
          //double  H=1.0;
 
          vector<D3Q27InteractorPtr> D3Q27InteractorPtrarray;
diff --git a/apps/cpu/bKanal2/bKanal2.cpp b/apps/cpu/bKanal2/bKanal2.cpp
index 87181a20e7c8542456c220d80e76a214c9b0c779..10e6f988085244e2028f28fca4129bc354c49699 100644
--- a/apps/cpu/bKanal2/bKanal2.cpp
+++ b/apps/cpu/bKanal2/bKanal2.cpp
@@ -24,7 +24,7 @@ void run(const char *cstr)
 
       UbLog::reportingLevel() = logINFO;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr);
@@ -222,10 +222,10 @@ void run(const char *cstr)
          ///interactoren
          //int bbOption1 = 0; //0=simple Bounce Back, 1=quadr. BB
          //D3Q27BoundaryConditionAdapterPtr bcObst(new D3Q27NoSlipBCAdapter(bbOption1));
-         ///////würfel unten version ende
+         ///////würfel unten version ende
          ////////////////////////////////////////////////////////////////////////////////
          ////////PM grid
-         //Temporär:
+         //Temporär:
          //double  H=1.0;
 
          vector<D3Q27InteractorPtr> D3Q27InteractorPtrarray;
diff --git a/apps/cpu/bKanalAv/bKanal.cpp b/apps/cpu/bKanalAv/bKanal.cpp
index 067aabbddc7284603bf3d8dfa2c6bf4a5eca99a8..71ca1ed0464afd67adf8db473ccdbf9487b8acda 100644
--- a/apps/cpu/bKanalAv/bKanal.cpp
+++ b/apps/cpu/bKanalAv/bKanal.cpp
@@ -27,7 +27,7 @@ void run(const char *cstr)
 
       UbLog::reportingLevel() = logINFO;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
 
@@ -77,9 +77,9 @@ void run(const char *cstr)
 
       int H=200;//200;//392;
 
-      //  nx[0]      =8;//ok mit 8// (int)(3.0*(double)H/8.0/8.0);//2;// (int)(0.3*(double)H/6.0/4.0);//das "/4" hier ist wegen der verfeinerung da! //länge
+      //  nx[0]      =8;//ok mit 8// (int)(3.0*(double)H/8.0/8.0);//2;// (int)(0.3*(double)H/6.0/4.0);//das "/4" hier ist wegen der verfeinerung da! //länge
       //  nx[1]      =8;//ok mit 8// (int)(2.0*(double)H/8.0/8.0);//2;// (int)(0.2*(double)H/6.0/4.0);//  //breite
-      nx[2]      = (int)(2.0*(double)H/5.0/8.0);// //höhe gebiet
+      nx[2]      = (int)(2.0*(double)H/5.0/8.0);// //höhe gebiet
 
       //(3/2/2)-ratio:
       nx[1]=nx[2];
@@ -289,7 +289,7 @@ void run(const char *cstr)
          //   , originX1+geoLength[0]+geoOverlap, originX2+geoOverlap+geoLength[1], kanalhoeheSI*0.25));
          //RefineCrossAndInsideGbObjectBlockVisitor refineVisitorminl3(wallsX1X2minRefl3, 0,refineLevel-3);
          //grid->accept(refineVisitorminl3);
-         /////würfel unten version
+         /////würfel unten version
          //      GbCuboid3DPtr wallsX1X2minRef2(new GbCuboid3D(  originX1-3.0*geoOverlap   , originX2-3.0*geoOverlap  , originX3-3.0*geoOverlap
          //   , originX1+geoLength[0]+geoOverlap, originX2+geoOverlap+geoLength[1], kanalhoeheSI*0.2));
          //RefineCrossAndInsideGbObjectBlockVisitor refineVisitormin2(wallsX1X2minRef2, 0,refineLevel-2);
@@ -308,7 +308,7 @@ void run(const char *cstr)
 
 
 
-         /////würfel anfang version
+         /////würfel anfang version
          //       GbCuboid3DPtr wallsX1X2minRef2(new GbCuboid3D(  originX1-3.0*geoOverlap   , originX2-3.0*geoOverlap  , originX3-3.0*geoOverlap
          //   , originX1+geoLength[0]+geoOverlap, originX2+geoOverlap+geoLength[1], kanalhoeheSI*0.56));
          //RefineCrossAndInsideGbObjectBlockVisitor refineVisitormin2(wallsX1X2minRef2, 0,refineLevel-2);
@@ -389,10 +389,10 @@ void run(const char *cstr)
          ///interactoren
          //int bbOption1 = 0; //0=simple Bounce Back, 1=quadr. BB
          //D3Q27BoundaryConditionAdapterPtr bcObst(new D3Q27NoSlipBCAdapter(bbOption1));
-         ///////würfel unten version ende
+         ///////würfel unten version ende
          ////////////////////////////////////////////////////////////////////////////////
          ////////PM grid
-         //Temporär:
+         //Temporär:
          //double  H=1.0;
 
          //vector<D3Q27InteractorPtr> D3Q27InteractorPtrarray;
diff --git a/apps/cpu/band/band.cpp b/apps/cpu/band/band.cpp
index ed3103ef6b5ab8854b4b2244f01f08776a069b9a..370e50341662d21f25407cec428b1c20ee543a37 100644
--- a/apps/cpu/band/band.cpp
+++ b/apps/cpu/band/band.cpp
@@ -20,7 +20,7 @@ void run(const char *cstr)
 
       //UbLog::reportingLevel() = logDEBUG5;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr);
@@ -80,9 +80,9 @@ void run(const char *cstr)
 
       ///////////////Knotenabmessungen:
       int nx[3], blocknx[3];
-      nx[0]      = 10;//240;//120;//60;//86;//43;//65;//50;  //länge
+      nx[0]      = 10;//240;//120;//60;//86;//43;//65;//50;  //länge
       nx[1]      = 1;//2;//6;///1;//5;// //breite
-      nx[2]      = 2;//64;//32;//18;//5;//15;//15; //höhe gebiet
+      nx[2]      = 2;//64;//32;//18;//5;//15;//15; //höhe gebiet
       blocknx[0] = 10;//10;//6;
       blocknx[1] = 10;//10;//6;
       blocknx[2] = 10;//10;//6;
@@ -90,12 +90,12 @@ void run(const char *cstr)
       int baseLevel   = 0;
       int refineLevel = 0;
 
-      double H = 0.6; // Kanalhöhe [mm]
+      double H = 0.6; // Kanalhöhe [mm]
       //double cdx = H/blocknx[2];
       double cdx = 0.0390625;
       double fdx = cdx/double(1<<refineLevel);
 
-      //double h = 200.0; // gewünschte Plattenhöhe in Gitterpunkten
+      //double h = 200.0; // gewünschte Plattenhöhe in Gitterpunkten
       //double fdx = plate->getLengthX3()/h;
       //double cdx = fdx*double(1<<refineLevel);
 
@@ -107,7 +107,7 @@ void run(const char *cstr)
       double Re            = 680; 
       double rhoLB         = 0.0;
       double uLB           = 0.1; 
-      double lReal         = 0.6; //Zackenhöhe in mm
+      double lReal         = 0.6; //Zackenhöhe in mm
       double nuLB          = (uLB*(lReal/cdx))/Re;
 
       Grid3DPtr grid(new Grid3D(comm));
diff --git a/apps/cpu/bbone/bbone.cpp b/apps/cpu/bbone/bbone.cpp
index 30a2b28c32e78f65b4c79d185b302e83e38e8579..3eb6c827c6157c4dc6810ffaab402e3e51337c93 100644
--- a/apps/cpu/bbone/bbone.cpp
+++ b/apps/cpu/bbone/bbone.cpp
@@ -33,7 +33,7 @@ void sbonepd(string configname)
       bool            logToFile         = config.getBool("logToFile");
       double          deltaT            = config.getDouble("deltaT");
       
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
@@ -70,8 +70,8 @@ void sbonepd(string configname)
       LBMReal rho_LB = 0.0;
       //nueWasser = 1e-6 m^2/s
       double nu_real = 1e-6;
-      LBMReal dt = deltaT; //1e-5; // s (frei gewählt)
-      //dx - frei gewählt
+      LBMReal dt = deltaT; //1e-5; // s (frei gewählt)
+      //dx - frei gewählt
       //
       LBMReal nu_LB = nu_real / (dx*dx / dt);
 
diff --git a/apps/cpu/block_test/block_test_incompressible.hpp b/apps/cpu/block_test/block_test_incompressible.hpp
index d7c1c49f4ca01f2899e1d721afdf9d6b47e870cd..2ce506c93f4611a3069140703e712dbcca7fe661 100644
--- a/apps/cpu/block_test/block_test_incompressible.hpp
+++ b/apps/cpu/block_test/block_test_incompressible.hpp
@@ -29,7 +29,7 @@ void block_test_incompressible(const char *cstr1, const char *cstr2)
       int numOfThreads = UbSystem::stringTo<int>(cf.getValue("numOfThreads"));
       double availMem = 0;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if(machine == "BOMBADIL") 
diff --git a/apps/cpu/bond_benchmark/bonb_b_chanel.cpp b/apps/cpu/bond_benchmark/bonb_b_chanel.cpp
index 3753ebea683a520dfd34fd2a78f392f169442214..b5e63c50d5a9ba91abb872319a7e68de9df28b97 100644
--- a/apps/cpu/bond_benchmark/bonb_b_chanel.cpp
+++ b/apps/cpu/bond_benchmark/bonb_b_chanel.cpp
@@ -29,7 +29,7 @@ void chanel(const char *cstr)
 
       string comm_type = cf.getValue("comm");
       if(comm_type == "MPI")
-         comm = MPICommunicator::getInstance();
+         comm = vf::mpi::MPICommunicator::getInstance();
       else if(comm_type == "BOND")
          comm = BondCommunicator::getInstance();
       
diff --git a/apps/cpu/bond_benchmark/bond_b.cpp b/apps/cpu/bond_benchmark/bond_b.cpp
index ba3221aa058aad9d9b0fb807bbbde05c21bc3bc0..6d607811a21f4dc111f6b003bf9343c60973207c 100644
--- a/apps/cpu/bond_benchmark/bond_b.cpp
+++ b/apps/cpu/bond_benchmark/bond_b.cpp
@@ -35,7 +35,7 @@ void periodic(const char *cstr1, const char *cstr2)
 
       string comm_type = cf.getValue("comm");
       if(comm_type == "MPI")
-         comm = MPICommunicator::getInstance();
+         comm = vf::mpi::MPICommunicator::getInstance();
       else if(comm_type == "BOND")
          comm = BondCommunicator::getInstance();
 
diff --git a/apps/cpu/bond_test/bond_test.cpp b/apps/cpu/bond_test/bond_test.cpp
index 06e9d7710b7067d289292df29a615763f9b0b5fa..b7091184ff789dd6ac56e8c085853e5a45c088a0 100644
--- a/apps/cpu/bond_test/bond_test.cpp
+++ b/apps/cpu/bond_test/bond_test.cpp
@@ -153,7 +153,7 @@ void simulation(const char *cstr)
       CommunicatorPtr comm;
       string comm_type = cf.getValue("comm");
       if(comm_type == "MPI")
-         comm = MPICommunicator::getInstance();
+         comm = vf::mpi::MPICommunicator::getInstance();
       else if(comm_type == "BOND")
          comm = BondCommunicator::getInstance();
 
diff --git a/apps/cpu/bone/bone.cpp b/apps/cpu/bone/bone.cpp
index 9ad9321d601fff1caf126b776f9697fdd22876f1..849241ba26fc515ca2ee4ac3bd127742c0c693e5 100644
--- a/apps/cpu/bone/bone.cpp
+++ b/apps/cpu/bone/bone.cpp
@@ -18,7 +18,7 @@ void run(const char *cstr1, const char *cstr2)
       stringstream logFilename;
       double availMem = 0;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr1);
@@ -102,9 +102,9 @@ void run(const char *cstr1, const char *cstr2)
 
       /////////////////Knotenabmessungen:
       //int nx[3], blocknx[3];
-      //nx[0]      = 90;//240;//120;//60;//86;//43;//65;//50;  //länge
+      //nx[0]      = 90;//240;//120;//60;//86;//43;//65;//50;  //länge
       //nx[1]      = 2;//2;//6;///1;//5;// //breite
-      //nx[2]      = 30;//64;//32;//18;//5;//15;//15; //höhe gebiet
+      //nx[2]      = 30;//64;//32;//18;//5;//15;//15; //höhe gebiet
       //blocknx[0] = 16;//10;//6;
       //blocknx[1] = 16;//10;//6;
       //blocknx[2] = 16;//10;//6;
@@ -112,11 +112,11 @@ void run(const char *cstr1, const char *cstr2)
       //int baseLevel   = 0;
       //int refineLevel = 4;
 
-      //double H = 600.0; // Kanalhöhe [mm]
+      //double H = 600.0; // Kanalhöhe [mm]
       //double cdx = H/(double)(nx[2]*blocknx[2]);
       //double fdx = cdx/double(1<<refineLevel);
 
-      ////double h = 200.0; // gewünschte Plattenhöhe in Gitterpunkten
+      ////double h = 200.0; // gewünschte Plattenhöhe in Gitterpunkten
       ////double fdx = plate->getLengthX3()/h;
       ////double cdx = fdx*double(1<<refineLevel);
 
@@ -128,7 +128,7 @@ void run(const char *cstr1, const char *cstr2)
       //double Re            = 1133333.3333333335; 
       //double rhoLB         = 0.0;
       //double uLB           = 0.1; 
-      //double lReal         = 1000; //Plattenlänge in mm
+      //double lReal         = 1000; //Plattenlänge in mm
       //double nuLB          = (uLB*(lReal/cdx))/Re;
 
       //int sizeSP=4;
diff --git a/apps/cpu/f16Test/f16test.cpp b/apps/cpu/f16Test/f16test.cpp
index 58921f91e5fa294fb117a2842626e28d12ade8d4..a73949c8cef45cfa1b576070cb004041f2ac7a0f 100644
--- a/apps/cpu/f16Test/f16test.cpp
+++ b/apps/cpu/f16Test/f16test.cpp
@@ -42,7 +42,7 @@ void run(string configname)
       double          refineDistance = config.getDouble("refineDistance");
       vector<double>  nupsStep = config.getVector<double>("nupsStep");
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/insitu_demo/insitu_demo.cpp b/apps/cpu/insitu_demo/insitu_demo.cpp
index 0e58965c47809e9f35403fc7f330ec0135588bbd..42a1c6b4c636801bbaa50a1027751cd88301edfb 100644
--- a/apps/cpu/insitu_demo/insitu_demo.cpp
+++ b/apps/cpu/insitu_demo/insitu_demo.cpp
@@ -15,7 +15,7 @@ void chanel(const char *cstr1)
       double availMem = 0;
 
       //CommunicatorPtr comm = FETOLCommunicator::getInstance();
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
 
       int myid = comm->getProcessID();
       int mybundle = comm->getBundleID();
diff --git a/apps/cpu/levels/levels.cpp b/apps/cpu/levels/levels.cpp
index 7173a4ba562a95fc93e267c5498957fc4a539da6..0fe328df129ea3f64a135f74f51e425c8d33bd52 100644
--- a/apps/cpu/levels/levels.cpp
+++ b/apps/cpu/levels/levels.cpp
@@ -14,7 +14,7 @@ void run(string configname)
 
       string machine = QUOTEME(CAB_MACHINE);
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
 
       int myid = comm->getProcessID();
       int mybundle = comm->getBundleID();
diff --git a/apps/cpu/micropart/micropartTestQs3.hpp b/apps/cpu/micropart/micropartTestQs3.hpp
index 11fe1c802e128e5cd36349694de084f7623125f0..14e9a84412a51548b91f668369029afd057241c5 100644
--- a/apps/cpu/micropart/micropartTestQs3.hpp
+++ b/apps/cpu/micropart/micropartTestQs3.hpp
@@ -9,7 +9,7 @@ void micropartTestQs3(const char *cstr)
 {
    try
    {
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
       int numprocs = comm->getNumberOfProcesses();
 
@@ -436,7 +436,7 @@ void micropartTestQs3(const char *cstr)
 
          // if(myid == 0)
          // {
-         // //Abstände "q" als Linien rausschreiben
+         // //Abstände "q" als Linien rausschreiben
          // std::vector< UbTupleFloat3 > nodes;
          // std::vector< UbTupleInt2 >   lines;
          // geoInt->addQsLineSet(nodes, lines);
diff --git a/apps/cpu/mirror/mirror.cpp b/apps/cpu/mirror/mirror.cpp
index e4a2f33027bb3b0be3eafb6b0746a14f642107ee..68902fdb7d0dcb74d3eb59d884a0619c8bb0cf6b 100644
--- a/apps/cpu/mirror/mirror.cpp
+++ b/apps/cpu/mirror/mirror.cpp
@@ -49,7 +49,7 @@ void run(string configname)
       string          VRES1100_Spiegel_fein = config.getValue<string>("VRES1100_Spiegel_fein");
 
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/mpi_benchmark/mpib.cpp b/apps/cpu/mpi_benchmark/mpib.cpp
index 05a251bb2f0632a8cff7b1d0cb42603c5be0438e..6c13de98b743ad251bb1000dafc609660ca4ba46 100644
--- a/apps/cpu/mpi_benchmark/mpib.cpp
+++ b/apps/cpu/mpi_benchmark/mpib.cpp
@@ -8,7 +8,7 @@ using namespace std;
 
 void run(string configname)
 {
-   SPtr<Communicator> comm = MPICommunicator::getInstance();
+   SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
    int myid = comm->getProcessID();
 
    // Get the name of the processor
diff --git a/apps/cpu/pChannel/pChannel.cpp b/apps/cpu/pChannel/pChannel.cpp
index f036c22b6a7f3016a25d20640026db10eb5cb6bf..c2b32108037389ddd18351ff7092cd3d680492ea 100644
--- a/apps/cpu/pChannel/pChannel.cpp
+++ b/apps/cpu/pChannel/pChannel.cpp
@@ -206,7 +206,7 @@ void run(string configname)
       vector<double>  nupsStep          = config.getVector<double>("nupsStep");
       vector<double>  boundingBox       = config.getVector<double>("boundingBox");
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/pChannel/pChannel.cpp.hlrn b/apps/cpu/pChannel/pChannel.cpp.hlrn
index a0592ec5b0871b37e3d7f21e0f85cc91ebf561d2..f25a0c4c2e62d6b2b97ff338d567ef911bdc9d14 100644
--- a/apps/cpu/pChannel/pChannel.cpp.hlrn
+++ b/apps/cpu/pChannel/pChannel.cpp.hlrn
@@ -52,7 +52,7 @@ void run(string configname)
       double          timeLineTsStop    = config.getDouble("timeLineTsStop");
 
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/pDisk/pdisk.cpp b/apps/cpu/pDisk/pdisk.cpp
index f24b6b0c185e7ddb45bef85caefb550828e2e998..f19e04ff81222e7eb448c1f0236669fb824fad23 100644
--- a/apps/cpu/pDisk/pdisk.cpp
+++ b/apps/cpu/pDisk/pdisk.cpp
@@ -39,7 +39,7 @@ void run(string configname)
 
       //UbLog::reportingLevel() = logDEBUG5;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
 
diff --git a/apps/cpu/perm/perm.cpp b/apps/cpu/perm/perm.cpp
index 21b4bc47abfd650e52c9b235d28fdcdf52b229f2..7fae63025a198a09af2a30da3776f940441dbcc2 100644
--- a/apps/cpu/perm/perm.cpp
+++ b/apps/cpu/perm/perm.cpp
@@ -44,7 +44,7 @@ void perm(string configname)
       double          deltax = config.getValue<double>("deltax");
       bool            writeSampleToFile = config.getValue<bool>("writeSampleToFile");
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/perm/perm.cpp_s b/apps/cpu/perm/perm.cpp_s
index 3d6c8457f02a8e9063d1e8fd2758e284e769f1de..21db434d5290ee948665f45af8ae1c93a84d9336 100644
--- a/apps/cpu/perm/perm.cpp_s
+++ b/apps/cpu/perm/perm.cpp_s
@@ -23,7 +23,7 @@ void perm(const char *configname)
          throw exceptionText;
       }
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (machine == "BOMBADIL")
diff --git a/apps/cpu/plate/plate.cpp b/apps/cpu/plate/plate.cpp
index 7322f5fc89e6c2729e5cf07b9e69894594b1c3d3..28db0262fa649ea93f8b44cf69821557ad53961e 100644
--- a/apps/cpu/plate/plate.cpp
+++ b/apps/cpu/plate/plate.cpp
@@ -25,7 +25,7 @@ void run(const char *cstr, double endTime)
 
       //UbLog::reportingLevel() = logDEBUG5;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr);
@@ -106,9 +106,9 @@ void run(const char *cstr, double endTime)
 
       //int H=200;//200;//392;
       ///////////////Knotenabmessungen:
-      nx[0]      = 50;//240;//120;//60;//86;//43;//65;//50;  //länge
+      nx[0]      = 50;//240;//120;//60;//86;//43;//65;//50;  //länge
       nx[1]      = 1;//2;//6;///1;//5;// //breite
-      nx[2]      = 16;//64;//32;//18;//5;//15;//15; //höhe gebiet
+      nx[2]      = 16;//64;//32;//18;//5;//15;//15; //höhe gebiet
       blocknx[0] = 25;//10;//6;
       blocknx[1] = 25;//10;//6;
       blocknx[2] = 25;//10;//6;
@@ -117,9 +117,9 @@ void run(const char *cstr, double endTime)
       refineLevel = 4;
 
       ///////////////Weltabmessungen:
-      double kanalhoeheSI  = 60.0/100.0;//60.0/100.0;//cm, Kanalhöhe
-      double kanalbreiteSI = kanalhoeheSI*((double)nx[1])/((double)nx[2]);//=kanalhöhe*nx1/nx2//1.65/100.0;//13.2/100.0;////40.0/100.0; //cm, Kanalbreite //13.2 zeilbreite
-      double kanallaengeSI = kanalhoeheSI*((double)nx[0])/((double)nx[2]);//80.0/100.0;//cm, Kanallänge, ist nicht angegeben
+      double kanalhoeheSI  = 60.0/100.0;//60.0/100.0;//cm, Kanalhöhe
+      double kanalbreiteSI = kanalhoeheSI*((double)nx[1])/((double)nx[2]);//=kanalhöhe*nx1/nx2//1.65/100.0;//13.2/100.0;////40.0/100.0; //cm, Kanalbreite //13.2 zeilbreite
+      double kanallaengeSI = kanalhoeheSI*((double)nx[0])/((double)nx[2]);//80.0/100.0;//cm, Kanallänge, ist nicht angegeben
 
       // double refinewidth1=kanalhoeheSI/10.0;
 
@@ -156,7 +156,7 @@ void run(const char *cstr, double endTime)
 
       double hReal         = 0.0105;//<-m     1.05;//Plattendicke in cm(! cm nicht m !)
       double uReal         = 15;//m/s   //Re*nueReal/hReal;
-      double lReal         = 1; //m Plattenlänge
+      double lReal         = 1; //m Plattenlänge
 
       //##Machzahl:
       //#Ma     = uReal/csReal
diff --git a/apps/cpu/plate2/plate2.cpp b/apps/cpu/plate2/plate2.cpp
index 8fe2cd3dbbd7f9183927e5e09dd1d20ffc6769ea..a908abf5b3652dcdd24c44202950f4962351c735 100644
--- a/apps/cpu/plate2/plate2.cpp
+++ b/apps/cpu/plate2/plate2.cpp
@@ -18,7 +18,7 @@ void run(const char *cstr1, const char *cstr2)
       stringstream logFilename;
       double availMem = 0;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr1);
@@ -85,9 +85,9 @@ void run(const char *cstr1, const char *cstr2)
 
       ///////////////Knotenabmessungen:
       int nx[3], blocknx[3];
-      nx[0]      = 90;//240;//120;//60;//86;//43;//65;//50;  //länge
+      nx[0]      = 90;//240;//120;//60;//86;//43;//65;//50;  //länge
       nx[1]      = 2;//2;//6;///1;//5;// //breite
-      nx[2]      = 30;//64;//32;//18;//5;//15;//15; //höhe gebiet
+      nx[2]      = 30;//64;//32;//18;//5;//15;//15; //höhe gebiet
       blocknx[0] = 16;//10;//6;
       blocknx[1] = 16;//10;//6;
       blocknx[2] = 16;//10;//6;
@@ -95,11 +95,11 @@ void run(const char *cstr1, const char *cstr2)
       int baseLevel   = 0;
       int refineLevel = 4;
 
-      double H = 600.0; // Kanalhöhe [mm]
+      double H = 600.0; // Kanalhöhe [mm]
       double cdx = H/(double)(nx[2]*blocknx[2]);
       double fdx = cdx/double(1<<refineLevel);
 
-      //double h = 200.0; // gewünschte Plattenhöhe in Gitterpunkten
+      //double h = 200.0; // gewünschte Plattenhöhe in Gitterpunkten
       //double fdx = plate->getLengthX3()/h;
       //double cdx = fdx*double(1<<refineLevel);
 
@@ -111,7 +111,7 @@ void run(const char *cstr1, const char *cstr2)
       double Re            = 1133333.3333333335; 
       double rhoLB         = 0.0;
       double uLB           = 0.1; 
-      double lReal         = 1000; //Plattenlänge in mm
+      double lReal         = 1000; //Plattenlänge in mm
       double nuLB          = (uLB*(lReal/cdx))/Re;
 
       int sizeSP=4;
diff --git a/apps/cpu/poiseuille_example/poiseuille.cpp b/apps/cpu/poiseuille_example/poiseuille.cpp
index db9ac4ffb0218e0d0202ea773427d948609aeac5..d5de62d10edb0ddcfb7790febe494f937317f6e9 100644
--- a/apps/cpu/poiseuille_example/poiseuille.cpp
+++ b/apps/cpu/poiseuille_example/poiseuille.cpp
@@ -25,7 +25,7 @@ int main()
     const auto lbmUnitConverter = std::make_shared<LBMUnitConverter>();
     const auto writer = WbWriterVtkXmlBinary::getInstance();
 
-    const auto communicator = MPICommunicator::getInstance();
+    const auto communicator = vf::mpi::MPICommunicator::getInstance();
     const auto kernel = std::make_shared<CompressibleCumulant4thOrderViscosityLBMKernel>();
     kernel->setBCProcessor(std::make_shared<BCProcessor>());
     kernel->setForcingX1(1e-6 * lbmUnitConverter->getFactorForceWToLb());
diff --git a/apps/cpu/porplate2/porplate.cpp b/apps/cpu/porplate2/porplate.cpp
index 109f0b3f75396dcf2d3d410f22fb5da76016344d..2414e07732b18cac8a8c7c61b276f007c16826ef 100644
--- a/apps/cpu/porplate2/porplate.cpp
+++ b/apps/cpu/porplate2/porplate.cpp
@@ -316,7 +316,7 @@ void run(const char *cstr, bool firststart)
       stringstream logFilename;
       double availMem = 0;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr);
@@ -384,9 +384,9 @@ void run(const char *cstr, bool firststart)
 
       ///////////////Knotenabmessungen:
       int nx[3], blocknx[3];
-      nx[0] = 90;//240;//120;//60;//86;//43;//65;//50;  //länge
+      nx[0] = 90;//240;//120;//60;//86;//43;//65;//50;  //länge
       nx[1] = 2;//2;//6;///1;//5;// //breite
-      nx[2] = 30;//64;//32;//18;//5;//15;//15; //höhe gebiet
+      nx[2] = 30;//64;//32;//18;//5;//15;//15; //höhe gebiet
       blocknx[0] = 16;//10;//6;
       blocknx[1] = 16;//10;//6;
       blocknx[2] = 16;//10;//6;
@@ -394,11 +394,11 @@ void run(const char *cstr, bool firststart)
       int baseLevel = 0;
       int refineLevel = 5;
 
-      double H = 600.0; // Kanalhöhe [mm]
+      double H = 600.0; // Kanalhöhe [mm]
       double cdx = H / (double)(nx[2] * blocknx[2]);
       double fdx = cdx / double(1 << refineLevel);
 
-      //double h = 200.0; // gewünschte Plattenhöhe in Gitterpunkten
+      //double h = 200.0; // gewünschte Plattenhöhe in Gitterpunkten
       //double fdx = plate->getLengthX3()/h;
       //double cdx = fdx*double(1<<refineLevel);
 
@@ -413,7 +413,7 @@ void run(const char *cstr, bool firststart)
       // Re = 1000000
       // V = 16.05  # m / s
       // p = 994.7  #hPa(manuell abgelesen von MUB)
-      // T = 21.78  #°C
+      // T = 21.78  #°C
       // Luftfeuchte = 50.5   # %
       //////////////////////////////////////////////////////////////////////////
       // Simulation Parametr
@@ -421,7 +421,7 @@ void run(const char *cstr, bool firststart)
       double Re = 1e6; // 1133333.3333333335;
       double rhoLB = 0.0;
       double uLB = 0.1;
-      double lReal = 1000; //Plattenlänge in mm
+      double lReal = 1000; //Plattenlänge in mm
       double nuLB = (uLB*(lReal / cdx)) / Re;
 
       int sizeSP = 4;
diff --git a/apps/cpu/rheometer/rheometer.cpp b/apps/cpu/rheometer/rheometer.cpp
index 3c7907d3b1bc13547d64abd2180657d20b250704..c972d8fec3da4c6d4191948c7fcaafc9f061d13b 100644
--- a/apps/cpu/rheometer/rheometer.cpp
+++ b/apps/cpu/rheometer/rheometer.cpp
@@ -40,7 +40,7 @@ void bflow(string configname)
 
       //outputPath = outputPath + "/rheometerBingham_" + config.getValue<string>("resolution") + "_" + config.getValue<string>("OmegaLB");
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/sbone/sbone.cpp b/apps/cpu/sbone/sbone.cpp
index 845994925ff18b01189898ad59ebdaab9d851f65..321396da68d290946c16b36955cc1be98c10cf84 100644
--- a/apps/cpu/sbone/sbone.cpp
+++ b/apps/cpu/sbone/sbone.cpp
@@ -23,7 +23,7 @@ void sbonepd(const char *configname)
          throw exceptionText;
       }
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if(machine == "BOMBADIL") 
@@ -71,8 +71,8 @@ void sbonepd(const char *configname)
       LBMReal rho_LB = 0.0;
       //nueWasser = 1e-6 m^2/s
       double nu_real = 1e-6;
-      LBMReal dt = 5e-8; // s (frei gewählt)
-      //dx - frei gewählt
+      LBMReal dt = 5e-8; // s (frei gewählt)
+      //dx - frei gewählt
       //
       LBMReal nu_LB = nu_real/(dx*dx/dt);
 
diff --git a/apps/cpu/screw/screw.cpp b/apps/cpu/screw/screw.cpp
index 099ae784987b4c829a00018e712df690b0660c38..c1eea3f960cf6b4b64757843d08d126e4fac14b9 100644
--- a/apps/cpu/screw/screw.cpp
+++ b/apps/cpu/screw/screw.cpp
@@ -29,7 +29,7 @@ int main(int argc, char* argv[])
       int             restartStep  = config.getValue<int>("restartStep");
 
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       SPtr<LBMUnitConverter> conv = SPtr<LBMUnitConverter>(new LBMUnitConverter());
diff --git a/apps/cpu/sphere/sphere.cpp b/apps/cpu/sphere/sphere.cpp
index 3b725f98cf62b5c987cbbde9f0698b5d9bc337c1..5ab9a2a70f59273c326d7757faf13e4e338c6614 100644
--- a/apps/cpu/sphere/sphere.cpp
+++ b/apps/cpu/sphere/sphere.cpp
@@ -9,7 +9,7 @@ void run(string configname)
 {
    try
    {
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
 
       int myid = comm->getProcessID();
 
diff --git a/apps/cpu/stick/stick.cpp b/apps/cpu/stick/stick.cpp
index 058b767fcae53286428dad0858b42158b1113ae0..62efec8098241d440a2b2292ca5018fea915fe4e 100644
--- a/apps/cpu/stick/stick.cpp
+++ b/apps/cpu/stick/stick.cpp
@@ -19,7 +19,7 @@ void main()
       int numOfThreads = 4;
       double availMem = 10e9;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       double dx = 1;
diff --git a/apps/cpu/teperm/teperm.cpp b/apps/cpu/teperm/teperm.cpp
index 51638fa26e5675e69bf9e6ce23950f6cf9a47ec6..aecdb3745f3da37c03b07eb7b103374a99df4302 100644
--- a/apps/cpu/teperm/teperm.cpp
+++ b/apps/cpu/teperm/teperm.cpp
@@ -63,7 +63,7 @@ void run(string configname)
       int             chunk = config.getValue<int>("chunk");
 
 
-      SPtr<Communicator> comm = MPICommunicator::getInstance();
+      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/town/town.cpp b/apps/cpu/town/town.cpp
index 8f9b97857625939606874b1d539a01e8b54bb52e..ccaf90f8d277df9e16a5e3592eafa649e142d235 100644
--- a/apps/cpu/town/town.cpp
+++ b/apps/cpu/town/town.cpp
@@ -18,7 +18,7 @@ void run(const char *cstr1, const char *cstr2)
       stringstream logFilename;
       double availMem = 0;
 
-      CommunicatorPtr comm = MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr1);
diff --git a/apps/gpu/GKS/Flame7cm/CMakeLists.txt b/apps/gpu/GKS/Flame7cm/CMakeLists.txt
index c3f57dcdd8c9ea9e8ba891d72c039f5acdfc9cf9..75ca5fa4b4e9c51724d32a3733559b9489ce7943 100644
--- a/apps/gpu/GKS/Flame7cm/CMakeLists.txt
+++ b/apps/gpu/GKS/Flame7cm/CMakeLists.txt
@@ -1,6 +1,5 @@
-PROJECT(Flame7cm)
+PROJECT(Flame7cm LANGUAGES CUDA CXX)
 
-vf_add_library(BUILDTYPE binary PRIVATE_LINK basics GridGenerator GksMeshAdapter GksVtkAdapter GksGpu FILES Flame7cm.cpp )
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics GridGenerator GksMeshAdapter GksVtkAdapter GksGpu MPI::MPI_CXX FILES Flame7cm.cpp )
 
-include (${VF_CMAKE_DIR}/3rd/cuda.cmake)
-include (${VF_CMAKE_DIR}/3rd/mpi.cmake)
+set_source_files_properties(Flame7cm.cpp PROPERTIES LANGUAGE CUDA)
diff --git a/apps/gpu/GKS/Flame7cm/Flame7cm.cpp b/apps/gpu/GKS/Flame7cm/Flame7cm.cpp
index e0b736dfe5fb1be1a038bfb4ea6ad88b570f5951..4323ce5ae3bf8486a2203adec470e0d1fdc05a70 100644
--- a/apps/gpu/GKS/Flame7cm/Flame7cm.cpp
+++ b/apps/gpu/GKS/Flame7cm/Flame7cm.cpp
@@ -152,11 +152,10 @@ void thermalCavity( std::string path, std::string simulationName, uint _gpuIndex
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
+    // auto gridFactory = GridFactory::make();
+    // gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
 
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
+    auto gridBuilder = MultipleGridBuilder::makeShared();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -476,14 +475,14 @@ int main( int argc, char* argv[])
     {
         thermalCavity( path, simulationName, gpuIndex, nx, useTempLimiter, restartIter );
     }
-    catch (const std::exception& e)
-    {     
-        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-    }
     catch (const std::bad_alloc& e)
     {  
         *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
     }
+    catch (const std::exception& e)
+    {     
+        *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
+    }
     catch (...)
     {
         *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
diff --git a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
index 3089e1d7f314ad25aa400685f65defa6396c93e4..3f5e250e1b74ded1438fc4436b1d9e49d315040e 100644
--- a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
+++ b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
@@ -9,18 +9,14 @@
 #include <exception>
 #include <memory>
 
-#include "mpi.h"
-
 //////////////////////////////////////////////////////////////////////////
 
 #include "Core/DataTypes.h"
 #include "PointerDefinitions.h"
 
-#include "Core/LbmOrGks.h"
 #include "Core/StringUtilities/StringUtil.h"
 
 #include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
 
 #include <basics/config/ConfigurationFile.h>
 
@@ -117,143 +113,121 @@ void multipleLevel(const std::string& configPath)
 	gridBuilder->buildGrids(lbmOrGks, false); // buildGrids() has to be called before setting the BCs!!!!
 
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    if( lbmOrGks == LBM )
-    {
-        vf::gpu::Communicator* comm = vf::gpu::Communicator::getInstanz();
+    vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
 
-        vf::basics::ConfigurationFile config;
-        config.load(configPath);
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        SPtr<Parameter> para = std::make_shared<Parameter>(config, comm->getNummberOfProcess(), comm->getPID());
+    vf::basics::ConfigurationFile config;
+    config.load(configPath);
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID());
 
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-        const real dt = dx * mach / (sqrt(3) * velocity);
+    const real dt = dx * mach / (sqrt(3) * velocity);
 
-        const real velocityLB = velocity * dt / dx; // LB units
+    const real velocityLB = velocity * dt / dx; // LB units
 
-        const real viscosityLB = viscosity * dt / (dx * dx); // LB units
+    const real viscosityLB = viscosity * dt / (dx * dx); // LB units
 
-        VF_LOG_INFO("velocity  [dx/dt] = {}", velocityLB);
-        VF_LOG_INFO("viscosity [10^8 dx^2/dt] = {}", viscosityLB*1e8);
+    VF_LOG_INFO("velocity  [dx/dt] = {}", velocityLB);
+    VF_LOG_INFO("viscosity [10^8 dx^2/dt] = {}", viscosityLB*1e8);
 
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-		para->setDevices(std::vector<uint>{(uint)0});
+    para->setDevices(std::vector<uint>{(uint)0});
 
-        para->setOutputPrefix( simulationName );
+    para->setOutputPrefix( simulationName );
 
-        para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix());
+    para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix());
 
-        para->setPrintFiles(true);
+    para->setPrintFiles(true);
 
-        para->setMaxLevel(1);
+    para->setMaxLevel(1);
 
-        para->setVelocity(velocityLB);
-        para->setViscosity(viscosityLB);
 
-        para->setVelocityRatio( dx / dt );
-        para->setViscosityRatio( dx*dx / dt );
+    para->setVelocity(velocityLB);
+    para->setViscosity(viscosityLB);
+    para->setVelocityRatio( dx / dt );
+    para->setViscosityRatio( dx*dx/dt );
+    para->setMainKernel("CumulantK17CompChim");
 
-		para->setMainKernel("CumulantK17CompChim");
+    para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
+        rho = (real)0.0;
+        vx  = velocityLB;
+        vy  = (real)0.0;
+        vz  = (real)0.0;
+    });
 
-		para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
-            rho = (real)0.0;
-            vx  = velocityLB;
-            vy  = (real)0.0;
-            vz  = (real)0.0;
-        });
+    para->setTOut( timeStepOut );
+    para->setTEnd( uint(tEnd/dt) );
 
-        para->setTOut( timeStepOut );
-        para->setTEnd( uint(tEnd/dt) );
+    para->setIsBodyForce( true );
 
-        para->setIsBodyForce( true );
 
+    /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    gridBuilder->setVelocityBoundaryCondition(SideType::MX,  velocityLB,  0.0, 0.0);
+    gridBuilder->setVelocityBoundaryCondition(SideType::PX,  velocityLB,  0.0, 0.0);
+    gridBuilder->setVelocityBoundaryCondition(SideType::MY,  velocityLB,  0.0, 0.0);
+    gridBuilder->setVelocityBoundaryCondition(SideType::PY,  velocityLB,  0.0, 0.0);
+    gridBuilder->setVelocityBoundaryCondition(SideType::MZ,  velocityLB,  0.0, 0.0);
+    gridBuilder->setVelocityBoundaryCondition(SideType::PZ,  velocityLB,  0.0, 0.0);
 
-        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        gridBuilder->setVelocityBoundaryCondition(SideType::MX,  velocityLB,  0.0, 0.0);
-        gridBuilder->setVelocityBoundaryCondition(SideType::PX,  velocityLB,  0.0, 0.0);
-        gridBuilder->setVelocityBoundaryCondition(SideType::MY,  velocityLB,  0.0, 0.0);
-        gridBuilder->setVelocityBoundaryCondition(SideType::PY,  velocityLB,  0.0, 0.0);
-        gridBuilder->setVelocityBoundaryCondition(SideType::MZ,  velocityLB,  0.0, 0.0);
-        gridBuilder->setVelocityBoundaryCondition(SideType::PZ,  velocityLB,  0.0, 0.0);
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    SPtr<CudaMemoryManager> cudaMemoryManager = CudaMemoryManager::make(para);
 
-        SPtr<CudaMemoryManager> cudaMemoryManager = CudaMemoryManager::make(para);
+    SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager);
 
-        SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager);
+    real turbPos[3] = {3*reference_diameter, 3*reference_diameter, 3*reference_diameter};
+    real epsilon = 5.f; // width of gaussian smearing
+    real density = 1.225f;
+    int level = 0;
+    uint nBlades = 3;
+    uint nBladeNodes = 32;
 
-        real turbPos[3] = {3*reference_diameter, 3*reference_diameter, 3*reference_diameter};
-        real epsilon = 5.f; // width of gaussian smearing
-        real density = 1.225f;
-        int level = 0;
-        uint nBlades = 3;
-        uint nBladeNodes = 32;
+    SPtr<ActuatorLine> actuator_line =SPtr<ActuatorLine>( new ActuatorLine(nBlades, density, nBladeNodes, epsilon, turbPos[0], turbPos[1], turbPos[2], reference_diameter, level, dt, dx) );
+    para->addActuator( actuator_line );
 
+    SPtr<PointProbe> pointProbe = SPtr<PointProbe>( new PointProbe("pointProbe", 100, 500, 100) );
+    std::vector<real> probeCoordsX = {reference_diameter,2*reference_diameter,5*reference_diameter};
+    std::vector<real> probeCoordsY = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
+    std::vector<real> probeCoordsZ = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
+    pointProbe->addProbePointsFromList(probeCoordsX, probeCoordsY, probeCoordsZ);
+    // pointProbe->addProbePointsFromXNormalPlane(2*D, 0.0, 0.0, L_y, L_z, (uint)L_y/dx, (uint)L_z/dx);
+    pointProbe->addPostProcessingVariable(PostProcessingVariable::Means);
+    pointProbe->addPostProcessingVariable(PostProcessingVariable::Variances);
+    para->addProbe( pointProbe );
 
-        SPtr<ActuatorLine> actuator_line = SPtr<ActuatorLine>( new ActuatorLine(nBlades, density, nBladeNodes, epsilon, turbPos[0], turbPos[1], turbPos[2], reference_diameter, level, dt, dx) );
-        para->addActuator( actuator_line );
+    SPtr<PlaneProbe> planeProbe = SPtr<PlaneProbe>( new PlaneProbe("planeProbe", 100, 500, 100) );
+    planeProbe->setProbePlane(5*reference_diameter, 0, 0, dx, L_y, L_z);
+    planeProbe->addPostProcessingVariable(PostProcessingVariable::Means);
+    para->addProbe( planeProbe );
 
-        SPtr<PointProbe> pointProbe = SPtr<PointProbe>( new PointProbe("pointProbe", 100, 500, 100) );
-        std::vector<real> probeCoordsX = {reference_diameter,2*reference_diameter,5*reference_diameter};
-        std::vector<real> probeCoordsY = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
-        std::vector<real> probeCoordsZ = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
-        pointProbe->addProbePointsFromList(probeCoordsX, probeCoordsY, probeCoordsZ);
-        // pointProbe->addProbePointsFromXNormalPlane(2*D, 0.0, 0.0, L_y, L_z, (uint)L_y/dx, (uint)L_z/dx);
-        pointProbe->addPostProcessingVariable(PostProcessingVariable::Means);
-        pointProbe->addPostProcessingVariable(PostProcessingVariable::Variances);
-        para->addProbe( pointProbe );
 
-        SPtr<PlaneProbe> planeProbe = SPtr<PlaneProbe>( new PlaneProbe("planeProbe", 100, 500, 100) );
-        planeProbe->setProbePlane(5*reference_diameter, 0, 0, dx, L_y, L_z);
-        planeProbe->addPostProcessingVariable(PostProcessingVariable::Means);
-        para->addProbe( planeProbe );
 
 
-
-
-        Simulation sim;
-        SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
-        SPtr<KernelFactoryImp> kernelFactory = KernelFactoryImp::getInstance();
-        SPtr<PreProcessorFactoryImp> preProcessorFactory = PreProcessorFactoryImp::getInstance();
-        sim.setFactories(kernelFactory, preProcessorFactory);
-        sim.init(para, gridGenerator, fileWriter, cudaMemoryManager);        
-        sim.run();
-        sim.free();
-
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    }
+    Simulation sim(communicator);
+    SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
+    SPtr<KernelFactoryImp> kernelFactory = KernelFactoryImp::getInstance();
+    SPtr<PreProcessorFactoryImp> preProcessorFactory = PreProcessorFactoryImp::getInstance();
+    sim.setFactories(kernelFactory, preProcessorFactory);
+    sim.init(para, gridGenerator, fileWriter, cudaMemoryManager);        
+    sim.run();
+    sim.free();
 }
 
 int main( int argc, char* argv[])
 {
-    MPI_Init(&argc, &argv);
-    std::string str, str2; 
     if ( argv != NULL )
     {
-        //str = static_cast<std::string>(argv[0]);
-        
         try
         {
-            //////////////////////////////////////////////////////////////////////////
-
             vf::logging::Logger::initalizeLogger();
 
             if( argc > 1){ path = argv[1]; }
 
-			multipleLevel(path + "/configActuatorLine.txt");
-
-            //////////////////////////////////////////////////////////////////////////
-		}
+            multipleLevel(path + "/configActuatorLine.txt");
+        }
         catch (const spdlog::spdlog_ex &ex) {
             std::cout << "Log initialization failed: " << ex.what() << std::endl;
         }
@@ -271,7 +245,5 @@ int main( int argc, char* argv[])
             VF_LOG_CRITICAL("Unknown exception!");
         }
     }
-
-    MPI_Finalize();
     return 0;
 }
diff --git a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
index 66542317449cc949f1cecf0498c111e6446235cb..a660f23a776f3e9103c5190b468b41454d8a4f97 100644
--- a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
+++ b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
@@ -10,18 +10,14 @@
 #include <memory>
 #include <filesystem>
 
-#include "mpi.h"
-
 //////////////////////////////////////////////////////////////////////////
 
 #include "Core/DataTypes.h"
 #include "PointerDefinitions.h"
 
-#include "Core/LbmOrGks.h"
 #include "Core/StringUtilities/StringUtil.h"
 
 #include "Core/VectorTypes.h"
-#include "Core/Logger/Logger.h"
 
 #include <basics/config/ConfigurationFile.h>
 
@@ -99,8 +95,6 @@ const real dt = (real)1.0e-3; //0.5e-3;
 
 const uint nx = 64;
 
-//std::string path("F:/Work/Computations/out/DrivenCavity/"); //LEGOLAS
-//std::string path("D:/out/DrivenCavity"); //Mollok
 std::string path(".");
 
 std::string simulationName("DrivenCavityChim");
@@ -147,12 +141,12 @@ void multipleLevel(const std::string& configPath)
     {
 
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        vf::gpu::Communicator* comm = vf::gpu::Communicator::getInstanz();
+        vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
 
         vf::basics::ConfigurationFile config;
         config.load(configPath);
 
-        SPtr<Parameter> para = std::make_shared<Parameter>(config, comm->getNummberOfProcess(), comm->getPID());
+        SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID());
 
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -210,7 +204,7 @@ void multipleLevel(const std::string& configPath)
 
         SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager);
 
-        Simulation sim;
+        Simulation sim(communicator);
         SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
         SPtr<KernelFactoryImp> kernelFactory = KernelFactoryImp::getInstance();
         SPtr<PreProcessorFactoryImp> preProcessorFactory = PreProcessorFactoryImp::getInstance();
@@ -338,8 +332,6 @@ void multipleLevel(const std::string& configPath)
 
 int main( int argc, char* argv[])
 {
-    MPI_Init(&argc, &argv);
-
     try
     {
         vf::logging::Logger::initalizeLogger();
@@ -366,6 +358,5 @@ int main( int argc, char* argv[])
         VF_LOG_CRITICAL("Unknown exception!");
     }
 
-   MPI_Finalize();
    return 0;
 }
diff --git a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp b/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
index bc0fdfa440a1eb1fa466bccf3a68e6216a513fbb..88ec364ea0e7d6a9010d67dac26f4a442db45e8f 100644
--- a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
+++ b/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
@@ -54,6 +54,7 @@
 
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 
+#include <logger/Logger.h>
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -187,12 +188,12 @@ void multipleLevel(const std::string& configPath)
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    vf::gpu::Communicator* comm = vf::gpu::Communicator::getInstanz();
+    vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
 
     vf::basics::ConfigurationFile config;
     config.load(configPath);
 
-    SPtr<Parameter> para = std::make_shared<Parameter>(config, comm->getNummberOfProcess(), comm->getPID());
+    SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID());
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     const real velocityLB = (real)0.0844; // LB units
@@ -323,7 +324,7 @@ void multipleLevel(const std::string& configPath)
 
     SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager);
 
-    Simulation sim;
+    Simulation sim (communicator);
     SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
     SPtr<KernelFactoryImp> kernelFactory = KernelFactoryImp::getInstance();
     SPtr<PreProcessorFactoryImp> preProcessorFactory = PreProcessorFactoryImp::getInstance();
@@ -717,31 +718,31 @@ std::string chooseVariation()
 
 int main( int argc, char* argv[])
 {
-    MPI_Init(&argc, &argv);
-    if ( argv != NULL )
+    try
     {
-        try
-        {
-            // assuming that the config files is stored parallel to this file.
-            std::filesystem::path filePath = __FILE__;
-            filePath.replace_filename("configDrivenCavity.txt");
+        vf::logging::Logger::initalizeLogger();
 
-            multipleLevel(filePath.string());
-        }
-        catch (const std::bad_alloc& e)
-        { 
-            *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-        }
-        catch (const std::exception& e)
-        {   
-            *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-        }
-        catch (...)
-        {
-            *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-        }
+        // assuming that the config file is stored in the same directory as this source file.
+        std::filesystem::path filePath = __FILE__;
+        filePath.replace_filename("configDrivenCavity.txt");
+
+        multipleLevel(filePath.string());
+    }
+    catch (const spdlog::spdlog_ex &ex) {
+        std::cout << "Log initialization failed: " << ex.what() << std::endl;
+    }
+    catch (const std::bad_alloc& e)
+    { 
+        VF_LOG_CRITICAL("Bad Alloc: {}", e.what());
+    }
+    catch (const std::exception& e)
+    {   
+        VF_LOG_CRITICAL("exception: {}", e.what());
+    }
+    catch (...)
+    {
+        VF_LOG_CRITICAL("Unknown exception!");
     }
 
-   MPI_Finalize();
    return 0;
 }
diff --git a/cpu.cmake b/cpu.cmake
index 1bd1d913eb5d6ce5575ec9599e570a24a4acb888..a6220ec1ffb9641b824ee26b8be8497ea340173f 100644
--- a/cpu.cmake
+++ b/cpu.cmake
@@ -23,7 +23,6 @@
 #ENDIF()
 
 SET(USE_METIS ON CACHE BOOL "include METIS library support")
-SET(USE_MPI ON CACHE BOOL "include MPI library support")
 SET(USE_VTK OFF CACHE BOOL "include VTK library support")
 SET(USE_CATALYST OFF CACHE BOOL "include Paraview Catalyst support")
 
@@ -50,9 +49,6 @@ ENDIF()
 IF(${USE_METIS})
     list(APPEND VF_COMPILER_DEFINITION VF_METIS)
 ENDIF()
-IF(${USE_MPI})
-    list(APPEND VF_COMPILER_DEFINITION VF_MPI)
-ENDIF()
 IF(${USE_VTK})
     list(APPEND VF_COMPILER_DEFINITION VF_VTK)
 ENDIF()
diff --git a/src/basics/Singelton.h b/src/basics/Singelton.h
new file mode 100644
index 0000000000000000000000000000000000000000..f0979b5dd3d89e26ebbe4b4e82d2336e1f59a07e
--- /dev/null
+++ b/src/basics/Singelton.h
@@ -0,0 +1,67 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \author Soeren Peters
+//=======================================================================================
+
+#ifndef BASICS_SINGELTON_H
+#define BASICS_SINGELTON_H
+
+#include <memory>
+
+namespace vf::basics
+{
+
+//! \brief Base class template that provides one shared instance of T via getInstance().
+//! \details getInstance() creates the object with `new T` and stores it as a
+//! std::shared_ptr<Singleton>, so T has to derive from Singleton<T> and be
+//! default-constructible from inside this class. Copying is deleted.
+template<typename T>
+class Singleton
+{
+public:
+   Singleton(const Singleton&) = delete;
+   Singleton & operator=(const Singleton& rhs) = delete;
+
+protected:
+   Singleton() = default;
+
+public:
+   //! \brief Returns the single shared instance, creating it on the first call.
+   //! \return shared pointer to the Singleton<T> base of the one T object
+   static std::shared_ptr<Singleton> getInstance()
+   {
+     // function-local static: initialized once, the first time this line is reached
+     static std::shared_ptr<Singleton> s{new T};
+     return s;
+   }
+};
+
+}
+
+#endif
\ No newline at end of file
diff --git a/src/basics/basics/utilities/UbException.h b/src/basics/basics/utilities/UbException.h
index 9a458980688145c199bf7193000131aeb5fb5e30..5c9fef87bb65b12c1216111ddb9ca1e5eba58ab3 100644
--- a/src/basics/basics/utilities/UbException.h
+++ b/src/basics/basics/utilities/UbException.h
@@ -114,7 +114,7 @@ public:
     /*==========================================================*/
     virtual void addInfo(const std::string &err_str)
     {
-        exceptionData.push_back(makeUbTuple((std::string) "-", 0, (std::string) "unknown", err_str));
+        exceptionData.push_back(makeUbTuple(std::string("-"), 0, std::string("unknown"), err_str));
     }
     /*==========================================================*/
     // add exception
diff --git a/src/cpu/DemCoupling/CreateDemObjectsCoProcessor.cpp b/src/cpu/DemCoupling/CreateDemObjectsCoProcessor.cpp
index dacf8cfc870566455d1c91d94fd27e17239690e9..6f32a053afb4f6f45ae74e32b7f8665ab4fd58db 100644
--- a/src/cpu/DemCoupling/CreateDemObjectsCoProcessor.cpp
+++ b/src/cpu/DemCoupling/CreateDemObjectsCoProcessor.cpp
@@ -1,5 +1,5 @@
 #include "CreateDemObjectsCoProcessor.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "DemCoProcessor.h"
 #include "EquilibriumReconstructor.h"
 #include "ExtrapolationReconstructor.h"
@@ -19,7 +19,7 @@
 #include "muParser.h"
 
 CreateDemObjectsCoProcessor::CreateDemObjectsCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s,
-                                                         std::shared_ptr<Communicator> comm,
+                                                         std::shared_ptr<vf::mpi::Communicator> comm,
                                                          SPtr<DemCoProcessor> demCoProcessor,
                                                          SPtr<PhysicsEngineMaterialAdapter> demObjectMaterial,
                                                          double tolerance)
diff --git a/src/cpu/DemCoupling/CreateDemObjectsCoProcessor.h b/src/cpu/DemCoupling/CreateDemObjectsCoProcessor.h
index c8ecc6842ee0f77142ad05e5cb90ed71baaa5d64..7da317e67bd932f7d594c68d63ebc117b50c1e85 100644
--- a/src/cpu/DemCoupling/CreateDemObjectsCoProcessor.h
+++ b/src/cpu/DemCoupling/CreateDemObjectsCoProcessor.h
@@ -14,7 +14,7 @@
 
 class Grid3D;
 class UbScheduler;
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class DemCoProcessor;
 class GbObject3D;
 class BCAdapter;
@@ -24,7 +24,7 @@ class PhysicsEngineMaterialAdapter;
 class CreateDemObjectsCoProcessor : public CoProcessor
 {
 public:
-    CreateDemObjectsCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, std::shared_ptr<Communicator> comm,
+    CreateDemObjectsCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm,
                                 SPtr<DemCoProcessor> demCoProcessor,
                                 SPtr<PhysicsEngineMaterialAdapter> geoObjectMaterial, double tolerance = 0);
     void process(double step) override;
@@ -36,7 +36,7 @@ public:
 
 protected:
 private:
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     SPtr<DemCoProcessor> demCoProcessor;
     std::vector<SPtr<GbObject3D>> geoObjectPrototypeVector;
     SPtr<PhysicsEngineMaterialAdapter> demObjectMaterial;
diff --git a/src/cpu/DemCoupling/DemCoProcessor.cpp b/src/cpu/DemCoupling/DemCoProcessor.cpp
index 554ffaa6c76f36ad0b5834854c88b8f14864efe8..642a942d7d96b73af898690a5737f53d2d88b1a5 100644
--- a/src/cpu/DemCoupling/DemCoProcessor.cpp
+++ b/src/cpu/DemCoupling/DemCoProcessor.cpp
@@ -1,7 +1,7 @@
 #include "DemCoProcessor.h"
 
 #include "BCProcessor.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "DataSet3D.h"
 #include "DistributionArray3D.h"
 #include "ForceCalculator.h"
@@ -29,7 +29,7 @@
 #include <array>
 #include <functional>
 
-DemCoProcessor::DemCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<Communicator> comm,
+DemCoProcessor::DemCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm,
                                std::shared_ptr<ForceCalculator> forceCalculator,
                                std::shared_ptr<PhysicsEngineSolverAdapter> physicsEngineSolver,
                                double intermediatePeSteps)
diff --git a/src/cpu/DemCoupling/DemCoProcessor.h b/src/cpu/DemCoupling/DemCoProcessor.h
index 64fe2436128cb6eb5aabe118785213d1212ca296..d2946f1e93fcaedc69d44a83a68dc2079910e48f 100644
--- a/src/cpu/DemCoupling/DemCoProcessor.h
+++ b/src/cpu/DemCoupling/DemCoProcessor.h
@@ -31,15 +31,14 @@ class PePhysicsEngineGeometryAdapter;
 class UbScheduler;
 class Grid3D;
 class ForceCalculator;
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class MovableObjectInteractor;
-class Communicator;
 class BoundaryConditionsBlockVisitor;
 
 class DemCoProcessor : public CoProcessor
 {
 public:
-    DemCoProcessor(std::shared_ptr<Grid3D> grid, std::shared_ptr<UbScheduler> s, std::shared_ptr<Communicator> comm,
+    DemCoProcessor(std::shared_ptr<Grid3D> grid, std::shared_ptr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm,
                    std::shared_ptr<ForceCalculator> forceCalculator,
                    std::shared_ptr<PhysicsEngineSolverAdapter> physicsEngineSolver, double intermediatePeSteps = 1.0);
     virtual ~DemCoProcessor();
@@ -74,7 +73,7 @@ private:
     std::shared_ptr<PePhysicsEngineGeometryAdapter> getPeGeoAdapter(unsigned long long systemId);
 
 private:
-    std::shared_ptr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     std::vector<std::shared_ptr<MovableObjectInteractor>> interactors;
     std::shared_ptr<ForceCalculator> forceCalculator;
     std::shared_ptr<PePhysicsEngineSolverAdapter> physicsEngineSolver;
diff --git a/src/cpu/DemCoupling/PePartitioningGridVisitor.cpp b/src/cpu/DemCoupling/PePartitioningGridVisitor.cpp
index 8dbe680a89e913c71ce116e1203bb5d005d4bbdd..429eaeb8be0d3a601b64199e5e86279f7d05ce8f 100644
--- a/src/cpu/DemCoupling/PePartitioningGridVisitor.cpp
+++ b/src/cpu/DemCoupling/PePartitioningGridVisitor.cpp
@@ -2,7 +2,7 @@
 
 #include "PePartitioningGridVisitor.h"
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "CoordinateTransformation3D.h"
 #include "Grid3D.h"
 #include "UbLogger.h"
@@ -13,7 +13,7 @@
 
 using namespace std;
 
-PePartitioningGridVisitor::PePartitioningGridVisitor(SPtr<Communicator> comm, std::shared_ptr<DemCoProcessor> dem)
+PePartitioningGridVisitor::PePartitioningGridVisitor(std::shared_ptr<vf::mpi::Communicator> comm, std::shared_ptr<DemCoProcessor> dem)
     : Grid3DVisitor(), comm(comm), dem(dem)
 {
     forest = dynamicPointerCast<PePhysicsEngineSolverAdapter>(dem->getPhysicsEngineSolver())->getForest();
diff --git a/src/cpu/DemCoupling/PePartitioningGridVisitor.h b/src/cpu/DemCoupling/PePartitioningGridVisitor.h
index ff97d531d7f6ccaebb8c7e3a1ab60c2c27177bf5..cad80c0f4d986c45560c6111e8943226df136d24 100644
--- a/src/cpu/DemCoupling/PePartitioningGridVisitor.h
+++ b/src/cpu/DemCoupling/PePartitioningGridVisitor.h
@@ -16,7 +16,7 @@
 //! \brief The class implements domain decomposition with PE library
 //! \author Konstantin Kutscher
 //////////////////////////////////////////////////////////////////////////
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class Block3D;
 class DemCoProcessor;
@@ -32,7 +32,7 @@ public:
     //! Constructor
     //! \param comm - communicator
 
-    PePartitioningGridVisitor(SPtr<Communicator> comm, std::shared_ptr<DemCoProcessor> dem);
+    PePartitioningGridVisitor(std::shared_ptr<vf::mpi::Communicator> comm, std::shared_ptr<DemCoProcessor> dem);
     virtual ~PePartitioningGridVisitor();
     void visit(SPtr<Grid3D> grid) override;
 
@@ -44,7 +44,7 @@ protected:
     SPtr<Block3D> getBlockByMinUniform(double minX1, double minX2, double minX3, SPtr<Grid3D> grid);
 
 private:
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     std::shared_ptr<DemCoProcessor> dem;
 
     std::vector<int> ids;
diff --git a/src/cpu/DemCoupling/RestartDemObjectsCoProcessor.cpp b/src/cpu/DemCoupling/RestartDemObjectsCoProcessor.cpp
index 1d018376319ce7bed6eca6e4ef7f4492237699fb..ff6cbe7e5a3e394bac18016507a57308d0f1ecbf 100644
--- a/src/cpu/DemCoupling/RestartDemObjectsCoProcessor.cpp
+++ b/src/cpu/DemCoupling/RestartDemObjectsCoProcessor.cpp
@@ -1,6 +1,6 @@
 #include "RestartDemObjectsCoProcessor.h"
 
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "CreateDemObjectsCoProcessor.h"
 #include "DemCoProcessor.h"
 #include "GbSphere3D.h"
@@ -15,7 +15,7 @@ RestartDemObjectsCoProcessor::RestartDemObjectsCoProcessor() {}
 
 RestartDemObjectsCoProcessor::RestartDemObjectsCoProcessor(
     SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, SPtr<DemCoProcessor> demCoProcessor,
-    SPtr<CreateDemObjectsCoProcessor> createDemObjectsCoProcessor, double radius, SPtr<Communicator> comm)
+    SPtr<CreateDemObjectsCoProcessor> createDemObjectsCoProcessor, double radius, std::shared_ptr<vf::mpi::Communicator> comm)
     : CoProcessor(grid, s), path(path), demCoProcessor(demCoProcessor),
       createDemObjectsCoProcessor(createDemObjectsCoProcessor), radius(radius), comm(comm)
 {
diff --git a/src/cpu/DemCoupling/RestartDemObjectsCoProcessor.h b/src/cpu/DemCoupling/RestartDemObjectsCoProcessor.h
index 08fb70ef5821ceb3d53d8e4d1e0489519cc8f881..5123a2d6e51ece8e96d6623d573141a8c272026f 100644
--- a/src/cpu/DemCoupling/RestartDemObjectsCoProcessor.h
+++ b/src/cpu/DemCoupling/RestartDemObjectsCoProcessor.h
@@ -11,7 +11,7 @@
 
 #include "CoProcessor.h"
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class DemCoProcessor;
@@ -24,7 +24,7 @@ public:
     RestartDemObjectsCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
                                  SPtr<DemCoProcessor> demCoProcessor,
                                  SPtr<CreateDemObjectsCoProcessor> createDemObjectsCoProcessor, double radius,
-                                 SPtr<Communicator> comm);
+                                 std::shared_ptr<vf::mpi::Communicator> comm);
     ~RestartDemObjectsCoProcessor() {}
     void process(double step) override;
     void restart(double step);
@@ -34,7 +34,7 @@ public:
 private:
     std::string path;
     double radius;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     SPtr<DemCoProcessor> demCoProcessor;
     SPtr<CreateDemObjectsCoProcessor> createDemObjectsCoProcessor;
 };
diff --git a/src/cpu/DemCoupling/WriteDemObjectsCoProcessor.cpp b/src/cpu/DemCoupling/WriteDemObjectsCoProcessor.cpp
index 03b0c278373a8eb1b74172f31cef044442cccae4..3e22c90cf266fa8593b0036d160d79080a3ad31c 100644
--- a/src/cpu/DemCoupling/WriteDemObjectsCoProcessor.cpp
+++ b/src/cpu/DemCoupling/WriteDemObjectsCoProcessor.cpp
@@ -3,7 +3,7 @@
 #include "basics/writer/WbWriterVtkXmlASCII.h"
 #include "basics/writer/WbWriterVtkXmlBinary.h"
 
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "DemCoProcessor.h"
 #include "Grid3D.h"
 #include "UbScheduler.h"
@@ -13,7 +13,7 @@ WriteDemObjectsCoProcessor::WriteDemObjectsCoProcessor() {}
 //////////////////////////////////////////////////////////////////////////
 WriteDemObjectsCoProcessor::WriteDemObjectsCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
                                                        WbWriter *const writer, SPtr<DemCoProcessor> demCoProcessor,
-                                                       SPtr<Communicator> comm)
+                                                       std::shared_ptr<vf::mpi::Communicator> comm)
     : CoProcessor(grid, s), path(path), writer(writer), demCoProcessor(demCoProcessor), comm(comm)
 {
 }
diff --git a/src/cpu/DemCoupling/WriteDemObjectsCoProcessor.h b/src/cpu/DemCoupling/WriteDemObjectsCoProcessor.h
index 6c45b86322a792bcdb165b70082d5ae08b6cc496..7fb3b045ccd439d772ef565c2013af32c75a7a2d 100644
--- a/src/cpu/DemCoupling/WriteDemObjectsCoProcessor.h
+++ b/src/cpu/DemCoupling/WriteDemObjectsCoProcessor.h
@@ -11,7 +11,7 @@
 
 #include "CoProcessor.h"
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class DemCoProcessor;
@@ -22,14 +22,14 @@ class WriteDemObjectsCoProcessor : public CoProcessor
 public:
     WriteDemObjectsCoProcessor();
     WriteDemObjectsCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, WbWriter *const writer,
-                               SPtr<DemCoProcessor> demCoProcessor, SPtr<Communicator> comm);
+                               SPtr<DemCoProcessor> demCoProcessor, std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteDemObjectsCoProcessor() {}
     void process(double step) override;
 
 private:
     std::string path;
     WbWriter *writer;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     SPtr<DemCoProcessor> demCoProcessor;
 };
 #endif
diff --git a/src/cpu/DemCoupling/WritePeBlocksCoProcessor.cpp b/src/cpu/DemCoupling/WritePeBlocksCoProcessor.cpp
index 693a23c5cf6157dbdc3f3011330926ecae79b6bf..401ea91bc7225eea7f871cbc2e92be44d1a5c9d7 100644
--- a/src/cpu/DemCoupling/WritePeBlocksCoProcessor.cpp
+++ b/src/cpu/DemCoupling/WritePeBlocksCoProcessor.cpp
@@ -3,13 +3,13 @@
 #include "basics/writer/WbWriterVtkXmlASCII.h"
 
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "D3Q27System.h"
 #include "Grid3D.h"
 #include "UbScheduler.h"
 
 WritePeBlocksCoProcessor::WritePeBlocksCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                   WbWriter *const writer, SPtr<Communicator> comm,
+                                                   WbWriter *const writer, std::shared_ptr<vf::mpi::Communicator> comm,
                                                    SPtr<walberla::blockforest::BlockForest> forest)
     : CoProcessor(grid, s), path(path), writer(writer), comm(comm), forest(forest)
 {
diff --git a/src/cpu/DemCoupling/WritePeBlocksCoProcessor.h b/src/cpu/DemCoupling/WritePeBlocksCoProcessor.h
index 72334abd5e55fe4b9e4700540fca185db68f44b4..ae27d50b3f0bba867db7ad8cce79f2e5d8fd5681 100644
--- a/src/cpu/DemCoupling/WritePeBlocksCoProcessor.h
+++ b/src/cpu/DemCoupling/WritePeBlocksCoProcessor.h
@@ -15,7 +15,7 @@
 
 #include <pe/basic.h>
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class WbWriter;
@@ -24,7 +24,7 @@ class WritePeBlocksCoProcessor : public CoProcessor
 {
 public:
     WritePeBlocksCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, WbWriter *const writer,
-                             SPtr<Communicator> comm, SPtr<walberla::blockforest::BlockForest> forest);
+                             std::shared_ptr<vf::mpi::Communicator> comm, SPtr<walberla::blockforest::BlockForest> forest);
     virtual ~WritePeBlocksCoProcessor();
 
     void process(double step) override;
@@ -34,7 +34,7 @@ protected:
 
     std::string path;
     WbWriter *writer;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     SPtr<walberla::blockforest::BlockForest> forest;
 };
 
diff --git a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineSolverAdapter.cpp b/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineSolverAdapter.cpp
index dd78dc143fd0aa149663ae674146198491917644..14cef406392fbfbd9862a71b0c054df85a8608ec 100644
--- a/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineSolverAdapter.cpp
+++ b/src/cpu/DemCoupling/physicsEngineAdapter/pe/PePhysicsEngineSolverAdapter.cpp
@@ -8,7 +8,7 @@
 #include <pe/basic.h>
 #include <pe/rigidbody/UnionFactory.h>
 //#include "geometry/GeometricalFunctions.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "PeAdapter.h"
 #include "PeLoadBalancerAdapter.h"
 #include "PePhysicsEngineGeometryAdapter.h"
diff --git a/src/cpu/VirtualFluids.h b/src/cpu/VirtualFluids.h
index f52a4634f8266a886598558c205560a85bdbc21e..5c83ea7cb75e066eba0f424da0e5906dc9123136 100644
--- a/src/cpu/VirtualFluids.h
+++ b/src/cpu/VirtualFluids.h
@@ -40,6 +40,10 @@
 #include <omp.h>
 #endif
 
+#include <mpi/Communicator.h>
+#include <mpi/MPICommunicator.h>
+#include <mpi/NullCommunicator.h>
+
 #include <basics/PointerDefinitions.h>
 
 #include <basics/config/ConfigurationFile.h>
@@ -284,10 +288,7 @@
 #include <geometry3d/KdTree/splitalgorithms/KdSplitAlgorithm.h>
 
 #include <Parallel/BlocksDistributor.h>
-#include <Parallel/Communicator.h>
-#include <Parallel/MPICommunicator.h>
 #include <Parallel/MetisPartitioner.h>
-#include <Parallel/NullCommunicator.h>
 #include <Parallel/PriorityQueueDecompositor.h>
 #include <Parallel/SimpleGeometricPartitioner.h>
 #include <Parallel/ZoltanPartitioner.h>
diff --git a/src/cpu/VirtualFluidsCore/CMakeLists.txt b/src/cpu/VirtualFluidsCore/CMakeLists.txt
index 36ac278fb8aee484d38a09a3fd4499965875f712..15cdceffd99515f84d60c4b6169e2da7e74ecfc3 100644
--- a/src/cpu/VirtualFluidsCore/CMakeLists.txt
+++ b/src/cpu/VirtualFluidsCore/CMakeLists.txt
@@ -25,7 +25,7 @@ if(BUILD_USE_OPENMP)
    list(APPEND VF_LIBRARIES OpenMP::OpenMP_CXX)
 endif()
 
-vf_add_library(BUILDTYPE static PUBLIC_LINK basics muparser MPI::MPI_CXX ${VF_LIBRARIES} PRIVATE_LINK lbm)
+vf_add_library(BUILDTYPE static PUBLIC_LINK basics muparser ${VF_LIBRARIES} PRIVATE_LINK lbm mpi logger)
 
 
 vf_get_library_name(library_name)
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.cpp
index 2174090103ddfd5c806d62cb30a8e1567403251c..d02c249a62f60cdb91fbd4af9e975d39c6c4e29d 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.cpp
@@ -6,7 +6,7 @@
 
 #include <fstream>
 
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "Grid3D.h"
 #include "IntegrateValuesHelper.h"
 #include "UbScheduler.h"
@@ -14,7 +14,7 @@
 
 AdjustForcingCoProcessor::AdjustForcingCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
                                                    SPtr<IntegrateValuesHelper> integrateValues, double vTarged,
-                                                   SPtr<Communicator> comm)
+                                                   std::shared_ptr<vf::mpi::Communicator> comm)
 
     : CoProcessor(grid, s), path(path), integrateValues(integrateValues), comm(comm), vx1Targed(vTarged)
 {
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.h
index 93b99dadab125f87a12b1e905c3171559a5ea31f..fbf75d066e626a3cf3d44c481138a9b1007b3107 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/AdjustForcingCoProcessor.h
@@ -6,7 +6,7 @@
 
 #include "CoProcessor.h"
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class UbScheduler;
 class Grid3D;
 class IntegrateValuesHelper;
@@ -21,7 +21,7 @@ class AdjustForcingCoProcessor : public CoProcessor
 {
 public:
     AdjustForcingCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                             SPtr<IntegrateValuesHelper> integrateValues, double vTarged, SPtr<Communicator> comm);
+                             SPtr<IntegrateValuesHelper> integrateValues, double vTarged, std::shared_ptr<vf::mpi::Communicator> comm);
     //!< calls collect PostprocessData
     void process(double step) override;
 
@@ -30,7 +30,7 @@ protected:
     SPtr<IntegrateValuesHelper> integrateValues;
     //!< compares velocity in integrateValues with target velocity and adjusts forcing accordingly.
     void collectData(double step);
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
 
 private:
     double vx1Targed; //!< target velocity.
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.cpp
index 6912d4bb9c3a2034adb33c0e05722cb6d93a01c3..adce3f920ed36850ff711c10c7777a5035de027e 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/AverageValuesCoProcessor.cpp
@@ -7,7 +7,7 @@
 
 #include "BCArray3D.h"
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "DataSet3D.h"
 #include "Grid3D.h"
 #include "UbScheduler.h"
@@ -185,7 +185,7 @@ void AverageValuesCoProcessor::collectData(double step)
     piece           = subfolder + "/" + piece;
 
     vector<string> cellDataNames;
-    SPtr<Communicator> comm = Communicator::getInstance();
+    std::shared_ptr<vf::mpi::Communicator> comm = vf::mpi::Communicator::getInstance();
     vector<string> pieces   = comm->gather(piece);
     if (comm->getProcessID() == comm->getRoot()) {
         string pname =
@@ -448,7 +448,7 @@ void AverageValuesCoProcessor::calculateAverageValues(double timeStep)
 ////////////////////////////////////////////////////////////////////////////
 // void AverageValuesCoProcessor::initPlotData(double step)
 //{
-//   SPtr<Communicator> comm = Communicator::getInstance();
+//   std::shared_ptr<vf::mpi::Communicator> comm = vf::mpi::Communicator::getInstance();
 //	if (comm->getProcessID() == comm->getRoot())
 //	{
 //		std::ofstream ostr;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.cpp
index fe347d7864710b9c3bde20d83d63a5758c3d26e8..4e75e6337a44d46586a62a74d2e592b7d0839c57 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.cpp
@@ -4,7 +4,7 @@
 #include "BCArray3D.h"
 #include "Block3D.h"
 #include "BoundaryConditions.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "D3Q27Interactor.h"
 #include "DataSet3D.h"
 #include "DistributionArray3D.h"
@@ -14,7 +14,7 @@
 #include "UbScheduler.h"
 
 CalculateForcesCoProcessor::CalculateForcesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                       SPtr<Communicator> comm, double v, double a)
+                                                       std::shared_ptr<vf::mpi::Communicator> comm, double v, double a)
     : CoProcessor(grid, s), path(path), comm(comm), v(v), a(a), forceX1global(0), forceX2global(0), forceX3global(0)
 {
     if (comm->getProcessID() == comm->getRoot()) {
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.h
index c3004cf16b3c70cb5f24526badb69f62683b0b36..aa6bfd47799ed5d426550c756eccfff706709e9e 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateForcesCoProcessor.h
@@ -16,7 +16,7 @@
 #include "UbTuple.h"
 
 class ForceCalculator;
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class D3Q27Interactor;
@@ -29,7 +29,7 @@ public:
     //! Constructor
     //! \param v - velocity of fluid in LB units
     //! \param a - area of object in LB units
-    CalculateForcesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, SPtr<Communicator> comm,
+    CalculateForcesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm,
                                double v, double a);
     ~CalculateForcesCoProcessor() override;
     void process(double step) override;
@@ -45,7 +45,7 @@ protected:
 
 private:
     std::string path;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     std::vector<SPtr<D3Q27Interactor>> interactors;
     double forceX1global;
     double forceX2global;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp
index 9fd6e8c28aeb1bdb8120c98f0a338aa21b38cc57..770e837a0f27b1cb42db385a44382c6bee402aaf 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp
@@ -1,7 +1,7 @@
 #include "CalculateTorqueCoProcessor.h"
 #include "BCProcessor.h"
 
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "D3Q27Interactor.h"
 #include "UbScheduler.h"
 #include "Grid3D.h"
@@ -13,7 +13,7 @@
 #include "EsoTwist3D.h"
 #include "DistributionArray3D.h"
 
-CalculateTorqueCoProcessor::CalculateTorqueCoProcessor( SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path_, SPtr<Communicator> comm) : CoProcessor(grid, s), path(path_), comm(comm), forceX1global(0), forceX2global(0), forceX3global(0)
+CalculateTorqueCoProcessor::CalculateTorqueCoProcessor( SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path_, std::shared_ptr<vf::mpi::Communicator> comm) : CoProcessor(grid, s), path(path_), comm(comm), forceX1global(0), forceX2global(0), forceX3global(0)
 {
    if (comm->getProcessID() == comm->getRoot())
    {
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h
index 43e1e75acaf4ab115ac9c6dc40b449cf98f97e79..b1a7f771f5dffb5146f66d2bc16399b92aaceba6 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h
@@ -17,7 +17,7 @@
 #include "D3Q27System.h"
 
 class ForceCalculator;
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class D3Q27Interactor;
@@ -28,7 +28,7 @@ class CalculateTorqueCoProcessor: public CoProcessor
 {
 public:
    //! Constructor
-   CalculateTorqueCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, SPtr<Communicator> comm);
+   CalculateTorqueCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
 	virtual ~CalculateTorqueCoProcessor();             
 	void process(double step); 
    void addInteractor(SPtr<D3Q27Interactor> interactor);
@@ -38,7 +38,7 @@ protected:
    UbTupleDouble3 getForces(int x1, int x2, int x3, SPtr<DistributionArray3D> distributions, SPtr<BoundaryConditions> bc);
 private:
    std::string path;
-   SPtr<Communicator> comm;
+   std::shared_ptr<vf::mpi::Communicator> comm;
    std::vector<SPtr<D3Q27Interactor> > interactors;
    double forceX1global;
    double forceX2global;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.cpp
index f7061e9c8f9ebc3a43171d1b075d95ba76818f60..6dae1c7049ea3c0d779b31fff2e79104e034790f 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.cpp
@@ -10,13 +10,13 @@
 #include <vector>
 
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "Grid3D.h"
 #include "LBMKernel.h"
 #include "UbScheduler.h"
 
 DecreaseViscosityCoProcessor::DecreaseViscosityCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, mu::Parser *nueFunc,
-                                                           SPtr<Communicator> comm)
+                                                           std::shared_ptr<vf::mpi::Communicator> comm)
 
     : CoProcessor(grid, s), nueFunc(nueFunc), comm(comm)
 {
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.h
index 75a833be7d95c0f0fbd8a171d96de9e651598693..ca413ba2d5201d3043594f4a4b4803091bb51cc8 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/DecreaseViscosityCoProcessor.h
@@ -11,7 +11,7 @@
 
 class UbScheduler;
 class Grid3D;
-class Communicator;
+namespace vf::mpi {class Communicator;}
 
 //! \brief The class sets viscosity/collision factor according to a previously defined function in time.
 //! \details initialization in test case (example):
@@ -28,7 +28,7 @@ class Communicator;
 class DecreaseViscosityCoProcessor : public CoProcessor
 {
 public:
-    DecreaseViscosityCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, mu::Parser *nueFunc, SPtr<Communicator> comm);
+    DecreaseViscosityCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, mu::Parser *nueFunc, std::shared_ptr<vf::mpi::Communicator> comm);
     ~DecreaseViscosityCoProcessor() override;
     //! calls collect PostprocessData.
     void process(double step) override;
@@ -36,7 +36,7 @@ public:
 protected:
     //! resets the collision factor depending on the current timestep.
     void setViscosity(double step);
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
 
 private:
     mutable mu::value_type timeStep;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.cpp
index 11251a33660ff72887ce984baba43b820c9a22e5..3195ea4dfc6a9be9cf49ef7e04bfe57bce6e70f2 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.cpp
@@ -1,5 +1,5 @@
 #include "EmergencyExitCoProcessor.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "Grid3D.h"
 #include "MPIIORestartCoProcessor.h"
 #include "UbLogger.h"
@@ -8,7 +8,7 @@
 #include <basics/utilities/UbFileOutputASCII.h>
 
 EmergencyExitCoProcessor::EmergencyExitCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                   SPtr<MPIIORestartCoProcessor> rp, SPtr<Communicator> comm)
+                                                   SPtr<MPIIORestartCoProcessor> rp, std::shared_ptr<vf::mpi::Communicator> comm)
     : CoProcessor(grid, s), path(path), rp(rp), comm(comm)
 {
     this->path = path + "/exit";
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.h
index 382083c07a0b1dc900ef59ae2c2d5a5967d32f72..8894420c979eb6e7879c1788010d7e5d7e807eec 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/EmergencyExitCoProcessor.h
@@ -14,7 +14,7 @@
 #include "CoProcessor.h"
 
 class MPIIORestartCoProcessor;
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 
@@ -22,7 +22,7 @@ class EmergencyExitCoProcessor : public CoProcessor
 {
 public:
     EmergencyExitCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                             SPtr<MPIIORestartCoProcessor> rp, SPtr<Communicator> comm);
+                             SPtr<MPIIORestartCoProcessor> rp, std::shared_ptr<vf::mpi::Communicator> comm);
     ~EmergencyExitCoProcessor() override;
 
     void process(double step) override;
@@ -35,7 +35,7 @@ protected:
 
 private:
     std::string path;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     SPtr<MPIIORestartCoProcessor> rp;
     std::string metafile;
 };
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.cpp
index 7e04fd4b66a9abb108d07d7d033768f25b769299..5a514606ad8486073e7c1e3679ee7a31916553df 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.cpp
@@ -4,13 +4,13 @@
 #include "BCArray3D.h"
 #include "Block3D.h"
 #include "BoundaryConditions.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "D3Q27Interactor.h"
 #include "DataSet3D.h"
 #include "DistributionArray3D.h"
 #include "LBMKernel.h"
 
-ForceCalculator::ForceCalculator(SPtr<Communicator> comm)
+ForceCalculator::ForceCalculator(std::shared_ptr<vf::mpi::Communicator> comm)
     : comm(comm), forceX1global(0), forceX2global(0), forceX3global(0)
 {
 }
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.h b/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.h
index 432cc452263deb9206a834251e49612276c2a060..7aeb514abe426020af59a936d5f8b8c184ea496f 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/ForceCalculator.h
@@ -14,14 +14,14 @@
 #include "Vector3D.h"
 
 class D3Q27Interactor;
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class DistributionArray3D;
 class BoundaryConditions;
 
 class ForceCalculator
 {
 public:
-    ForceCalculator(std::shared_ptr<Communicator> comm);
+    ForceCalculator(std::shared_ptr<vf::mpi::Communicator> comm);
     virtual ~ForceCalculator();
 
     void calculateForces(std::vector<std::shared_ptr<D3Q27Interactor>> interactors);
@@ -34,7 +34,7 @@ public:
 private:
     void gatherGlobalForces();
 
-    std::shared_ptr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
 
     double forceX1global;
     double forceX2global;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.cpp
index 6955c55dd315a350909695d5693f60a6368f6bb0..40a8011ca871965f4b389ce32559b847021d2fe2 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/InSituCatalystCoProcessor.cpp
@@ -20,7 +20,7 @@ InSituCatalystCoProcessor::InSituCatalystCoProcessor() {}
 InSituCatalystCoProcessor::InSituCatalystCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, std::string script)
     : CoProcessor(grid, s)
 {
-    gridRank     = Communicator::getInstance()->getProcessID();
+    gridRank     = vf::mpi::Communicator::getInstance()->getProcessID();
     minInitLevel = this->grid->getCoarsestInitializedLevel();
     maxInitLevel = this->grid->getFinestInitializedLevel();
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.cpp
index 8d5cec10521830ba2aec9f2c06ef2a796da6b954..73c0a2325953994c337934347e872223ba18452a 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/InSituVTKCoProcessor.cpp
@@ -30,7 +30,7 @@ InSituVTKCoProcessor::InSituVTKCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler>
                                            SPtr<LBMUnitConverter> conv)
     : CoProcessor(grid, s), conv(conv)
 {
-    gridRank     = Communicator::getInstance()->getProcessID();
+    gridRank     = vf::mpi::Communicator::getInstance()->getProcessID();
     minInitLevel = this->grid->getCoarsestInitializedLevel();
     maxInitLevel = this->grid->getFinestInitializedLevel();
 
@@ -269,7 +269,7 @@ void InSituVTKCoProcessor::readConfigFile(const std::string &configFile)
     string dummy;
     int wRank = 0;
     getline(ifs, dummy);
-    int np = Communicator::getInstance()->getNumberOfProcesses();
+    int np = vf::mpi::Communicator::getInstance()->getNumberOfProcesses();
 
     while (ifs.good()) {
         getline(ifs, dummy, ';');
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.cpp
index 81f7dfc418f3ea13a706fef2820b355131e56a71..4e711bd7c03b1da262c427230dc1c357966e1681 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.cpp
@@ -10,7 +10,7 @@
 #include "LBMKernel.h"
 
 //////////////////////////////////////////////////////////////////////////
-IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, SPtr<Communicator> comm, double minX1, double minX2,
+IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, double minX1, double minX2,
                                              double minX3, double maxX1, double maxX2, double maxX3)
     :
 
@@ -21,7 +21,7 @@ IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, SPtr<Communicato
     init(-1);
 }
 //////////////////////////////////////////////////////////////////////////
-IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, SPtr<Communicator> comm, double minX1, double minX2,
+IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, double minX1, double minX2,
                                              double minX3, double maxX1, double maxX2, double maxX3, int level)
     :
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.h b/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.h
index f5d846f98317d56f6c0ccf3e267a382ead1cd2a6..d6c87dcfd604bc1f1ded813b04e6ee71829c0d27 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/IntegrateValuesHelper.h
@@ -5,7 +5,7 @@
 
 #include "Block3D.h"
 #include "CbArray2D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "D3Q27System.h"
 #include "GbCuboid3D.h"
 #include "Grid3D.h"
@@ -36,9 +36,9 @@ public:
     };
 
 public:
-    IntegrateValuesHelper(SPtr<Grid3D> grid, SPtr<Communicator> comm, double minX1, double minX2, double minX3,
+    IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, double minX1, double minX2, double minX3,
                           double maxX1, double maxX2, double maxX3);
-    IntegrateValuesHelper(SPtr<Grid3D> grid, SPtr<Communicator> comm, double minX1, double minX2, double minX3,
+    IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, double minX1, double minX2, double minX3,
                           double maxX1, double maxX2, double maxX3, int level);
     virtual ~IntegrateValuesHelper();
 
@@ -77,7 +77,7 @@ private:
     double sAvVx1, sAvVx2, sAvVx3, sTSx1, sTSx2, sTSx3, sTSx1x3;
     std::vector<CalcNodes> cnodes;
     GbCuboid3DPtr boundingBox;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     CbArray2D<Node> cnodes2DMatrix;
     enum Values { AvVx = 0, AvVy = 1, AvVz = 2, AvVxx = 3, AvVyy = 4, AvVzz = 5, AvVxy = 6, AvVyz = 7, AvVxz = 8 };
 };
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.cpp
index fe04f4f9c98f8c41a33ee298df0689e2d254c2a7..9e5fa087fccf6d1121052ece7673a406984d52c0 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.cpp
@@ -3,7 +3,7 @@
 #include "WbWriterVtkXmlASCII.h"
 
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "CompressibleCumulantLBMKernel.h"
 #include "CoordinateTransformation3D.h"
 #include "DataSet3D.h"
@@ -13,7 +13,7 @@
 #include "UbScheduler.h"
 
 LineTimeSeriesCoProcessor::LineTimeSeriesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                     SPtr<GbLine3D> line, int level, SPtr<Communicator> comm)
+                                                     SPtr<GbLine3D> line, int level, std::shared_ptr<vf::mpi::Communicator> comm)
     : CoProcessor(grid, s), path(path), length(0), ix1(0), ix2(0), ix3(0), level(level), line(line)
 {
     root  = comm->isRoot();
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.h
index 76f933c0599c38d8c84eb4e872a307c951113d4b..16061b0b259b9118a82f7f46abbb919250b5dfea 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/LineTimeSeriesCoProcessor.h
@@ -9,7 +9,7 @@
 #include "CoProcessor.h"
 #include "LBMSystem.h"
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class GbLine3D;
@@ -27,7 +27,7 @@ public:
 
 public:
     LineTimeSeriesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, SPtr<GbLine3D> line,
-                              int level, SPtr<Communicator> comm);
+                              int level, std::shared_ptr<vf::mpi::Communicator> comm);
     ~LineTimeSeriesCoProcessor() override = default;
 
     void process(double step) override;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.cpp
index 481311f909457f80655b5c888a8226635472a3ce..4f8f3a4b503f9490545ed760aaacf2778dece474 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.cpp
@@ -1,6 +1,6 @@
 #include "MPIIOCoProcessor.h"
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "CoordinateTransformation3D.h"
 #include "Grid3D.h"
 #include "MPIIODataStructures.h"
@@ -13,7 +13,7 @@
 using namespace MPIIODataStructures;
 
 MPIIOCoProcessor::MPIIOCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                   SPtr<Communicator> comm)
+                                   std::shared_ptr<vf::mpi::Communicator> comm)
     : CoProcessor(grid, s), path(path), comm(comm)
 {
     UbSystem::makeDirectory(path + "/mpi_io_cp");
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.h
index 8997a39d1fc16136d06e507f493e743662e189a5..edee5255ebdb14ed23cd3f53e4738a3fd8d58186 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.h
@@ -8,14 +8,14 @@
 
 class Grid3D;
 class UbScheduler;
-class Communicator;
+namespace vf::mpi {class Communicator;}
 
 //! \class MPIWriteBlocksBECoProcessor
 //! \brief Writes the grid each timestep into the files and reads the grip from the files before regenerating
 class MPIIOCoProcessor : public CoProcessor
 {
 public:
-    MPIIOCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, SPtr<Communicator> comm);
+    MPIIOCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
     ~MPIIOCoProcessor() override;
 
     //! Each timestep writes the grid into the files
@@ -37,7 +37,7 @@ public:
 
 protected:
     std::string path;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     MPI_Datatype gridParamType, block3dType, dataSetParamType, boundCondType, arrayPresenceType;
 };
 #endif // ! _MPIIOCoProcessor_H_
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.cpp
index 0d713212c0f19e62bc521482ecc5a937a359e7a6..b58376cebd7854a1aba1a2b69881ac445ad51082 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.cpp
@@ -3,7 +3,7 @@
 #include "BCProcessor.h"
 #include "Block3D.h"
 #include "BoundaryConditions.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "CoordinateTransformation3D.h"
 #include "D3Q27EsoTwist3DSplittedVector.h"
 #include "D3Q27System.h"
@@ -25,7 +25,7 @@ using namespace MPIIODataStructures;
 #define MESSAGE_TAG 80
 #define SEND_BLOCK_SIZE 100000
 
-MPIIOMigrationBECoProcessor::MPIIOMigrationBECoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, SPtr<Communicator> comm)
+MPIIOMigrationBECoProcessor::MPIIOMigrationBECoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm)
     : MPIIOCoProcessor(grid, s, path, comm), nue(-999.999), nuL(-999.999), nuG(-999.999), densityRatio(-999.999)
 {
     memset(&boundCondParamStr, 0, sizeof(boundCondParamStr));
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.h
index 9a89ada1ae039d10cd53b06b189e5709398911c8..7c9471246af0207d3baef1bb7e8e1726627d77b6 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.h
@@ -10,7 +10,7 @@
 
 class Grid3D;
 class UbScheduler;
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class BCProcessor;
 class LBMKernel;
 
@@ -31,7 +31,7 @@ class MPIIOMigrationBECoProcessor : public MPIIOCoProcessor
 
 public:
     MPIIOMigrationBECoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                SPtr<Communicator> comm);
+                                std::shared_ptr<vf::mpi::Communicator> comm);
     ~MPIIOMigrationBECoProcessor() override;
     //! Each timestep writes the grid into the files
     void process(double step) override;
@@ -80,7 +80,7 @@ public:
 
 protected:
     // std::string path;
-    // SPtr<Communicator> comm;
+    // std::shared_ptr<vf::mpi::Communicator> comm;
 
 private:
     // MPI_Datatype gridParamType, block3dType;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.cpp
index 87ded25803be8c71d3b201aa7907aadc54cdfc1b..daf31d6fd663574381d5d5d63263283f9681de7a 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.cpp
@@ -3,7 +3,7 @@
 #include "BCProcessor.h"
 #include "Block3D.h"
 #include "BoundaryConditions.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "CoordinateTransformation3D.h"
 #include "D3Q27EsoTwist3DSplittedVector.h"
 #include "D3Q27System.h"
@@ -22,7 +22,7 @@
 
 using namespace MPIIODataStructures;
 
-MPIIOMigrationCoProcessor::MPIIOMigrationCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, SPtr<Communicator> comm)
+MPIIOMigrationCoProcessor::MPIIOMigrationCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm)
     : MPIIOCoProcessor(grid, s, path, comm)
 {
     memset(&boundCondParamStr, 0, sizeof(boundCondParamStr));
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.h
index ca0de8f3e7ba315bc8a870f89063ea9f38d7b59f..1293d38d4a9f9a7ff7174db3473288cc1af70c4e 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.h
@@ -9,7 +9,7 @@
 
 class Grid3D;
 class UbScheduler;
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class BCProcessor;
 class LBMKernel;
 
@@ -29,7 +29,7 @@ public:
         PhaseField2 = 8
     };
 
-    MPIIOMigrationCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, SPtr<Communicator> comm);
+    MPIIOMigrationCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
     ~MPIIOMigrationCoProcessor() override;
     //! Each timestep writes the grid into the files
     void process(double step) override;
@@ -73,7 +73,7 @@ public:
 
 protected:
     // std::string path;
-    // SPtr<Communicator> comm;
+    // std::shared_ptr<vf::mpi::Communicator> comm;
 
 private:
     // MPI_Datatype gridParamType, block3dType;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp
index f69514478a2a53372c1807324581ad125bc55432..2a35d2d203a242a1ddc89ab431f6772e49ceb98b 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp
@@ -3,7 +3,7 @@
 #include "BCProcessor.h"
 #include "Block3D.h"
 #include "BoundaryConditions.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "CoordinateTransformation3D.h"
 #include "D3Q27EsoTwist3DSplittedVector.h"
 #include "D3Q27System.h"
@@ -25,7 +25,7 @@
 
 using namespace MPIIODataStructures;
 
-MPIIORestartCoProcessor::MPIIORestartCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, SPtr<Communicator> comm)
+MPIIORestartCoProcessor::MPIIORestartCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm)
     : MPIIOCoProcessor(grid, s, path, comm)
 {
     memset(&boundCondParamStr, 0, sizeof(boundCondParamStr));
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h
index 57f559769a06d9a87a968ada73fbaba712da789b..b50eafb799f601a48aa0d80f5a4a0a17bab95d0f 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h
@@ -11,7 +11,7 @@
 
 class Grid3D;
 class UbScheduler;
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class BCProcessor;
 class LBMKernel;
 
@@ -20,7 +20,7 @@ class LBMKernel;
 class MPIIORestartCoProcessor : public MPIIOCoProcessor
 {
 public:
-    MPIIORestartCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, SPtr<Communicator> comm);
+    MPIIORestartCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
     ~MPIIORestartCoProcessor() override;
     //! Each timestep writes the grid into the files
     void process(double step) override;
@@ -62,7 +62,7 @@ public:
 
 protected:
     // std::string path;
-    // SPtr<Communicator> comm;
+    // std::shared_ptr<vf::mpi::Communicator> comm;
 
 private:
     // MPI_Datatype gridParamType, block3dType;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.cpp
index 698528f32fc24a1d1cacc1f7fd7f61c59bd18430..53e98e9e107e0cc91fccf6e59afae18ea9a0e931 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.cpp
@@ -2,7 +2,7 @@
 #include "BCArray3D.h"
 #include "BCProcessor.h"
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "D3Q27System.h"
 #include "DataSet3D.h"
 #include "DistributionArray3D.h"
@@ -13,7 +13,7 @@
 #include <sstream>
 
 MicrophoneArrayCoProcessor::MicrophoneArrayCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                       SPtr<Communicator> comm)
+                                                       std::shared_ptr<vf::mpi::Communicator> comm)
     : CoProcessor(grid, s), path(path), comm(comm)
 {
     count = 0;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.h
index d6185df6be205442e2cf1dc7a58710232ae80edf..a10f30440c8539677511af6f7ac40fbe257d4eaf 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MicrophoneArrayCoProcessor.h
@@ -8,7 +8,7 @@
 #include <string>
 #include <vector>
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class Vector3D;
@@ -23,7 +23,7 @@ class MicrophoneArrayCoProcessor : public CoProcessor
 {
 public:
     MicrophoneArrayCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                               SPtr<Communicator> comm);
+                               std::shared_ptr<vf::mpi::Communicator> comm);
     ~MicrophoneArrayCoProcessor() override;
 
     //! calls collectData.
@@ -38,7 +38,7 @@ protected:
 
 private:
     std::string path;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
 
     struct Mic {
         unsigned int id;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.cpp
index 46cbba0df4b96fddec45c7c696de51d74e9bc6eb..633ffd26f3ed77c58ac83200fdf18cb6f0385979 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.cpp
@@ -33,12 +33,12 @@
 
 #include "NUPSCounterCoProcessor.h"
 
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "Grid3D.h"
 #include "UbScheduler.h"
 
 NUPSCounterCoProcessor::NUPSCounterCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, int numOfThreads,
-                                               SPtr<Communicator> comm)
+                                               std::shared_ptr<vf::mpi::Communicator> comm)
     : CoProcessor(grid, s), numOfThreads(numOfThreads), nup(0), nup_t(0), nupsStep(0.0), comm(comm)
 {
     if (comm->getProcessID() == comm->getRoot()) {
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.h
index 61d2ba0b69a96959b07d5e1901da62ab7abdaa1a..ce6b16996824be9e614e131c6e05fad0d1a507fd 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/NUPSCounterCoProcessor.h
@@ -39,7 +39,7 @@
 #include "CoProcessor.h"
 #include "basics/utilities/UbTiming.h"
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 
@@ -54,7 +54,7 @@ public:
     //! \param s is UbScheduler object for scheduling of observer
     //! \param numOfThreads is number of threads
     //! \param comm is Communicator object
-    NUPSCounterCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, int numOfThreads, SPtr<Communicator> comm);
+    NUPSCounterCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, int numOfThreads, std::shared_ptr<vf::mpi::Communicator> comm);
     ~NUPSCounterCoProcessor() override;
 
     void process(double step) override;
@@ -70,7 +70,7 @@ protected:
     double nup;
     double nup_t;
     double nupsStep;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.cpp
index 1c7e47f52e721069b20a6b3c27d1e71857ef74ab..ae385117c311eabfe2c5b98c8c2c45f4cd7473cd 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.cpp
@@ -4,7 +4,7 @@
 #include "BCArray3D.h"
 #include "BCProcessor.h"
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "D3Q27Interactor.h"
 #include "DataSet3D.h"
 #include "GbCuboid3D.h"
@@ -14,7 +14,7 @@
 
 PressureCoefficientCoProcessor::PressureCoefficientCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s,
                                                                GbCuboid3DPtr plane, const std::string &path,
-                                                               SPtr<Communicator> comm)
+                                                               std::shared_ptr<vf::mpi::Communicator> comm)
     : CoProcessor(grid, s), plane(plane), path(path), comm(comm)
 {
     maxStep       = scheduler->getMaxEnd();
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.h
index f8f1d80d9c3185a13ab7ceebd579929f441c25e0..42927a7315d620e60c3af5c4285a89c18609cee7 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.h
@@ -11,7 +11,7 @@
 
 class GbCuboid3D;
 class D3Q27Interactor;
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 
@@ -19,7 +19,7 @@ class PressureCoefficientCoProcessor : public CoProcessor
 {
 public:
     PressureCoefficientCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<GbCuboid3D> plane,
-                                   const std::string &path, SPtr<Communicator> comm);
+                                   const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
     ~PressureCoefficientCoProcessor() override;
 
     void process(double step) override;
@@ -35,7 +35,7 @@ protected:
 private:
     SPtr<GbCuboid3D> plane;
     std::string path;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     std::vector<SPtr<D3Q27Interactor>> interactors;
     int numberOfSteps;
     double maxStep;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.cpp
index e9b94dc6a225cd1b3b8a70126a242dd42e8e5b0c..74cd5a09c71b717f138090892b51b12a721f60ab 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.cpp
@@ -9,7 +9,7 @@
 
 #include <fstream>
 
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "Grid3D.h"
 #include "IntegrateValuesHelper.h"
 #include "LBMUnitConverter.h"
@@ -18,7 +18,7 @@
 PressureDifferenceCoProcessor::PressureDifferenceCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s,
                                                              const std::string &path, SPtr<IntegrateValuesHelper> h1,
                                                              SPtr<IntegrateValuesHelper> h2, LBMReal rhoReal,
-                                                             LBMReal uReal, LBMReal uLB, SPtr<Communicator> comm)
+                                                             LBMReal uReal, LBMReal uLB, std::shared_ptr<vf::mpi::Communicator> comm)
 
     : CoProcessor(grid, s), path(path), h1(h1), h2(h2), comm(comm)
 {
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.h
index e805c250ba4132d2f90560bfb48e6f361b2e467a..6de68a977904d5cc25ee37395eff4c9e66748eb4 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/PressureDifferenceCoProcessor.h
@@ -14,7 +14,7 @@
 #include "CoProcessor.h"
 #include "LBMSystem.h"
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class LBMUnitConverter;
@@ -26,7 +26,7 @@ public:
     PressureDifferenceCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
                                   SPtr<IntegrateValuesHelper> h1, SPtr<IntegrateValuesHelper> h2, LBMReal rhoReal,
                                   LBMReal uReal, LBMReal uLB,
-                                  /*const SPtr<LBMUnitConverter> conv,*/ SPtr<Communicator> comm);
+                                  /*const SPtr<LBMUnitConverter> conv,*/ std::shared_ptr<vf::mpi::Communicator> comm);
     ~PressureDifferenceCoProcessor() override;
 
     void process(double step) override;
@@ -36,7 +36,7 @@ protected:
     std::string path;
     SPtr<LBMUnitConverter> conv;
     void collectData(double step);
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     LBMReal factor1; //= (1/3)*rhoReal*(uReal/uLB)^2 for calculation pReal = rhoLB * (1/3)*rhoReal*(uReal/uLB)^2,
                      //rhoReal and uReal in SI
     LBMReal factor2; //= rhoReal*(uReal/uLB)^2       for calculation pReal = press * rhoReal*(uReal/uLB)^2, rhoReal and
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.cpp
index 28652870f2f1c81dd0d9d6f4fbb014a9626af4e1..1fbdb6f7f40a9b126cfa174d8cef7d7516ff884a 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.cpp
@@ -7,11 +7,11 @@
 #include "basics/writer/WbWriterVtkXmlASCII.h"
 
 #include "BCArray3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "UbScheduler.h"
 
 QCriterionCoProcessor::QCriterionCoProcessor(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer,
-                                             SPtr<UbScheduler> s, SPtr<Communicator> comm)
+                                             SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm)
     : CoProcessor(grid, s), path(path), comm(comm), writer(writer)
 {
     init();
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.h
index 0227687526a0e0f46339d3342162a26393063872..55f0df5a2e8aaaf933babb70d6b9c5246424c34c 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/QCriterionCoProcessor.h
@@ -13,7 +13,7 @@
 #include "LBMSystem.h"
 #include "UbTuple.h"
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class WbWriter;
@@ -29,7 +29,7 @@ class QCriterionCoProcessor : public CoProcessor
 {
 public:
     QCriterionCoProcessor(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer, SPtr<UbScheduler> s,
-                          SPtr<Communicator> comm);
+                          std::shared_ptr<vf::mpi::Communicator> comm);
     //! Make update if timestep is write-timestep specified in SPtr<UbScheduler> s
     void process(double step) override;
 
@@ -58,7 +58,7 @@ private:
     int gridRank; // comm-Rank des aktuellen prozesses
     std::string path;
     WbWriter *writer;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     enum Values { xdir = 0, ydir = 1, zdir = 2 }; // labels for the different components
 };
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.cpp
index 3b546a702726fca63d23f72dc0b04545ad544525..3765d2a999f73c476a4f1b250daeaefde971c277 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/ShearStressCoProcessor.cpp
@@ -4,7 +4,7 @@
 
 #include "BCArray3D.h"
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "D3Q27Interactor.h"
 #include "DataSet3D.h"
 #include "Grid3D.h"
@@ -16,7 +16,7 @@ ShearStressCoProcessor::ShearStressCoProcessor(SPtr<Grid3D> grid, const std::str
                                                SPtr<UbScheduler> s, SPtr<UbScheduler> rs)
     : CoProcessor(grid, s), Resetscheduler(rs), path(path), writer(writer)
 {
-    SPtr<Communicator> comm = Communicator::getInstance();
+    std::shared_ptr<vf::mpi::Communicator> comm = vf::mpi::Communicator::getInstance();
     normals.push_back(0);
     normals.push_back(0);
     normals.push_back(1);
@@ -62,7 +62,7 @@ void ShearStressCoProcessor::collectData(double step)
 
     // vector<string> cellDataNames;
 
-    // SPtr<Communicator> comm = Communicator::getInstance();
+    // std::shared_ptr<vf::mpi::Communicator> comm = vf::mpi::Communicator::getInstance();
     // vector<string> pieces = comm->gatherStrings(piece);
     // if (comm->getProcessID() == comm->getRoot())
     //{
@@ -94,7 +94,7 @@ void ShearStressCoProcessor::collectData(double step)
     piece           = subfolder + "/" + piece;
 
     vector<string> cellDataNames;
-    SPtr<Communicator> comm = Communicator::getInstance();
+    std::shared_ptr<vf::mpi::Communicator> comm = vf::mpi::Communicator::getInstance();
     vector<string> pieces   = comm->gather(piece);
     if (comm->getProcessID() == comm->getRoot()) {
         string pname =
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.cpp
index 24230ca99c605e8bbd2594236253c8ef63de1afc..7391754ee92fe216b78245c38032dbfc0594ad62 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.cpp
@@ -4,7 +4,7 @@
 #include "LBMKernel.h"
 
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "DataSet3D.h"
 #include "Grid3D.h"
 #include "UbScheduler.h"
@@ -16,7 +16,7 @@ TimeAveragedValuesCoProcessor::TimeAveragedValuesCoProcessor() = default;
 //////////////////////////////////////////////////////////////////////////
 TimeAveragedValuesCoProcessor::TimeAveragedValuesCoProcessor(SPtr<Grid3D> grid, const std::string &path,
                                                              WbWriter *const writer, SPtr<UbScheduler> s,
-                                                             SPtr<Communicator> comm, int options)
+                                                             std::shared_ptr<vf::mpi::Communicator> comm, int options)
     : CoProcessor(grid, s), path(path), writer(writer), comm(comm), options(options)
 {
     init();
@@ -26,7 +26,7 @@ TimeAveragedValuesCoProcessor::TimeAveragedValuesCoProcessor(SPtr<Grid3D> grid,
 //////////////////////////////////////////////////////////////////////////
 TimeAveragedValuesCoProcessor::TimeAveragedValuesCoProcessor(SPtr<Grid3D> grid, const std::string &path,
                                                              WbWriter *const writer, SPtr<UbScheduler> s,
-                                                             SPtr<Communicator> comm, int options,
+                                                             std::shared_ptr<vf::mpi::Communicator> comm, int options,
                                                              std::vector<int> levels, std::vector<double> &levelCoords,
                                                              std::vector<double> &bounds, bool timeAveraging)
     : CoProcessor(grid, s), path(path), writer(writer), comm(comm), options(options), levels(levels),
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.h
index 70dd79f062331be762603d07576675a123f7770b..155f293a08d0ef0726193a48c9a8fb8051bd3972 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.h
@@ -9,7 +9,7 @@
 #include "IntegrateValuesHelper.h"
 #include "LBMSystem.h"
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class WbWriter;
@@ -41,9 +41,9 @@ public:
 public:
     TimeAveragedValuesCoProcessor();
     TimeAveragedValuesCoProcessor(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer,
-                                  SPtr<UbScheduler> s, SPtr<Communicator> comm, int options);
+                                  SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm, int options);
     TimeAveragedValuesCoProcessor(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer,
-                                  SPtr<UbScheduler> s, SPtr<Communicator> comm, int options, std::vector<int> levels,
+                                  SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm, int options, std::vector<int> levels,
                                   std::vector<double> &levelCoords, std::vector<double> &bounds,
                                   bool timeAveraging = true);
     //! Make update
@@ -70,7 +70,7 @@ protected:
     void calculateAverageValuesForPlane(std::vector<IntegrateValuesHelper::CalcNodes> &cnodes);
 
 private:
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.cpp
index 8103d646643bf350f65c299ee73625978c3c6f6a..d2be7f0e25ae773be89dcad02dc6b96c0651d23a 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.cpp
@@ -9,14 +9,14 @@
 
 #include <fstream>
 
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "Grid3D.h"
 #include "IntegrateValuesHelper.h"
 #include "LBMUnitConverter.h"
 #include "UbScheduler.h"
 
 TimeseriesCoProcessor::TimeseriesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<IntegrateValuesHelper> h1,
-                                             const std::string &path, SPtr<Communicator> comm)
+                                             const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm)
     : CoProcessor(grid, s), h1(h1), path(path), comm(comm)
 {
     if (comm->getProcessID() == comm->getRoot()) {
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.h
index b59a68b66d4d42080824d44c339c03fffe27f269..e92e324aab1b7cbbe16d7e6652ecb3ed0dfa9ed4 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TimeseriesCoProcessor.h
@@ -13,7 +13,7 @@
 
 #include "CoProcessor.h"
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class IntegrateValuesHelper;
@@ -27,7 +27,7 @@ class TimeseriesCoProcessor : public CoProcessor
 {
 public:
     TimeseriesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<IntegrateValuesHelper> h1,
-                          const std::string &path, SPtr<Communicator> comm);
+                          const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
     ~TimeseriesCoProcessor() override;
 
     //! calls collectData.
@@ -38,7 +38,7 @@ protected:
 
     //! object that can compute spacial average values in 3D-subdomain.
     SPtr<IntegrateValuesHelper> h1;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
 
 private:
     std::string path; //! output filename, e.g.  pathname + "/steps/timeseries"
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.cpp
index b292d40bfff650a61bebc6c906ad867e7e660d83..51be3b53a9a3bc123b15e03d0925bd619fb01fa3 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.cpp
@@ -3,7 +3,7 @@
 #include "BCArray3D.h"
 #include "BCProcessor.h"
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "DataSet3D.h"
 #include "Grid3D.h"
 #include "LBMKernel.h"
@@ -14,7 +14,7 @@
 
 TurbulenceIntensityCoProcessor::TurbulenceIntensityCoProcessor(SPtr<Grid3D> grid, const std::string &path,
                                                                WbWriter *const writer, SPtr<UbScheduler> s,
-                                                               SPtr<Communicator> comm)
+                                                               std::shared_ptr<vf::mpi::Communicator> comm)
     : CoProcessor(grid, s), path(path), comm(comm), writer(writer)
 {
     init();
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.h
index 5c389eed8ca84c2a8ec6700af6c33bd8a492eb1c..40983604d25385420cba8da4af28faa33283aaf0 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/TurbulenceIntensityCoProcessor.h
@@ -8,7 +8,7 @@
 #include "CoProcessor.h"
 #include "UbTuple.h"
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class WbWriter;
@@ -18,7 +18,7 @@ class TurbulenceIntensityCoProcessor : public CoProcessor
 {
 public:
     TurbulenceIntensityCoProcessor(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer,
-                                   SPtr<UbScheduler> s, SPtr<Communicator> comm);
+                                   SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm);
     void process(double step) override;
 
 protected:
@@ -39,7 +39,7 @@ private:
     int gridRank;
     std::string path;
     WbWriter *writer;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     enum Values { AvVx = 0, AvVy = 1, AvVz = 2, AvVxxyyzz = 3 };
 };
 #endif
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.cpp
index ed624de5bb0e750c9e2f3cfaf301cdc7f66fd2a3..536d2ee133517279aa2458d06aea8edcee1dd20f 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.cpp
@@ -36,13 +36,13 @@
 #include <logger/Logger.h>
 
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "D3Q27System.h"
 #include "Grid3D.h"
 #include "UbScheduler.h"
 
 WriteBlocksCoProcessor::WriteBlocksCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                               WbWriter *const writer, SPtr<Communicator> comm)
+                                               WbWriter *const writer, std::shared_ptr<vf::mpi::Communicator> comm)
     : CoProcessor(grid, s), path(path), writer(writer), comm(comm)
 {
 }
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.h
index b26cdeebb29fcb1a9ba5b96f010c3c4af83e4c99..837d9bbad7533d0f097c07851b352c50cccf5465 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBlocksCoProcessor.h
@@ -39,7 +39,7 @@
 
 #include "CoProcessor.h"
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class WbWriter;
@@ -57,7 +57,7 @@ public:
     //! \param writer is WbWriter object
     //! \param comm is Communicator object
     WriteBlocksCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, WbWriter *const writer,
-                           SPtr<Communicator> comm);
+                           std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteBlocksCoProcessor() override;
 
     void process(double step) override;
@@ -69,7 +69,7 @@ protected:
 
     std::string path;
     WbWriter *writer;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.cpp
index 02f7bb4a28972f946d7d6a3d45487a7906494fea..6c927f4945f9bcf211c7f84e38fbc6d395960b7f 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.cpp
@@ -42,7 +42,7 @@
 #include "BCArray3D.h"
 #include "Block3D.h"
 #include "CbArray3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "Grid3D.h"
 #include "LBMUnitConverter.h"
 #include "UbScheduler.h"
@@ -55,7 +55,7 @@ WriteBoundaryConditionsCoProcessor::WriteBoundaryConditionsCoProcessor() = defau
 //////////////////////////////////////////////////////////////////////////
 WriteBoundaryConditionsCoProcessor::WriteBoundaryConditionsCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s,
                                                                        const std::string &path, WbWriter *const writer,
-                                                                       SPtr<Communicator> comm)
+                                                                       std::shared_ptr<vf::mpi::Communicator> comm)
     : CoProcessor(grid, s), path(path), writer(writer), comm(comm)
 {
     gridRank     = comm->getProcessID();
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.h
index ad29abca5e90c1267d7ab1768c1b3600fbd535c7..2608a3ae8df931a5f0b347b77ad525712676aeab 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteBoundaryConditionsCoProcessor.h
@@ -41,7 +41,7 @@
 #include "CoProcessor.h"
 #include "UbTuple.h"
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class WbWriter;
@@ -61,7 +61,7 @@ public:
     //! \param writer is WbWriter object
     //! \param comm is Communicator object
     WriteBoundaryConditionsCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                       WbWriter *const writer, SPtr<Communicator> comm);
+                                       WbWriter *const writer, std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteBoundaryConditionsCoProcessor() override = default;
 
     void process(double step) override;
@@ -84,6 +84,6 @@ private:
     int minInitLevel;
     int maxInitLevel;
     int gridRank;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
 };
 #endif
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.cpp
index 3c1c36a11fcdfc000cc6251130f720baf91607a2..1935ea22396a43dad53b2cf0a5b2960319026656 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.cpp
@@ -1,5 +1,5 @@
 #include "WriteGbObjectsCoProcessor.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "GbObject3D.h"
 #include "UbScheduler.h"
 #include "WbWriterVtkXmlASCII.h"
@@ -7,7 +7,7 @@
 #include <vector>
 
 WriteGbObjectsCoProcessor::WriteGbObjectsCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                     WbWriter *const writer, SPtr<Communicator> comm)
+                                                     WbWriter *const writer, std::shared_ptr<vf::mpi::Communicator> comm)
     : CoProcessor(grid, s), path(path), writer(writer), comm(comm)
 {
 }
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.h
index d1e9925b5eaec20a1319818acb67c662e595e7d3..09b9bdeb766d5c4251c18a46df888fe67ef54df8 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteGbObjectsCoProcessor.h
@@ -7,7 +7,7 @@
 #include <vector>
 
 class GbObject3D;
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class WbWriter;
@@ -21,7 +21,7 @@ class WriteGbObjectsCoProcessor : public CoProcessor
 {
 public:
     WriteGbObjectsCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, WbWriter *const writer,
-                              SPtr<Communicator> comm);
+                              std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteGbObjectsCoProcessor() override;
     //! calls collectData.
     void process(double step) override;
@@ -35,7 +35,7 @@ private:
     std::vector<SPtr<GbObject3D>> objects;
     std::string path;
     WbWriter *writer;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
 };
 
 #endif // WriteGbObjectsCoProcessor_h__
\ No newline at end of file
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.cpp
index 3d892d561fe64de1b3efb3f94450e36cc6632a0d..fc70b841ff2bee64176ec711dc579649c0f1c032 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.cpp
@@ -6,7 +6,7 @@
 
 #include "BCArray3D.h"
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "DataSet3D.h"
 #include "GbObject3D.h"
 #include "Grid3D.h"
@@ -19,7 +19,7 @@ WriteMQFromSelectionCoProcessor::WriteMQFromSelectionCoProcessor() = default;
 WriteMQFromSelectionCoProcessor::WriteMQFromSelectionCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s,
                                                                  SPtr<GbObject3D> gbObject, const std::string &path,
                                                                  WbWriter *const writer, SPtr<LBMUnitConverter> conv,
-                                                                 SPtr<Communicator> comm)
+                                                                 std::shared_ptr<vf::mpi::Communicator> comm)
     : CoProcessor(grid, s), gbObject(gbObject), path(path), writer(writer), conv(conv), comm(comm)
 {
     gridRank     = comm->getProcessID();
@@ -80,7 +80,7 @@ void WriteMQFromSelectionCoProcessor::collectData(double step)
     piece                = subfolder + "/" + piece;
 
     std::vector<std::string> cellDataNames;
-    SPtr<Communicator> comm         = Communicator::getInstance();
+    std::shared_ptr<vf::mpi::Communicator> comm         = vf::mpi::Communicator::getInstance();
     std::vector<std::string> pieces = comm->gather(piece);
     if (comm->getProcessID() == comm->getRoot()) {
         std::string pname =
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.h
index 0e52856cb32360a706b77fdcea8935037664b542..0dc3976b14b9930a1c1713074ff2222ad52b1fc8 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMQFromSelectionCoProcessor.h
@@ -10,7 +10,7 @@
 #include "LBMSystem.h"
 #include "UbTuple.h"
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class LBMUnitConverter;
@@ -24,7 +24,7 @@ public:
     WriteMQFromSelectionCoProcessor();
     WriteMQFromSelectionCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<GbObject3D> gbObject,
                                     const std::string &path, WbWriter *const writer, SPtr<LBMUnitConverter> conv,
-                                    SPtr<Communicator> comm);
+                                    std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteMQFromSelectionCoProcessor() override = default;
 
     void process(double step) override;
@@ -47,7 +47,7 @@ private:
     int minInitLevel;
     int maxInitLevel;
     int gridRank;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     SPtr<GbObject3D> gbObject;
 
     using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp
index e98d6ac874ace46659bc2903b3c67a0f9f93fa24..4bf2b30375145fb961cb7f932afc4aa15780820b 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp
@@ -39,7 +39,7 @@
 
 #include "BCArray3D.h"
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "DataSet3D.h"
 #include "Grid3D.h"
 #include "LBMUnitConverter.h"
@@ -52,7 +52,7 @@ WriteMacroscopicQuantitiesCoProcessor::WriteMacroscopicQuantitiesCoProcessor(SPt
                                                                              const std::string &path,
                                                                              WbWriter *const writer,
                                                                              SPtr<LBMUnitConverter> conv,
-                                                                             SPtr<Communicator> comm)
+                                                                             std::shared_ptr<vf::mpi::Communicator> comm)
         : CoProcessor(grid, s), path(path), writer(writer), conv(conv), comm(comm)
 {
     gridRank = comm->getProcessID();
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.h
index 91df3acf3dc3f584516820e45ca000365dc5d94f..7fb1844e08cf7454294b658f539b95c38eb3fa34 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.h
@@ -42,7 +42,7 @@
 #include "LBMSystem.h"
 #include "UbTuple.h"
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class LBMUnitConverter;
@@ -63,7 +63,7 @@ public:
     //! \param conv is LBMUnitConverter object
     //! \param comm is Communicator object
     WriteMacroscopicQuantitiesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                          WbWriter *const writer, SPtr<LBMUnitConverter> conv, SPtr<Communicator> comm);
+                                          WbWriter *const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteMacroscopicQuantitiesCoProcessor() override = default;
 
     void process(double step) override;
@@ -90,7 +90,7 @@ private:
     int minInitLevel;
     int maxInitLevel;
     int gridRank;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
 
     using CalcMacrosFct = void (*)(const LBMReal *const &, LBMReal &, LBMReal &, LBMReal &, LBMReal &);
     CalcMacrosFct calcMacros;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.cpp
index 73034d88996a3c22d8a3aa9e86517c5cbe92ffc3..c5442930623344fb528a7764fb1f1e314c35437c 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.cpp
@@ -39,7 +39,7 @@
 
 #include "BCArray3D.h"
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "DataSet3D.h"
 #include "Grid3D.h"
 #include "LBMUnitConverter.h"
@@ -52,7 +52,7 @@ WriteMultiphaseQuantitiesCoProcessor::WriteMultiphaseQuantitiesCoProcessor(SPtr<
                                                                              const std::string &path,
                                                                              WbWriter *const writer,
                                                                              SPtr<LBMUnitConverter> conv,
-                                                                             SPtr<Communicator> comm)
+                                                                             std::shared_ptr<vf::mpi::Communicator> comm)
         : CoProcessor(grid, s), path(path), writer(writer), conv(conv), comm(comm)
 {
     gridRank = comm->getProcessID();
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.h
index a4504c1dfccbad377e0bad4bc1aab51989abaff4..3825f9d4df3e744aec1605524c78f0028e4380fd 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMultiphaseQuantitiesCoProcessor.h
@@ -42,7 +42,7 @@
 #include "LBMSystem.h"
 #include "UbTuple.h"
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class LBMUnitConverter;
@@ -63,7 +63,7 @@ public:
     //! \param conv is LBMUnitConverter object
     //! \param comm is Communicator object
     WriteMultiphaseQuantitiesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                          WbWriter *const writer, SPtr<LBMUnitConverter> conv, SPtr<Communicator> comm);
+                                          WbWriter *const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm);
     ~WriteMultiphaseQuantitiesCoProcessor() override = default;
 
     void process(double step) override;
@@ -90,7 +90,7 @@ private:
     int minInitLevel;
     int maxInitLevel;
     int gridRank;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
 
     LBMReal gradX1_phi(const LBMReal *const &);
     LBMReal gradX2_phi(const LBMReal *const &);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.cpp
index f2edcd551b8a08ff00792a7daea4ee70d0fa17af..900c4bc95e85e57254121882e43e89fbb05b7201 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.cpp
@@ -52,7 +52,7 @@ WriteThixotropyQuantitiesCoProcessor::WriteThixotropyQuantitiesCoProcessor()
 
 }
 //////////////////////////////////////////////////////////////////////////
-WriteThixotropyQuantitiesCoProcessor::WriteThixotropyQuantitiesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string& path, WbWriter* const writer, SPtr<LBMUnitConverter> conv, SPtr<Communicator> comm) : CoProcessor(grid, s), path(path), writer(writer),	conv(conv),	comm(comm)
+WriteThixotropyQuantitiesCoProcessor::WriteThixotropyQuantitiesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string& path, WbWriter* const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm) : CoProcessor(grid, s), path(path), writer(writer),	conv(conv),	comm(comm)
 {
 	gridRank = comm->getProcessID();
 	minInitLevel = this->grid->getCoarsestInitializedLevel();
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.h
index b14e7f9f54c0e8e444a5f00804fee5cbc1ff6e21..d247c5c76bd5dc243041e53905e2189980875bd3 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteThixotropyQuantitiesCoProcessor.h
@@ -38,14 +38,14 @@
 #include "Grid3D.h"
 #include "Block3D.h"
 #include "LBMUnitConverter.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "WbWriter.h"
 
 class WriteThixotropyQuantitiesCoProcessor : public  CoProcessor
 {
 public:
 	WriteThixotropyQuantitiesCoProcessor();
-	WriteThixotropyQuantitiesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string& path, WbWriter* const writer, SPtr<LBMUnitConverter> conv, SPtr<Communicator> comm);
+	WriteThixotropyQuantitiesCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string& path, WbWriter* const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm);
 	~WriteThixotropyQuantitiesCoProcessor() = default;
 
    void process(double step) override;
@@ -69,7 +69,7 @@ private:
    int minInitLevel;
    int maxInitLevel;
    int gridRank;
-   SPtr<Communicator> comm;
+   std::shared_ptr<vf::mpi::Communicator> comm;
 //	double ConcentrationSum;
 };
 #endif
diff --git a/src/cpu/VirtualFluidsCore/Grid/Grid3D.cpp b/src/cpu/VirtualFluidsCore/Grid/Grid3D.cpp
index 1d37e4a09f5c45967a76eb7ff1abc7999788c8b9..c1036c62ac09fbb27284faa0b27825bfee3220ac 100644
--- a/src/cpu/VirtualFluidsCore/Grid/Grid3D.cpp
+++ b/src/cpu/VirtualFluidsCore/Grid/Grid3D.cpp
@@ -50,7 +50,7 @@ using namespace std;
 
 Grid3D::Grid3D() { levelSet.resize(Grid3DSystem::MAXLEVEL + 1); }
 //////////////////////////////////////////////////////////////////////////
-Grid3D::Grid3D(SPtr<Communicator> comm)
+Grid3D::Grid3D(std::shared_ptr<vf::mpi::Communicator> comm)
 
 {
     levelSet.resize(Grid3DSystem::MAXLEVEL + 1);
@@ -58,7 +58,7 @@ Grid3D::Grid3D(SPtr<Communicator> comm)
     rank = comm->getProcessID();
 }
 //////////////////////////////////////////////////////////////////////////
-Grid3D::Grid3D(SPtr<Communicator> comm, int blockNx1, int blockNx2, int blockNx3, int gridNx1, int gridNx2, int gridNx3)
+Grid3D::Grid3D(std::shared_ptr<vf::mpi::Communicator> comm, int blockNx1, int blockNx2, int blockNx3, int gridNx1, int gridNx2, int gridNx3)
     :
 
       blockNx1(blockNx1), blockNx2(blockNx2), blockNx3(blockNx2), nx1(gridNx1), nx2(gridNx2), nx3(gridNx3)
@@ -2299,7 +2299,7 @@ void Grid3D::renumberBlockIDs()
 
 
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::updateDistributedBlocks(SPtr<Communicator> comm)
+void Grid3D::updateDistributedBlocks(std::shared_ptr<vf::mpi::Communicator> comm)
 {
 
     std::vector<int> blocks;
diff --git a/src/cpu/VirtualFluidsCore/Grid/Grid3D.h b/src/cpu/VirtualFluidsCore/Grid/Grid3D.h
index f3aa968316fcc74f77584ad58ae7114dcab0840c..3607a3f99d8593f3a4c73e5393b1eb1250655883 100644
--- a/src/cpu/VirtualFluidsCore/Grid/Grid3D.h
+++ b/src/cpu/VirtualFluidsCore/Grid/Grid3D.h
@@ -47,7 +47,7 @@ class CoordinateTransformation3D;
 #include <Block3DVisitor.h>
 #include <Grid3DVisitor.h>
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Block3D;
 class Interactor3D;
 
@@ -66,8 +66,8 @@ public:
 
 public:
     Grid3D();
-    Grid3D(SPtr<Communicator> comm);
-    Grid3D(SPtr<Communicator> comm, int blockNx1, int blockNx2, int blockNx3, int gridNx1, int gridNx2, int gridNx3);
+    Grid3D(std::shared_ptr<vf::mpi::Communicator> comm);
+    Grid3D(std::shared_ptr<vf::mpi::Communicator> comm, int blockNx1, int blockNx2, int blockNx3, int gridNx1, int gridNx2, int gridNx3);
     virtual ~Grid3D() = default;
     //////////////////////////////////////////////////////////////////////////
     // blocks control
@@ -96,7 +96,7 @@ public:
     BlockIDMap &getBlockIDs();
     void deleteBlockIDs();
     void renumberBlockIDs();
-    void updateDistributedBlocks(SPtr<Communicator> comm);
+    void updateDistributedBlocks(std::shared_ptr<vf::mpi::Communicator> comm);
     SPtr<Block3D> getSuperBlock(SPtr<Block3D> block);
     SPtr<Block3D> getSuperBlock(int ix1, int ix2, int ix3, int level);
     void getSubBlocks(SPtr<Block3D> block, int levelDepth, std::vector<SPtr<Block3D>> &blocks);
diff --git a/src/cpu/VirtualFluidsCore/Interactors/InteractorsHelper.cpp b/src/cpu/VirtualFluidsCore/Interactors/InteractorsHelper.cpp
index d8ee6593400e8bc89dde7d2708f31595e86f1a5b..f96c7aa2f75391c667b873e8a91a758a989d9cc2 100644
--- a/src/cpu/VirtualFluidsCore/Interactors/InteractorsHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/Interactors/InteractorsHelper.cpp
@@ -34,7 +34,7 @@
 #include "InteractorsHelper.h"
 
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "SetBcBlocksBlockVisitor.h"
 #include "SetSolidBlocksBlockVisitor.h"
 #include <Grid3D.h>
@@ -100,6 +100,6 @@ void InteractorsHelper::updateGrid()
         ids.push_back(block->getGlobalID());
 
     std::vector<int> rids;
-    Communicator::getInstance()->allGather(ids, rids);
+    vf::mpi::Communicator::getInstance()->allGather(ids, rids);
     grid->deleteBlocks(rids);
 }
diff --git a/src/cpu/VirtualFluidsCore/Parallel/BlocksDistributor.cpp b/src/cpu/VirtualFluidsCore/Parallel/BlocksDistributor.cpp
index 09f0af7ac74b79d18220de90dab10be78f580a88..eef54a8625147046c2d8f38e2207e2fe2d20e325 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/BlocksDistributor.cpp
+++ b/src/cpu/VirtualFluidsCore/Parallel/BlocksDistributor.cpp
@@ -1,3 +1,3 @@
 #include "BlocksDistributor.h"
 
-BlocksDistributor::BlocksDistributor(SPtr<Grid3D> grid, SPtr<Communicator> comm) : grid(grid), comm(comm) {}
+BlocksDistributor::BlocksDistributor(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm) : grid(grid), comm(comm) {}
diff --git a/src/cpu/VirtualFluidsCore/Parallel/BlocksDistributor.h b/src/cpu/VirtualFluidsCore/Parallel/BlocksDistributor.h
index 7af79dc293676ae84fe2943e3316ebb4d4ddd866..85aa52d05e0dd215ac93ca4bb08cc057f84914d0 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/BlocksDistributor.h
+++ b/src/cpu/VirtualFluidsCore/Parallel/BlocksDistributor.h
@@ -1,7 +1,7 @@
 #ifndef BlocksDistributor_H
 #define BlocksDistributor_H
 
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "Grid3D.h"
 
 #include <PointerDefinitions.h>
@@ -9,13 +9,13 @@
 class BlocksDistributor
 {
 public:
-    BlocksDistributor(SPtr<Grid3D> grid, SPtr<Communicator> comm);
+    BlocksDistributor(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm);
     ~BlocksDistributor();
 
 protected:
 private:
     SPtr<Grid3D> grid;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp b/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp
index 146f5eccc6823cd3d057eb8ea08e08dd2a95ef29..6e7968f37493476ac7f076b4d7aa129b56c7326f 100644
--- a/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp
+++ b/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp
@@ -1,7 +1,7 @@
 #include "CheckpointConverter.h"
 #include "Block3D.h"
 #include "BoundaryConditions.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "CoordinateTransformation3D.h"
 #include "DataSet3D.h"
 #include "Grid3D.h"
@@ -12,7 +12,7 @@
 
 using namespace MPIIODataStructures;
 
-CheckpointConverter::CheckpointConverter(SPtr<Grid3D> grid, const std::string &path, SPtr<Communicator> comm)
+CheckpointConverter::CheckpointConverter(SPtr<Grid3D> grid, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm)
     : grid(grid), path(path), comm(comm)
 {
     UbSystem::makeDirectory(path + "/mpi_io_cp");
diff --git a/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.h b/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.h
index bad6116ccf8e1ebc32b8f7d6e12c3d36bc1b4e46..6fe24772d574a6db67428a820027971b4c7fd230 100644
--- a/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.h
+++ b/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.h
@@ -8,14 +8,14 @@
 #include <vector>
 
 class Grid3D;
-class Communicator;
+namespace vf::mpi {class Communicator;}
 
 //! \class UtilConvertor
 //! \brief Converts timestep data from MPIIORestartCoProcessor format into MPIIOMigrationCoProcessor format
 class CheckpointConverter
 {
 public:
-    CheckpointConverter(SPtr<Grid3D> grid, const std::string &path, SPtr<Communicator> comm);
+    CheckpointConverter(SPtr<Grid3D> grid, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
     virtual ~CheckpointConverter();
 
     void convert(int step, int procCount);
@@ -26,7 +26,7 @@ public:
 
 protected:
     std::string path;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     SPtr<Grid3D> grid;
 
 private:
diff --git a/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.cpp b/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.cpp
index 89b76aa946a42d848b4bf9e5e4dcf67726b2404b..7602438a23f16295f8d518f70d5a036dac4515ec 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.cpp
@@ -50,7 +50,7 @@ CreateTransmittersHelper::CreateTransmittersHelper() = default;
 //////////////////////////////////////////////////////////////////////////
 void CreateTransmittersHelper::createTransmitters(SPtr<Block3D> sblock, SPtr<Block3D> tblock, int dir, IBlock ib,
                                                   TransmitterPtr &sender, TransmitterPtr &receiver,
-                                                  SPtr<Communicator> comm, TransmitterType tType)
+                                                  std::shared_ptr<vf::mpi::Communicator> comm, TransmitterType tType)
 {
     // SourceBlock
     int srcLevel = sblock->getLevel();
diff --git a/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.h b/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.h
index 353bb31229841c7b61fea4d2a4d7fca272e39135..d51f6352a251fe360aaf2a8365c77315e099d4d2 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.h
@@ -35,7 +35,7 @@
 #define CREATETRANSMITTERSHELPER_H
 
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 
 #include "LBMSystem.h"
 
@@ -61,7 +61,7 @@ public:
 public:
     CreateTransmittersHelper();
     void createTransmitters(const SPtr<Block3D> sblock, const SPtr<Block3D> tblock, int dir, IBlock ib,
-                            TransmitterPtr &sender, TransmitterPtr &receiver, SPtr<Communicator> comm,
+                            TransmitterPtr &sender, TransmitterPtr &receiver, std::shared_ptr<vf::mpi::Communicator> comm,
                             TransmitterType tType);
 
 protected:
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.cpp
index 7b516fc75c608a1627edc9b75264d32bc660c59a..412e9347d707fe2e4ad733cf53419f18f86faf01 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.cpp
@@ -2,14 +2,14 @@
 
 #include "MetisPartitioningGridVisitor.h"
 #include "Block3D.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "D3Q27System.h"
 #include "Grid3D.h"
 #include <cmath>
 
 using namespace std;
 
-MetisPartitioningGridVisitor::MetisPartitioningGridVisitor(SPtr<Communicator> comm, GraphType graphType, int numOfDirs,
+MetisPartitioningGridVisitor::MetisPartitioningGridVisitor(std::shared_ptr<vf::mpi::Communicator> comm, GraphType graphType, int numOfDirs,
                                                            MetisPartitioner::PartType partType, bool threads,
                                                            int numberOfThreads)
     : Grid3DVisitor(), numberOfThreads(numberOfThreads), numOfDirs(numOfDirs), comm(comm), threads(threads),
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.h
index d1bc5ba308ff798c21cc27cae02367f31b35fbac..c270d3ce389cc2697c1ac54178984ffa2f4d07a9 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.h
@@ -9,7 +9,7 @@
 #include "Grid3DVisitor.h"
 #include "MetisPartitioner.h"
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 
 ////////////////////////////////////////////////////////////////////////
 //! \brief The class implements domain decomposition with METIS library
@@ -32,7 +32,7 @@ public:
     //! \param numOfDirs - maximum number of neighbors for each process
     //! \param threads - on/off decomposition for threads
     //! \param numberOfThreads - number of threads
-    MetisPartitioningGridVisitor(SPtr<Communicator> comm, GraphType graphType, int numOfDirs,
+    MetisPartitioningGridVisitor(std::shared_ptr<vf::mpi::Communicator> comm, GraphType graphType, int numOfDirs,
                                  MetisPartitioner::PartType partType = MetisPartitioner::KWAY, bool threads = false,
                                  int numberOfThreads = 0);
     ~MetisPartitioningGridVisitor() override;
@@ -52,7 +52,7 @@ protected:
     int numOfDirs;
     std::vector<int> blockID;
     std::vector<idx_t> parts;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     int bundleRoot;
     int processRoot;
     int bundleID;
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.cpp b/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.cpp
index 88ff49408be67b02f9f56cf35287ba17f1216b33..c151ac90492bc7545f7d498fdfd2b3f9efbc9ab6 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.cpp
@@ -1,5 +1,5 @@
 #include "RefineAroundGbObjectHelper.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "OverlapBlockVisitor.h"
 #include "RatioBlockVisitor.h"
 #include "RatioSmoothBlockVisitor.h"
@@ -11,7 +11,7 @@
 RefineAroundGbObjectHelper::RefineAroundGbObjectHelper(SPtr<Grid3D> grid, int refineLevel,
                                                        SPtr<D3Q27TriFaceMeshInteractor> objectIter,
                                                        double startDistance, double stopDistance,
-                                                       SPtr<Communicator> comm)
+                                                       std::shared_ptr<vf::mpi::Communicator> comm)
     : grid(grid), refineLevel(refineLevel), objectIter(objectIter), startDistance(startDistance),
       stopDistance(stopDistance), comm(comm)
 {
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.h b/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.h
index 766c290a7605eb428449fe5d5f6215157a08e3d4..0421a963e6d57da5096370eed9721220c98939b4 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.h
@@ -4,7 +4,7 @@
 #include <PointerDefinitions.h>
 
 class Grid3D;
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class D3Q27TriFaceMeshInteractor;
 
 //! \brief Refine blocks on base of bounding boxes.
@@ -20,7 +20,7 @@ public:
     //! \param startDistance start distance from geometry for refinement
     //! \param stopDistance stop distance from geometry for refinement
     RefineAroundGbObjectHelper(SPtr<Grid3D> grid, int maxRefineLevel, SPtr<D3Q27TriFaceMeshInteractor> objectIter,
-                               double startDistance, double stopDistance, SPtr<Communicator> comm);
+                               double startDistance, double stopDistance, std::shared_ptr<vf::mpi::Communicator> comm);
     virtual ~RefineAroundGbObjectHelper();
     //! start refinement
     void refine();
@@ -30,7 +30,7 @@ private:
     SPtr<D3Q27TriFaceMeshInteractor> objectIter;
     int refineLevel;
     double startDistance, stopDistance;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.cpp b/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.cpp
index 14ea2b073e43065d31cecc8b2893503d7fc0ed60..656457c0580739bd00783b3050fb2c460eaa5a14 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.cpp
@@ -1,6 +1,6 @@
 #include "RefineCrossAndInsideGbObjectHelper.h"
 #include "CheckRatioBlockVisitor.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "OverlapBlockVisitor.h"
 #include "RatioBlockVisitor.h"
 #include "RatioSmoothBlockVisitor.h"
@@ -11,7 +11,7 @@
 #include <Grid3D.h>
 
 RefineCrossAndInsideGbObjectHelper::RefineCrossAndInsideGbObjectHelper(SPtr<Grid3D> grid, int maxRefineLevel,
-                                                                       SPtr<Communicator> comm)
+                                                                       std::shared_ptr<vf::mpi::Communicator> comm)
     : grid(grid), maxRefineLevel(maxRefineLevel), comm(comm)
 {
 }
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.h b/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.h
index fd1fefe3d1e522d5fd3e60a97785d94590247bb0..d0a9ac44891519b3fd583f98a56e33dfd1e42122 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.h
@@ -4,7 +4,7 @@
 #include <PointerDefinitions.h>
 #include <vector>
 
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class Grid3D;
 class GbObject3D;
 
@@ -17,7 +17,7 @@ public:
     //! Constructor
     //! \param grid a smart pointer to the grid object
     //! \param maxRefineLevel an integer for maximal refinement level
-    RefineCrossAndInsideGbObjectHelper(SPtr<Grid3D> grid, int maxRefineLevel, SPtr<Communicator> comm);
+    RefineCrossAndInsideGbObjectHelper(SPtr<Grid3D> grid, int maxRefineLevel, std::shared_ptr<vf::mpi::Communicator> comm);
     virtual ~RefineCrossAndInsideGbObjectHelper();
     //! add geometric object
     //! \param object a smart pointer to bounding box
@@ -31,7 +31,7 @@ private:
     std::vector<SPtr<GbObject3D>> objects;
     std::vector<int> levels;
     int maxRefineLevel;
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RenumberGridVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/RenumberGridVisitor.cpp
index 433e08cea74fcf7c67df0ee0119b34036e19de8c..fc9c5c203c5d631ae7e125f75d72d70e8502890d 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RenumberGridVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/RenumberGridVisitor.cpp
@@ -4,7 +4,7 @@
 #include "Grid3DSystem.h"
 //#include <mpi.h>
 
-RenumberGridVisitor::RenumberGridVisitor(SPtr<Communicator> com) : comm(com) {}
+RenumberGridVisitor::RenumberGridVisitor(std::shared_ptr<vf::mpi::Communicator> com) : comm(com) {}
 
 //////////////////////////////////////////////////////////////////////////
 void RenumberGridVisitor::visit(SPtr<Grid3D> grid)
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RenumberGridVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/RenumberGridVisitor.h
index eabb0cafd06af9588b87dc479f4684393e6afb5c..993bccd1034d0fb648c2e05d77da380916816967 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RenumberGridVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/RenumberGridVisitor.h
@@ -8,7 +8,7 @@
 #ifndef RenumberGridVisitor_h
 #define RenumberGridVisitor_h
 
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "Grid3DVisitor.h"
 
 class Grid3D;
@@ -19,14 +19,14 @@ class Grid3D;
 class RenumberGridVisitor : public Grid3DVisitor
 {
 public:
-    RenumberGridVisitor(SPtr<Communicator> com);
+    RenumberGridVisitor(std::shared_ptr<vf::mpi::Communicator> com);
 
     ~RenumberGridVisitor() override = default;
 
     void visit(SPtr<Grid3D> grid) override;
 
 private:
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     //   static int counter;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h
index 827a8263a3b4bf0f013dc7b1be7f597ce5ccb82e..bd28c23f9ec7bf43acbd37a532ce07652ae6ee93 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h
@@ -41,7 +41,7 @@
 #include "Grid3DSystem.h"
 #include "Grid3D.h"
 #include "CreateTransmittersHelper.h"
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "OneDistributionFullDirectConnector.h"
 #include "OneDistributionFullVectorConnector.h"
 #include "TwoDistributionsFullDirectConnector.h"
@@ -58,20 +58,20 @@ public:
     using LocalConnector  = T1;
     using RemoteConnector = T2;
 public:
-    SetConnectorsBlockVisitor(SPtr<Communicator> comm);
+    SetConnectorsBlockVisitor(std::shared_ptr<vf::mpi::Communicator> comm);
     ~SetConnectorsBlockVisitor() override;
     void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override;
     //////////////////////////////////////////////////////////////////////////
 protected:
     void setSameLevelConnectors(SPtr<Grid3D> grid, SPtr<Block3D> block);
     void setRemoteConnectors(SPtr<Block3D> sblock, SPtr<Block3D> tblock, int dir);
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     int dirs;
     int gridRank;
 };
 
 template <class T1, class T2>
-SetConnectorsBlockVisitor<T1, T2>::SetConnectorsBlockVisitor(SPtr<Communicator> comm)
+SetConnectorsBlockVisitor<T1, T2>::SetConnectorsBlockVisitor(std::shared_ptr<vf::mpi::Communicator> comm)
     : Block3DVisitor(0, Grid3DSystem::MAXLEVEL), comm(comm)
 {
 }
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.cpp
index 8c0d4b93066eb69f8036cc80fab9254c7678b741..6a55ee5af55df96b4c1335976728ca7e08ee8ece 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.cpp
@@ -39,10 +39,10 @@
 #include "Grid3DSystem.h"
 #include <basics/transmitter/TbTransmitterLocal.h>
 
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "InterpolationProcessor.h"
 
-SetInterpolationConnectorsBlockVisitor::SetInterpolationConnectorsBlockVisitor(SPtr<Communicator> comm, LBMReal nue, SPtr<InterpolationProcessor> iProcessor) :
+SetInterpolationConnectorsBlockVisitor::SetInterpolationConnectorsBlockVisitor(std::shared_ptr<vf::mpi::Communicator> comm, LBMReal nue, SPtr<InterpolationProcessor> iProcessor) :
 Block3DVisitor(0, Grid3DSystem::MAXLEVEL), 
 	comm(comm),
 	nue(nue),
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.h
index 972a878bf210d0532126c0c6fb481f3ed936db7a..7ae54b0b62cadbc58eb5b0cc804f00a977d47615 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.h
@@ -43,14 +43,14 @@
 
 class Grid3D;
 class Block3D;
-class Communicator;
+namespace vf::mpi {class Communicator;}
 class InterpolationProcessor;
 
 //! \brief  A class sets connectors between blocks.
 class SetInterpolationConnectorsBlockVisitor : public Block3DVisitor
 {
 public:
-    SetInterpolationConnectorsBlockVisitor(SPtr<Communicator> comm, LBMReal nue, SPtr<InterpolationProcessor> iProcessor);
+    SetInterpolationConnectorsBlockVisitor(std::shared_ptr<vf::mpi::Communicator> comm, LBMReal nue, SPtr<InterpolationProcessor> iProcessor);
     ~SetInterpolationConnectorsBlockVisitor() override;
     void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override;
     //////////////////////////////////////////////////////////////////////////
@@ -63,7 +63,7 @@ protected:
                             CreateTransmittersHelper::TransmitterPtr &receiverCF,
                             CreateTransmittersHelper::TransmitterPtr &senderFC,
                             CreateTransmittersHelper::TransmitterPtr &receiverFC);
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     int gridRank;
     LBMReal nue;
     SPtr<InterpolationProcessor> iProcessor;
diff --git a/src/cpu/VirtualFluidsCore/Visitors/ZoltanPartitioningGridVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/ZoltanPartitioningGridVisitor.cpp
index 9a148a4f9bb799240a5ad0ba087d4354b8d0342a..7d9f5e8d4e233c6f18aa5e95818b71143c3d3442 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/ZoltanPartitioningGridVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/ZoltanPartitioningGridVisitor.cpp
@@ -9,7 +9,7 @@
 
 using namespace std;
 
-ZoltanPartitioningGridVisitor::ZoltanPartitioningGridVisitor(SPtr<Communicator> comm, int numOfDirs,
+ZoltanPartitioningGridVisitor::ZoltanPartitioningGridVisitor(std::shared_ptr<vf::mpi::Communicator> comm, int numOfDirs,
                                                              int numOfLocalParts)
     : comm(comm), numOfDirs(numOfDirs), numOfLocalParts(numOfLocalParts)
 {
diff --git a/src/cpu/VirtualFluidsCore/Visitors/ZoltanPartitioningGridVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/ZoltanPartitioningGridVisitor.h
index 4d9cd63c5c4c4a34bc8a1d5935b895fd9c5a2129..aeaf4d705c0b91cad482f87dff36ad6347363504 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/ZoltanPartitioningGridVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/ZoltanPartitioningGridVisitor.h
@@ -10,14 +10,14 @@
 
 #if defined VF_ZOLTAN && defined VF_MPI
 
-#include "Communicator.h"
+#include <mpi/Communicator.h>
 #include "Grid3DVisitor.h"
 #include "ZoltanPartitioner.h"
 
 class ZoltanPartitioningGridVisitor : public Grid3DVisitor
 {
 public:
-    ZoltanPartitioningGridVisitor(SPtr<Communicator> comm, int numOfDirs, int numOfLocalParts = 1);
+    ZoltanPartitioningGridVisitor(std::shared_ptr<vf::mpi::Communicator> comm, int numOfDirs, int numOfLocalParts = 1);
     ~ZoltanPartitioningGridVisitor();
     void visit(SPtr<Grid3D> grid);
 
@@ -26,7 +26,7 @@ protected:
     void repartGrid(SPtr<Grid3D> grid, ZoltanPartitioner &zp);
 
 private:
-    SPtr<Communicator> comm;
+    std::shared_ptr<vf::mpi::Communicator> comm;
     int numOfDirs;
     int numOfLocalParts;
     ZoltanGraph *graph;
diff --git a/src/cpu/simulationconfig/include/simulationconfig/Simulation.h b/src/cpu/simulationconfig/include/simulationconfig/Simulation.h
index 4bf800c2375347ab7040424bdb2e4c53d5cc2bf8..63298db81741864b40c4b320fbe4bd72f688715c 100644
--- a/src/cpu/simulationconfig/include/simulationconfig/Simulation.h
+++ b/src/cpu/simulationconfig/include/simulationconfig/Simulation.h
@@ -4,13 +4,15 @@
 #include <string>
 #include <memory>
 #include <set>
+
+#include <mpi/Communicator.h>
+
 #include <geometry3d/GbPoint3D.h>
 #include <Interactors/Interactor3D.h>
 #include <BoundaryConditions/BCAdapter.h>
 #include <Visitors/BoundaryConditionsBlockVisitor.h>
 #include <CoProcessors/CoProcessor.h>
 #include <LBM/LBMUnitConverter.h>
-#include <Parallel/Communicator.h>
 #include "KernelFactory.h"
 #include "AbstractLBMSystem.h"
 #include "KernelConfigStructs.h"
@@ -24,7 +26,7 @@ private:
 
     std::shared_ptr<LBMKernel> lbmKernel;
     std::shared_ptr<AbstractLBMSystem> lbmSystem;
-    std::shared_ptr<Communicator> communicator;
+    std::shared_ptr<vf::mpi::Communicator> communicator;
 
     std::shared_ptr<Grid3D> grid;
     std::vector<std::shared_ptr<Interactor3D>> interactors;
diff --git a/src/cpu/simulationconfig/src/Simulation.cpp b/src/cpu/simulationconfig/src/Simulation.cpp
index f4fbad090fc60e424e777b4f601243eef8eb151e..ab818f5280628e51648e6d478dbb827b2bcc78ed 100644
--- a/src/cpu/simulationconfig/src/Simulation.cpp
+++ b/src/cpu/simulationconfig/src/Simulation.cpp
@@ -23,7 +23,7 @@
 #include <LBM/CompressibleOffsetMomentsInterpolationProcessor.h>
 #include <LBM/LBMKernel.h>
 #include <LBM/LBMUnitConverter.h>
-#include <Parallel/MPICommunicator.h>
+#include <mpi/MPICommunicator.h>
 #include <Visitors/GenBlocksGridVisitor.h>
 #include <Visitors/InitDistributionsBlockVisitor.h>
 #include <Visitors/MetisPartitioningGridVisitor.h>
@@ -36,7 +36,7 @@
 
 Simulation::Simulation()
 {
-    this->communicator = MPICommunicator::getInstance();
+    this->communicator = vf::mpi::MPICommunicator::getInstance();
     this->grid = std::shared_ptr<Grid3D>(new Grid3D(communicator));
     this->interactors = std::vector<std::shared_ptr<Interactor3D>>();
     this->bcVisitor = BoundaryConditionsBlockVisitor();
diff --git a/src/cuda/CMakeLists.txt b/src/cuda/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b3b37b213cab4bf0e562438264d5fea6eb4dc7b3
--- /dev/null
+++ b/src/cuda/CMakeLists.txt
@@ -0,0 +1,3 @@
+project(cuda LANGUAGES CUDA CXX) # enables the CUDA and C++ languages for this directory
+
+vf_add_library(NAME cuda PUBLIC_LINK logger) # NOTE(review): project helper; presumably creates target `cuda` and links `logger` publicly - confirm
diff --git a/src/cuda/CudaGrid.cpp b/src/cuda/CudaGrid.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c7267972859f04931d9a69cfca073c7ac3267b9f
--- /dev/null
+++ b/src/cuda/CudaGrid.cpp
@@ -0,0 +1,30 @@
+#include "CudaGrid.h"
+
+#include <logger/Logger.h>
+
+namespace vf::cuda
+{
+
+// Builds a 2D launch grid holding at least (numberOfEntities / numberOfThreads) + 1 blocks of numberOfThreads threads.
+CudaGrid::CudaGrid(unsigned int numberOfThreads, unsigned int numberOfEntities)
+{
+    const unsigned int maxBlocksX = 512;
+    const unsigned int numberOfBlocks = (numberOfEntities / numberOfThreads) + 1;
+
+    // Up to 512 blocks are laid out as (1, n); beyond that x is pinned to 512 and y covers the rest.
+    const bool exceedsLimit = numberOfBlocks > maxBlocksX;
+    const unsigned int blocksX = exceedsLimit ? maxBlocksX : 1;
+    const unsigned int blocksY = exceedsLimit ? (numberOfBlocks / maxBlocksX) + 1 : numberOfBlocks;
+
+    threads = dim3(numberOfThreads, 1, 1);
+    // Only x and y are passed for the grid, so z keeps dim3's default.
+    grid = dim3(blocksX, blocksY);
+}
+
+void CudaGrid::print() const // writes the x/y/z components of grid ("blocks") and threads through VF_LOG_INFO
+{
+    VF_LOG_INFO("blocks: ({},{},{}), threads: ({},{},{})", grid.x, grid.y, grid.z, threads.x, threads.y, threads.z);
+}
+
+
+}
diff --git a/src/cuda/CudaGrid.h b/src/cuda/CudaGrid.h
new file mode 100644
index 0000000000000000000000000000000000000000..a9926c3861749f648da529f2b45554d13599a302
--- /dev/null
+++ b/src/cuda/CudaGrid.h
@@ -0,0 +1,24 @@
+#ifndef CUDA_GRID_H
+#define CUDA_GRID_H
+
+
+#include <cuda_runtime.h>
+
+namespace vf::cuda
+{
+
+struct CudaGrid // pairs a thread-block size with a grid size; both are plain dim3 values
+{
+    dim3 threads; // set to (numberOfThreads, 1, 1) by the two-argument constructor
+    dim3 grid;    // block counts in x and y computed by the two-argument constructor
+
+    CudaGrid(unsigned int numberOfThreads, unsigned int numberOfEntities); // derives grid from numberOfEntities / numberOfThreads
+    CudaGrid() = default; // leaves both members default-constructed
+
+    void print() const; // logs both dimensions
+};
+
+
+}
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/LBM/CudaTimer.cpp b/src/cuda/CudaTimer.cpp
similarity index 97%
rename from src/gpu/VirtualFluids_GPU/LBM/CudaTimer.cpp
rename to src/cuda/CudaTimer.cpp
index 299f898aaaf69583a5cba76d824b01c73ace629d..97a2c0977058b712ab345f6d2c1299b3e2448198 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/CudaTimer.cpp
+++ b/src/cuda/CudaTimer.cpp
@@ -1,5 +1,8 @@
 #include "CudaTimer.h"
 
+namespace vf::cuda
+{
+
 void CudaTimer::createSdkTimer()
 {
     sdkCreateTimer(&sdkTimer);
@@ -49,3 +52,5 @@ void CudaTimer::deleteEventTimer()
     checkCudaErrors(cudaEventDestroy(start_t));
     checkCudaErrors(cudaEventDestroy(stop_t));
 }
+
+}
diff --git a/src/gpu/VirtualFluids_GPU/LBM/CudaTimer.h b/src/cuda/CudaTimer.h
similarity index 88%
rename from src/gpu/VirtualFluids_GPU/LBM/CudaTimer.h
rename to src/cuda/CudaTimer.h
index cb15eee9c8b7f529a10102c1a7331fa90a714724..51193c2f73959e31dab8661a952756f2d0882e06 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/CudaTimer.h
+++ b/src/cuda/CudaTimer.h
@@ -1,11 +1,13 @@
-#ifndef CudaTimer_H
-#define CudaTimer_H
+#ifndef CUDA_CudaTimer_H
+#define CUDA_CudaTimer_H
 
 
 #include <cuda_runtime.h>
 #include <helper_functions.h>
 #include <helper_cuda.h>
 
+namespace vf::cuda
+{
 
 class CudaTimer
 {
@@ -30,4 +32,6 @@ private:
 
 };
 
+}
+
 #endif
diff --git a/src/cuda/DeviceInfo.cpp b/src/cuda/DeviceInfo.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..20ea2c4f6ba098b17e444f55625a6791e46141e5
--- /dev/null
+++ b/src/cuda/DeviceInfo.cpp
@@ -0,0 +1,149 @@
+#include "DeviceInfo.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <stdexcept>
+
+#include <cuda_runtime.h>
+
+#include <logger/Logger.h>
+
+namespace vf::cuda
+{
+
+// Checks that `deviceId` addresses one of the CUDA devices reported by the runtime.
+// Valid ids are 0 .. device_count - 1; anything else throws std::runtime_error.
+// A failure to query the device count is only logged; the range check then runs
+// against whatever count was obtained.
+void verifyNumberOfDevices(int deviceId)
+{
+    int device_count = 0;
+    cudaError_t errorId = cudaGetDeviceCount(&device_count);
+    if(errorId != cudaSuccess) {
+        VF_LOG_CRITICAL("Error while accessing the device count: {}", cudaGetErrorString(errorId));
+    }
+    // Device ids are zero-based, so device_count itself is already out of range.
+    if (deviceId < 0 || deviceId >= device_count) {
+        throw std::runtime_error("chosen gpudevice >=  device_count ... exiting\n");
+    }
+}
+
+// Logs the compute capability of device `deviceId`.
+// Throws std::runtime_error if the reported major version exceeds 999, which this
+// code treats as CUDA device emulation on the CPU.
+void verifyComputeCapability(int deviceId)
+{
+    // Value-initialized so the fields read below are well-defined even if the query fails.
+    cudaDeviceProp deviceProp{};
+    cudaError_t errorId = cudaGetDeviceProperties(&deviceProp, deviceId);
+
+    if(errorId != cudaSuccess){
+        VF_LOG_CRITICAL("Error while accessing the device properties occurs: {}", cudaGetErrorString(errorId));
+    }
+
+    VF_LOG_INFO("[compute capability] = [{}.{}]\n", deviceProp.major, deviceProp.minor);
+
+    if (deviceProp.major > 999) {
+        throw std::runtime_error("warning, CUDA Device Emulation (CPU) detected, exiting\n");
+    }
+}
+
+// Makes `deviceId` the current CUDA device and then queries the current device
+// again; errors from either call are logged, not thrown.
+void setCudaDevice(int deviceId)
+{
+    // choose a cuda device for kernel execution
+    cudaError_t errorId = cudaSetDevice(deviceId);
+    if (errorId != cudaSuccess) {
+        VF_LOG_CRITICAL("Error while setting the device to {}: {}", deviceId, cudaGetErrorString(errorId));
+    } else {
+        int device = 0;
+        // double check that device was properly selected
+        errorId = cudaGetDevice(&device);
+        if(errorId != cudaSuccess) {
+            VF_LOG_CRITICAL("Error while getting the device: {}", cudaGetErrorString(errorId));
+        }
+    }
+}
+
+// Validates `deviceId` (device count, compute capability) and selects it as the
+// current CUDA device. Throws std::runtime_error if a validation step rejects it.
+void verifyAndSetDevice(int deviceId)
+{
+    verifyNumberOfDevices(deviceId);
+    verifyComputeCapability(deviceId);
+
+    setCudaDevice(deviceId);
+}
+
+
+
+// Prints the properties and the free/total memory of device `deviceId` to stdout.
+// Side effect: calls cudaSetDevice(deviceId), so the current device changes.
+void printCudaInformation(int deviceId)
+{
+    // Value-initialized so the printed fields are well-defined even if the query fails.
+    cudaDeviceProp prop{};
+    cudaError_t errorId = cudaGetDeviceProperties(&prop, deviceId);
+
+    if(errorId != cudaSuccess){
+        VF_LOG_CRITICAL("Error while accessing the device properties occurs: {}", cudaGetErrorString(errorId));
+    }
+
+    printf(" --- General Information for device %d ---\n", deviceId);
+    printf("Name: %s\n", prop.name);
+    printf("Compute capability: %d.%d\n", prop.major, prop.minor);
+    printf("Clock rate: %d\n", prop.clockRate);
+    printf("Device copy overlap: ");
+    if (prop.deviceOverlap)
+        printf("Enabled\n");
+    else
+        printf("Disabled\n");
+    printf("Kernel execition timeout : ");
+    if (prop.kernelExecTimeoutEnabled)
+        printf("Enabled\n");
+    else
+        printf("Disabled\n");
+    printf(" --- Memory Information for device %d ---\n", deviceId);
+    printf("Total global mem: %zu\n", prop.totalGlobalMem);
+    printf("Total constant Mem: %zu\n", prop.totalConstMem);
+    printf("Max mem pitch: %zu\n", prop.memPitch);
+    printf("Texture Alignment: %zu\n", prop.textureAlignment);
+    printf("max Texture 1D: %d\n", prop.maxTexture1D);
+    printf("max Texture 2D: %d, %d\n", prop.maxTexture2D[0], prop.maxTexture2D[1]);
+    printf("max Texture 3D: %d, %d, %d\n", prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);
+    printf(" --- MP Information for device %d ---\n", deviceId);
+    printf("Multiprocessor count: %d\n",
+        prop.multiProcessorCount);
+    // sharedMemPerBlock is a size_t, so it needs the unsigned %zu conversion.
+    printf("Shared mem per mp: %zu\n", prop.sharedMemPerBlock);
+    printf("Registers per mp: %d\n", prop.regsPerBlock);
+    printf("Threads in warp: %d\n", prop.warpSize);
+    printf("Max threads per block: %d\n",
+        prop.maxThreadsPerBlock);
+    printf("Max thread dimensions: (%d, %d, %d)\n",
+        prop.maxThreadsDim[0], prop.maxThreadsDim[1],
+        prop.maxThreadsDim[2]);
+    printf("Max grid dimensions: (%d, %d, %d)\n",
+        prop.maxGridSize[0], prop.maxGridSize[1],
+        prop.maxGridSize[2]);
+    printf(" --- -------------------------------- ---\n");
+    printf("\n");
+
+    errorId = cudaSetDevice(deviceId);
+    if (errorId != cudaSuccess) {
+        VF_LOG_CRITICAL("Error while setting the device to {}: {}", deviceId, cudaGetErrorString(errorId));
+    }
+
+    size_t freeBytes = 0;
+    size_t totalBytes = 0;
+    errorId = cudaMemGetInfo(&freeBytes, &totalBytes);
+    if (errorId != cudaSuccess) {
+        VF_LOG_CRITICAL("Error while querying the device memory: {}", cudaGetErrorString(errorId));
+    }
+    printf("Free: %zu Bytes, Total: %zu Bytes\n", freeBytes, totalBytes);
+    printf("Free: %zu MB, Total: %zu MB\n", freeBytes / 1000 / 1000, totalBytes / 1000 / 1000);
+}
+
+}
\ No newline at end of file
diff --git a/src/cuda/DeviceInfo.h b/src/cuda/DeviceInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..9a34824f38ecf5a7db035e87826b732325fdfddd
--- /dev/null
+++ b/src/cuda/DeviceInfo.h
@@ -0,0 +1,13 @@
+#ifndef CUDA_DEVICEINFO_H
+#define CUDA_DEVICEINFO_H
+
+namespace vf::cuda
+{
+// Verifies deviceId (device count, compute capability; may throw std::runtime_error) and selects it via cudaSetDevice.
+void verifyAndSetDevice(int deviceId);
+// Prints the properties and the free/total memory of the given device to stdout.
+void printCudaInformation(int deviceId);
+
+}
+
+#endif
diff --git a/src/gpu/GksGpu/Analyzer/EnstrophyAnalyzer.cu b/src/gpu/GksGpu/Analyzer/EnstrophyAnalyzer.cu
index 5a3dc76db1440fc641bd7c58cdce48b7996d63ee..346692bfdf8c8daf9a659a3a0ef04aa57f487545 100644
--- a/src/gpu/GksGpu/Analyzer/EnstrophyAnalyzer.cu
+++ b/src/gpu/GksGpu/Analyzer/EnstrophyAnalyzer.cu
@@ -59,6 +59,8 @@ bool EnstrophyAnalyzer::run(uint iter)
     this->enstrophyTimeSeries.push_back( EnstrophyTmp );
 
     //*logging::out << logging::Logger::INFO_HIGH << "EKin = " << EKin << "\n";
+
+    return true;
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GksGpu/Analyzer/HeatFluxAnalyzer.cu b/src/gpu/GksGpu/Analyzer/HeatFluxAnalyzer.cu
index 266d3b564d5a413f68fa27130b6a9f02eac54511..ed68f8d95a2a68c00ab53c2cd1037bbff43e0f5b 100644
--- a/src/gpu/GksGpu/Analyzer/HeatFluxAnalyzer.cu
+++ b/src/gpu/GksGpu/Analyzer/HeatFluxAnalyzer.cu
@@ -63,6 +63,8 @@ bool HeatFluxAnalyzer::run(uint iter, Parameters parameters)
     this->heatFluxTimeSeries.push_back( q / qIdeal );
 
     if( iter % this->outputIter == 0 ) *logging::out << logging::Logger::INFO_HIGH << "q = " << q / qIdeal << "\n";
+
+    return true;
 }
 
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GksGpu/Analyzer/TurbulenceAnalyzer.cu b/src/gpu/GksGpu/Analyzer/TurbulenceAnalyzer.cu
index d05c47cd2309c6b2804f04dbd0c2214d3be388a3..5e896e03e7f02b63759f4ff6d42ca7f7f5e7bfa5 100644
--- a/src/gpu/GksGpu/Analyzer/TurbulenceAnalyzer.cu
+++ b/src/gpu/GksGpu/Analyzer/TurbulenceAnalyzer.cu
@@ -51,6 +51,8 @@ bool TurbulenceAnalyzer::run(uint iter, Parameters parameters)
     getLastCudaError("TurbulenceAnalyzer::run(uint iter, Parameters parameters)");
 
     this->counter++;
+
+    return true;
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GksGpu/BoundaryConditions/CreepingMassFlux.cu b/src/gpu/GksGpu/BoundaryConditions/CreepingMassFlux.cu
index 2b8b8174fd96d335a82cd80586415fc598576678..fd55918246bd4aebb38f1ed8982d86e776eb7fbb 100644
--- a/src/gpu/GksGpu/BoundaryConditions/CreepingMassFlux.cu
+++ b/src/gpu/GksGpu/BoundaryConditions/CreepingMassFlux.cu
@@ -91,7 +91,7 @@ __host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct&
                                                           const uint startIndex,
                                                           const uint index)
 {
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
+    // uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
     uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GksGpu/BoundaryConditions/HeatFlux.cu b/src/gpu/GksGpu/BoundaryConditions/HeatFlux.cu
index 3ecd1b6cd52c2210b0e67c48937f1db1b9420f2a..87f880bcf1001a012487f5df327566dfe1ded350 100644
--- a/src/gpu/GksGpu/BoundaryConditions/HeatFlux.cu
+++ b/src/gpu/GksGpu/BoundaryConditions/HeatFlux.cu
@@ -91,7 +91,7 @@ __host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct&
                                                           const uint startIndex,
                                                           const uint index)
 {
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
+    // uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
     uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GksGpu/BoundaryConditions/Inflow.cu b/src/gpu/GksGpu/BoundaryConditions/Inflow.cu
index 21ab9829309e072fd5049026bfb74d79c4393acb..7f9b2777f5e75a5c79a2ee5f280871a021cf6c94 100644
--- a/src/gpu/GksGpu/BoundaryConditions/Inflow.cu
+++ b/src/gpu/GksGpu/BoundaryConditions/Inflow.cu
@@ -86,7 +86,7 @@ __host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct&
 {
     uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
     uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
-    uint secondCellIdx = boundaryCondition.secondCells[ startIndex + index ];
+    // uint secondCellIdx = boundaryCondition.secondCells[ startIndex + index ];
 
     PrimitiveVariables ghostCellPrim;
     {
diff --git a/src/gpu/GksGpu/BoundaryConditions/MassCompensation.cu b/src/gpu/GksGpu/BoundaryConditions/MassCompensation.cu
index 4aaf406348754db851e1c45be542f158d7621b36..f6e69742635d594b2f0f1319642c51a5dde78a9e 100644
--- a/src/gpu/GksGpu/BoundaryConditions/MassCompensation.cu
+++ b/src/gpu/GksGpu/BoundaryConditions/MassCompensation.cu
@@ -91,7 +91,7 @@ __host__ __device__ inline void boundaryConditionFunction(const DataBaseStruct&
                                                           const uint startIndex,
                                                           const uint index)
 {
-    uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
+    // uint ghostCellIdx  = boundaryCondition.ghostCells [ startIndex + index ];
     uint domainCellIdx = boundaryCondition.domainCells[ startIndex + index ];
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GksGpu/CMakeLists.txt b/src/gpu/GksGpu/CMakeLists.txt
index da404e0209ed2c9f36ae323d2e6bd234fb6dfb96..5dbc533cc5f45c006c29a12242350f0433518bbf 100644
--- a/src/gpu/GksGpu/CMakeLists.txt
+++ b/src/gpu/GksGpu/CMakeLists.txt
@@ -1,3 +1,10 @@
 project(GksGpu LANGUAGES CUDA CXX)
 
-vf_add_library(PRIVATE_LINK basics GksMeshAdapter OpenMP::OpenMP_CXX MPI::MPI_CXX)
+vf_add_library(PRIVATE_LINK basics lbmCuda GksMeshAdapter OpenMP::OpenMP_CXX MPI::MPI_CXX)
+
+target_include_directories(GksGpu PRIVATE "${VF_THIRD_DIR}/cuda_samples/")
+# On non-MSVC toolchains, pass -fPIC when compiling the C++ sources of GksGpu (the genex excludes other languages such as CUDA).
+if (NOT MSVC)
+    target_compile_options(GksGpu PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:-fPIC>")
+endif()
+
diff --git a/src/gpu/GksGpu/CellProperties/CellProperties.cuh b/src/gpu/GksGpu/CellProperties/CellProperties.cuh
index 1ce36e85baa70739d9a98abf6cb9d3b9bd1a25f8..08731b9f52cdc54cc41d5e239ac05ee6e88fecd7 100644
--- a/src/gpu/GksGpu/CellProperties/CellProperties.cuh
+++ b/src/gpu/GksGpu/CellProperties/CellProperties.cuh
@@ -4,9 +4,13 @@
 #ifdef __CUDACC__
 #include <cuda_runtime.h>
 #else
+#ifndef __host__
 #define __host__
+#endif
+#ifndef __device__
 #define __device__
 #endif
+#endif
 
 //////////////////////////////////////////////////////////////////////////
 
diff --git a/src/gpu/GksGpu/CellUpdate/Reaction.cuh b/src/gpu/GksGpu/CellUpdate/Reaction.cuh
index 4bf317b2704d0111079fcb9888f62026265e6fd0..21ba61220fd7b81fbb53002ea090d278d228bb66 100644
--- a/src/gpu/GksGpu/CellUpdate/Reaction.cuh
+++ b/src/gpu/GksGpu/CellUpdate/Reaction.cuh
@@ -36,14 +36,14 @@ inline __host__ __device__ real getTurbulentViscosityDeardorff(const DataBaseStr
         real uHead = c1o2 * prim.U;
 
         {
-            uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 0, dataBase.numberOfCells)];
+            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 0, dataBase.numberOfCells)];
             readCellData(cellIndex, dataBase, neighborCons);
             neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
 
             uHead += c1o4 * neighborPrim.U;
         }
         {
-            uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 1, dataBase.numberOfCells)];
+            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 1, dataBase.numberOfCells)];
             readCellData(cellIndex, dataBase, neighborCons);
             neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
 
@@ -57,14 +57,14 @@ inline __host__ __device__ real getTurbulentViscosityDeardorff(const DataBaseStr
         real vHead = c1o2 * prim.V;
 
         {
-            uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 2, dataBase.numberOfCells)];
+            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 2, dataBase.numberOfCells)];
             readCellData(cellIndex, dataBase, neighborCons);
             neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
 
             vHead += c1o4 * neighborPrim.V;
         }
         {
-            uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 3, dataBase.numberOfCells)];
+            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 3, dataBase.numberOfCells)];
             readCellData(cellIndex, dataBase, neighborCons);
             neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
 
@@ -78,14 +78,14 @@ inline __host__ __device__ real getTurbulentViscosityDeardorff(const DataBaseStr
         real wHead = c1o2 * prim.W;
 
         {
-            uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 4, dataBase.numberOfCells)];
+            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 4, dataBase.numberOfCells)];
             readCellData(cellIndex, dataBase, neighborCons);
             neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
 
             wHead += c1o4 * neighborPrim.W;
         }
         {
-            uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 5, dataBase.numberOfCells)];
+            // uint neighborCellIndex = dataBase.cellToCell[CELL_TO_CELL(cellIndex, 5, dataBase.numberOfCells)];
             readCellData(cellIndex, dataBase, neighborCons);
             neighborPrim = toPrimitiveVariables(neighborCons, parameters.K);
 
diff --git a/src/gpu/GksGpu/DataBase/DataBase.cpp b/src/gpu/GksGpu/DataBase/DataBase.cpp
index 21c51b7575fbb46870b6b0397fe01d954e6458be..46921a683de3dd9c322be2d89b4ca66f6fa07020 100644
--- a/src/gpu/GksGpu/DataBase/DataBase.cpp
+++ b/src/gpu/GksGpu/DataBase/DataBase.cpp
@@ -14,6 +14,10 @@
 #include "GksMeshAdapter/GksMeshAdapter.h"
 #include "Communication/Communicator.h"
 
+#include <lbm/constants/NumericConstants.h>
+
+using namespace vf::lbm::constant;
+
 namespace GksGpu {
 
 DataBase::DataBase( std::string type ) 
diff --git a/src/gpu/GksGpu/FlowStateData/AccessDeviceData.cuh b/src/gpu/GksGpu/FlowStateData/AccessDeviceData.cuh
index 3ff9848f15d57ab66ed5c8c6178df3afd1d4c581..2ad158173970c5bb36637643f621c729a8fcc37a 100644
--- a/src/gpu/GksGpu/FlowStateData/AccessDeviceData.cuh
+++ b/src/gpu/GksGpu/FlowStateData/AccessDeviceData.cuh
@@ -4,9 +4,13 @@
 #ifdef __CUDACC__
 #include <cuda_runtime.h>
 #else
+#ifndef __host__
 #define __host__
+#endif
+#ifndef __device__
 #define __device__
 #endif
+#endif
 
 #include "Core/DataTypes.h"
 #include "Core/RealConstants.h"
diff --git a/src/gpu/GksGpu/FlowStateData/FlowStateData.cuh b/src/gpu/GksGpu/FlowStateData/FlowStateData.cuh
index 3ec3a7c9e60cf60d14fec43eaa1d17792148fb15..3b7929b39b47761624fec7052becc55921990276 100644
--- a/src/gpu/GksGpu/FlowStateData/FlowStateData.cuh
+++ b/src/gpu/GksGpu/FlowStateData/FlowStateData.cuh
@@ -4,15 +4,22 @@
 #ifdef __CUDACC__
 #include <cuda_runtime.h>
 #else
+#ifndef __host__
 #define __host__
+#endif
+#ifndef __device__
 #define __device__
 #endif
+#endif
 
 #include "Core/DataTypes.h"
-#include "Core/RealConstants.h"
 
 #include "Definitions/PassiveScalar.h"
 
+#include <lbm/constants/NumericConstants.h>
+
+using namespace vf::lbm::constant;
+
 namespace GksGpu {
 
 //////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GksGpu/FlowStateData/FlowStateDataConversion.cuh b/src/gpu/GksGpu/FlowStateData/FlowStateDataConversion.cuh
index c33f02ea3f9f4d13050c72b52b471b312a52b1c8..b7b759c99ffec6118a4173af098e0b372caf6ef7 100644
--- a/src/gpu/GksGpu/FlowStateData/FlowStateDataConversion.cuh
+++ b/src/gpu/GksGpu/FlowStateData/FlowStateDataConversion.cuh
@@ -4,9 +4,13 @@
 #ifdef __CUDACC__
 #include <cuda_runtime.h>
 #else
+#ifndef __host__
 #define __host__
+#endif
+#ifndef __device__
 #define __device__
 #endif
+#endif
 
 #include "Core/DataTypes.h"
 #include "Core/RealConstants.h"
diff --git a/src/gpu/GksGpu/FlowStateData/HeatCapacities.cuh b/src/gpu/GksGpu/FlowStateData/HeatCapacities.cuh
index 3002ed3ddc275ce9dda7499430153b7460e2452f..04a164aa327bed36cca2b8756c87dd1c7d9f0a64 100644
--- a/src/gpu/GksGpu/FlowStateData/HeatCapacities.cuh
+++ b/src/gpu/GksGpu/FlowStateData/HeatCapacities.cuh
@@ -1,12 +1,16 @@
 //#ifndef HeatCapacities_H
 //#define HeatCapacities_H
 //
-//#ifdef __CUDACC__
-//#include <cuda_runtime.h>
-//#else
-//#define __host__
-//#define __device__
-//#endif
+// #ifdef __CUDACC__
+// #include <cuda_runtime.h>
+// #else
+// #ifndef __host__
+// #define __host__
+// #endif
+// #ifndef __device__
+// #define __device__
+// #endif
+// #endif
 //
 //#include "Core/DataTypes.h"
 //#include "Core/RealConstants.h"
diff --git a/src/gpu/GksGpu/FlowStateData/ThermalDependencies.cuh b/src/gpu/GksGpu/FlowStateData/ThermalDependencies.cuh
index 9f3a268a5d8a7e64d59b0c1b60eeec8d68423174..47eb261a089b9a1c8d7bb14bca864c334887d447 100644
--- a/src/gpu/GksGpu/FlowStateData/ThermalDependencies.cuh
+++ b/src/gpu/GksGpu/FlowStateData/ThermalDependencies.cuh
@@ -4,9 +4,13 @@
 #ifdef __CUDACC__
 #include <cuda_runtime.h>
 #else
+#ifndef __host__
 #define __host__
+#endif
+#ifndef __device__
 #define __device__
 #endif
+#endif
 
 #include <math.h>
 
diff --git a/src/gpu/GksGpu/FluxComputation/FluxComputation.cu b/src/gpu/GksGpu/FluxComputation/FluxComputation.cu
index 8c935863615eb1c3f117c87b6ba57e92a0061c0f..25ba5726bfd505518bf82b88accea1c3549c5b96 100644
--- a/src/gpu/GksGpu/FluxComputation/FluxComputation.cu
+++ b/src/gpu/GksGpu/FluxComputation/FluxComputation.cu
@@ -152,7 +152,7 @@ __host__ __device__ inline void fluxFunction(DataBaseStruct dataBase, Parameters
     {
         if( parameters.spongeLayerIdx == 0 )
         {
-            real x = dataBase.faceCenter[VEC_X(faceIndex, dataBase.numberOfFaces)];
+            // real x = dataBase.faceCenter[VEC_X(faceIndex, dataBase.numberOfFaces)];
             real z = dataBase.faceCenter[VEC_Z(faceIndex, dataBase.numberOfFaces)];
 
             real muNew = parameters.mu;
@@ -168,7 +168,7 @@ __host__ __device__ inline void fluxFunction(DataBaseStruct dataBase, Parameters
         }
         if( parameters.spongeLayerIdx == 1 )
         {
-            real x = dataBase.faceCenter[VEC_X(faceIndex, dataBase.numberOfFaces)];
+            // real x = dataBase.faceCenter[VEC_X(faceIndex, dataBase.numberOfFaces)];
             real z = dataBase.faceCenter[VEC_Z(faceIndex, dataBase.numberOfFaces)];
 
             real muNew = parameters.mu;
diff --git a/src/gpu/GksGpu/Output/VtkWriter.cpp b/src/gpu/GksGpu/Output/VtkWriter.cpp
index a1a0ab9f62f275107e790a34c407c83adc09ab2d..234151c7df481e81e5dd68c9a4692831f7271f54 100644
--- a/src/gpu/GksGpu/Output/VtkWriter.cpp
+++ b/src/gpu/GksGpu/Output/VtkWriter.cpp
@@ -47,6 +47,8 @@
 #include "FlowStateData/FlowStateDataConversion.cuh"
 #include "FlowStateData/AccessDeviceData.cuh"
 
+namespace GksGpu {
+
 void VtkWriter::write(std::shared_ptr<DataBase> dataBase, Parameters parameters, std::string filename)
 {
     *logging::out << logging::Logger::INFO_INTERMEDIATE << "Write " << filename << ".vtu" << " ... \n";
@@ -144,3 +146,5 @@ void VtkWriter::write(std::shared_ptr<DataBase> dataBase, Parameters parameters,
 
     *logging::out << logging::Logger::INFO_INTERMEDIATE << "done!\n";
 }
+
+}
diff --git a/src/gpu/GksGpu/Output/VtkWriter.h b/src/gpu/GksGpu/Output/VtkWriter.h
index 0596fc7bd164050236b8db54a31ab7689a84d01f..679fae55b2db5ec418b389ca0840961ab8f80dde 100644
--- a/src/gpu/GksGpu/Output/VtkWriter.h
+++ b/src/gpu/GksGpu/Output/VtkWriter.h
@@ -38,9 +38,12 @@
 
 #include "GksGpu_export.h"
 
+namespace GksGpu {
+
 struct DataBase;
 struct Parameters;
 
+
 class GKSGPU_EXPORT VtkWriter
 {
 public:
@@ -49,4 +52,6 @@ public:
                        std::string filename );
 };
 
+}
+
 #endif
\ No newline at end of file
diff --git a/src/gpu/GksMeshAdapter/CMakeLists.txt b/src/gpu/GksMeshAdapter/CMakeLists.txt
index cb00b3c016786c41ef5640eb362322bb0a3768f8..b9a2d12df4d0bee9396a706c6636b5f4056b2d3a 100644
--- a/src/gpu/GksMeshAdapter/CMakeLists.txt
+++ b/src/gpu/GksMeshAdapter/CMakeLists.txt
@@ -1,3 +1,3 @@
 project(GksMeshAdapter LANGUAGES CUDA CXX)
 
-vf_add_library(PRIVATE_LINK basics GridGenerator)
+vf_add_library(PRIVATE_LINK basics GridGenerator lbmCuda)
diff --git a/src/gpu/GksMeshAdapter/GksMeshAdapter.cpp b/src/gpu/GksMeshAdapter/GksMeshAdapter.cpp
index 16f5c208565ff090cd2344348d5e47150babe84e..8d032dfeead2f582c5af2426c45b09ead33883cc 100644
--- a/src/gpu/GksMeshAdapter/GksMeshAdapter.cpp
+++ b/src/gpu/GksMeshAdapter/GksMeshAdapter.cpp
@@ -22,6 +22,10 @@
 #include "MeshCell.h"
 #include "MeshFace.h"
 
+#include <lbm/constants/NumericConstants.h>
+
+using namespace vf::lbm::constant;
+
 using namespace vf::gpu;
 
 GksMeshAdapter::GksMeshAdapter(SPtr<MultipleGridBuilder> gridBuilder)
@@ -518,7 +522,7 @@ void GksMeshAdapter::sortFaces()
     // sort into blocks
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    std::array<char, 3> orientations = {'x', 'y', 'z'};
+    // std::array<char, 3> orientations = {'x', 'y', 'z'};
 
     for( uint level = 0; level < this->gridBuilder->getNumberOfLevels(); level++ )
     {
@@ -527,17 +531,17 @@ void GksMeshAdapter::sortFaces()
             uint start =         this->startOfFacesPerLevelXYZ [ 3 * level + idx];
             uint end   = start + this->numberOfFacesPerLevelXYZ[ 3 * level + idx];
 
-            real xMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.x < rhs.faceCenter.x; })).faceCenter.x;
-            real yMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.y < rhs.faceCenter.y; })).faceCenter.y;
-            real zMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.z < rhs.faceCenter.z; })).faceCenter.z;
+            // real xMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.x < rhs.faceCenter.x; })).faceCenter.x;
+            // real yMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.y < rhs.faceCenter.y; })).faceCenter.y;
+            // real zMax = (*std::max_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.z < rhs.faceCenter.z; })).faceCenter.z;
 
             real xMin = (*std::min_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.x < rhs.faceCenter.x; })).faceCenter.x;
             real yMin = (*std::min_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.y < rhs.faceCenter.y; })).faceCenter.y;
             real zMin = (*std::min_element(this->faces.begin() + start, this->faces.begin() + end, [this](MeshFace lhs, MeshFace rhs) { return lhs.faceCenter.z < rhs.faceCenter.z; })).faceCenter.z;
 
-            real xRange = xMax - xMin;
-            real yRange = yMax - yMin;
-            real zRange = zMax - zMin;
+            // real xRange = xMax - xMin;
+            // real yRange = yMax - yMin;
+            // real zRange = zMax - zMin;
 
             uint blockDim = 8;
 
diff --git a/src/gpu/GksVtkAdapter/CMakeLists.txt b/src/gpu/GksVtkAdapter/CMakeLists.txt
index 644dc6defa101644338fc35211efa7eccabca3b0..fdc7a1eb56f548afc58e83ef7b0f7ad02ad12ea9 100644
--- a/src/gpu/GksVtkAdapter/CMakeLists.txt
+++ b/src/gpu/GksVtkAdapter/CMakeLists.txt
@@ -1,5 +1,5 @@
 
 
-vf_add_library(BUILDTYPE shared PRIVATE_LINK basics GksGpu)
+vf_add_library(BUILDTYPE static PRIVATE_LINK basics GksGpu)
 
 include (${VF_CMAKE_DIR}/3rd/vtk.cmake)
\ No newline at end of file
diff --git a/src/gpu/GridGenerator/CMakeLists.txt b/src/gpu/GridGenerator/CMakeLists.txt
index 8f65576fc55767f4eea1c30a6241f03fe031bbc5..07b6125d6ecd83dca59e20e7c286ebc2b8d14715 100644
--- a/src/gpu/GridGenerator/CMakeLists.txt
+++ b/src/gpu/GridGenerator/CMakeLists.txt
@@ -1,3 +1,7 @@
 project(GridGenerator LANGUAGES CXX)
 
 vf_add_library(PRIVATE_LINK basics OpenMP::OpenMP_CXX)
+
+if(NOT MSVC) # -Wno-strict-aliasing is a GCC/Clang-style flag, so it is skipped for MSVC
+   target_compile_options(GridGenerator PRIVATE "-Wno-strict-aliasing")
+endif()
\ No newline at end of file
diff --git a/src/gpu/GridGenerator/geometries/BoundingBox/BoundingBox.h b/src/gpu/GridGenerator/geometries/BoundingBox/BoundingBox.h
index 1fa27b68ecb127c469a6a4f7fc7f7d094110dd8f..2a495d3a4b7a4854079b62ba979baaea0d7db5f0 100644
--- a/src/gpu/GridGenerator/geometries/BoundingBox/BoundingBox.h
+++ b/src/gpu/GridGenerator/geometries/BoundingBox/BoundingBox.h
@@ -34,7 +34,6 @@
 #define BoundingBox_h
 
 #include <vector>
-#include <cuda_runtime.h>
 
 #include "global.h"
 
diff --git a/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.h b/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.h
index 3ecacc9f71f6817659f67431dd91d95e40455ad4..2d31de98bf1f5530ada555e548ac6bb40e5e51b7 100644
--- a/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.h
+++ b/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.h
@@ -34,7 +34,6 @@
 #define TriangularMesh_h
 
 #include <stdio.h>
-#include <cuda_runtime.h>
 #include <vector>
 #include <string>
 #include <memory>
diff --git a/src/gpu/GridGenerator/geometries/Vertex/Vertex.h b/src/gpu/GridGenerator/geometries/Vertex/Vertex.h
index 492c7e1a0de34336d15a8a3c3030dc6733310d8b..7b27d853f652459143699204c59a5843de6eaf39 100644
--- a/src/gpu/GridGenerator/geometries/Vertex/Vertex.h
+++ b/src/gpu/GridGenerator/geometries/Vertex/Vertex.h
@@ -34,7 +34,6 @@
 #define VERTEX_H
 
 #include <stdio.h>
-#include <cuda_runtime.h>
 #include <memory>
 #include <ostream>
 
diff --git a/src/gpu/VirtualFluids_GPU/CMakeLists.txt b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
index c6443e1a83fb08178fd1adf4c20d3663ea372210..14fdadba44069dbb098f8922b208397a609275af 100644
--- a/src/gpu/VirtualFluids_GPU/CMakeLists.txt
+++ b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
@@ -5,7 +5,7 @@ if(MSVC)
     set(additional_libraries ws2_32 Traffic) # ws_32 throws an error on Phoenix
 endif()
 
-vf_add_library(PUBLIC_LINK basics lbmCuda PRIVATE_LINK ${additional_libraries} GridGenerator MPI::MPI_CXX)
+vf_add_library(PUBLIC_LINK basics lbmCuda PRIVATE_LINK ${additional_libraries} GridGenerator MPI::MPI_CXX cuda)
 
 
 #SET(TPN_WIN32 "/EHsc")
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
index 632ab4e7d3b3a96c7bde1b1ea8838b9ef08128f2..f6c41a61ce7c8654ae1764ff01fe6a8d87127563 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
@@ -1,4 +1,4 @@
-#include "Calculation/UpdateGrid27.h"
+#include "UpdateGrid27.h"
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
 #include "Calculation/DragLift.h"
@@ -9,7 +9,7 @@
 #include "Kernel/Kernel.h"
 
 void updateGrid27(Parameter* para, 
-                  vf::gpu::Communicator* comm, 
+                  vf::gpu::Communicator& comm, 
                   CudaMemoryManager* cudaManager, 
                   std::vector<std::shared_ptr<PorousMedia>>& pm, 
                   int level, 
@@ -60,9 +60,9 @@ void updateGrid27(Parameter* para,
         coarseToFine(para, level);
     }
 
-    visitActuators(para, cudaManager, level, t);
+    interactWithActuators(para, cudaManager, level, t);
 
-    visitProbes(para, cudaManager, level, t);
+    interactWithProbes(para, cudaManager, level, t);
 }
 
 void collision(Parameter* para, std::vector<std::shared_ptr<PorousMedia>>& pm, int level, unsigned int t, std::vector < SPtr< Kernel>>& kernels)
@@ -153,7 +153,7 @@ void collisionAdvectionDiffusion(Parameter* para, int level)
 	}
 }
 
-void exchangeMultiGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangeMultiGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
     if (para->getNumprocs() > 1)
 	{
@@ -1264,18 +1264,18 @@ void coarseToFine(Parameter* para, int level)
 
 }
 
-void visitActuators(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t)
+void interactWithActuators(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t)
 {
     for( SPtr<PreCollisionInteractor> actuator: para->getActuators() )
     {
-        actuator->visit(para, cudaManager, level, t);
+        actuator->interact(para, cudaManager, level, t);
     }
 }
 
-void visitProbes(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t)
+void interactWithProbes(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t)
 {
     for( SPtr<PreCollisionInteractor> probe: para->getProbes() )
     {
-        probe->visit(para, cudaManager, level, t);
+        probe->interact(para, cudaManager, level, t);
     }
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
index 0fbf33cc20dd4c3d26e9f45a23318342b7a6a4d4..4f52fd63fedec9e571efb7c464b31f8e2eb823c4 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
@@ -11,7 +11,7 @@
 class Kernel;
 
 extern "C" void updateGrid27(Parameter* para, 
-                             vf::gpu::Communicator* comm, 
+                             vf::gpu::Communicator& comm, 
                              CudaMemoryManager* cudaManager, 
                              std::vector<std::shared_ptr<PorousMedia>>& pm, 
                              int level,
@@ -24,7 +24,7 @@ extern "C" void collisionPorousMedia(Parameter* para, std::vector<std::shared_pt
 
 extern "C" void collisionAdvectionDiffusion(Parameter* para, int level);
 
-extern "C" void exchangeMultiGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangeMultiGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
 
 extern "C" void postCollisionBC(Parameter* para, int level, unsigned int t);
 
diff --git a/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp b/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp
index 32fd45109538101a35e253caff102c4f4df1a4a5..2743f454e321bf21cb4d0b7fd08aab8600a2bee8 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp
+++ b/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp
@@ -1,9 +1,9 @@
 #include "Communicator.h"
+
 #include <mpi.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <vector>
-#include <string.h>
+
+#include <logger/Logger.h>
 
 #if defined (_WIN32) || defined (_WIN64)
    #include <Winsock2.h>
@@ -12,13 +12,19 @@
 #endif
 //lib for windows Ws2_32.lib
 
-namespace vf
-{
-namespace gpu
+namespace vf::gpu
 {
 
+
 Communicator::Communicator()
 {
+    int mpiInitialized = 0; // false
+    MPI_Initialized(&mpiInitialized);
+    if (!mpiInitialized) {
+        MPI_Init(NULL, NULL);
+        VF_LOG_TRACE("vf::gpu::Communicator(): MPI_Init");
+    }
+
     MPI_Comm_rank(MPI_COMM_WORLD, &PID);
     MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
 
@@ -29,21 +35,25 @@ Communicator::Communicator()
     // Get my position in this communicator, and my neighbors
     MPI_Cart_shift(comm1d, 0, 1, &nbrbottom, &nbrtop);
 }
-// Crap by Martin Sch.
-Communicator::Communicator(const int numberOfProcs)
+
+Communicator::~Communicator()
 {
-    MPI_Comm_rank(MPI_COMM_WORLD, &PID);
-    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
-    commGPU = MPI_COMM_WORLD;
-    requestGPU.resize(0);
-    rcount = 0;
+    // Shut MPI down on destruction unless it has already been finalized.
+    int _mpiFinalized = 0; // false
+    MPI_Finalized(&_mpiFinalized);
+    if (!_mpiFinalized) {
+        MPI_Finalize(); // NOTE(review): also runs when the constructor found MPI already initialized elsewhere - confirm this ownership is intended
+        VF_LOG_TRACE("vf::gpu::~Communicator(): MPI_Finalize");
+    }
 }
-Communicator *Communicator::instanz = 0;
-Communicator *Communicator::getInstanz()
+
+
+// C++11 thread-safe singleton implementation (function-local static, "Meyers singleton"):
+// https://stackoverflow.com/questions/1661529/is-meyers-implementation-of-the-singleton-pattern-thread-safe
+Communicator& Communicator::getInstance()
 {
-    if (instanz == 0)
-        instanz = new Communicator(0);
-    return instanz;
+    static Communicator comm; // constructed once, on the first call; destroyed at program exit
+    return comm;
 }
 
 void Communicator::exchngBottomToTop(float *sbuf, float *rbuf, int count)
@@ -189,7 +199,7 @@ int Communicator::mapCudaDevice(const int &rank, const int &size, const std::vec
                     counter++;
             }
             if (counter >= maxdev) {
-                fprintf(stderr, "More processes than GPUs!\n");
+                VF_LOG_CRITICAL("More processes than GPUs!");
                 exit(1);
             }
             map[i] = devices[counter];
@@ -198,12 +208,11 @@ int Communicator::mapCudaDevice(const int &rank, const int &size, const std::vec
 
     MPI_Scatter(map, 1, MPI_UNSIGNED, &device, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD);
 
-    printf("Rank: %d runs on host: %s with GPU: %d\n", rank, hostname, device);
+    VF_LOG_INFO("Rank: {} runs on host: {} with GPU: {}", rank, hostname, device);
 
     free(map);
     free(host);
     return device;
 }
 
-} // namespace GPU
-} // namespace VF
+}
diff --git a/src/gpu/VirtualFluids_GPU/Communication/Communicator.h b/src/gpu/VirtualFluids_GPU/Communication/Communicator.h
index 72c4a136ece03098c10ea65493ba02a0109ed95d..256dde87e8ff6b3a8c7abcae0ac31466cc68ba95 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/Communicator.h
+++ b/src/gpu/VirtualFluids_GPU/Communication/Communicator.h
@@ -3,14 +3,11 @@
 
 #include <vector>
 
-
-
-
 #include <mpi.h>
 
 #include "VirtualFluids_GPU_export.h"
 
-#include "LBM/LB.h"
+#include <basics/Core/DataTypes.h>
 
 //////////////////////////////////
 #ifdef VF_DOUBLE_ACCURACY
@@ -21,48 +18,46 @@
 //////////////////////////////////
 
 
-namespace vf
-{
-namespace gpu
+namespace vf::gpu
 {
 
 
 class VIRTUALFLUIDS_GPU_EXPORT Communicator
 {
 public:
-	static Communicator* getInstanz();
-	static Communicator* getInstanz(const int numberOfProcs);
-	void exchngBottomToTop(float* sbuf, float* rbuf, int count);
-	void exchngTopToBottom(float* sbuf, float* rbuf, int count);
-   void waitAll();
-   void distributeGeometry(unsigned int* dataRoot, unsigned int* dataNode, int dataSizePerNode);
-	int getPID() const;
-	int getNummberOfProcess() const;
-	int getNeighbourTop();
-	int getNeighbourBottom();
-   void exchngData(float* sbuf_t, float* rbuf_t, float* sbuf_b, float* rbuf_b, int count);
-   void exchngDataNB(float* sbuf_t, int count_st, float* rbuf_t, int count_rt, float* sbuf_b, int count_sb, float* rbuf_b, int count_rb);
-   //////////////////////////////////////////////////////////////////////////
-   void exchngDataGPU(real* sbuf, int count_s, real* rbuf, int count_r, int nb_rank);
-   void sendRecvGPU(real* sbuf, int count_s, real* rbuf, int count_r, int nb_rank);
-   void nbRecvDataGPU( real* rbuf, int count_r, int nb_rank );
-   void nbSendDataGPU( real* sbuf, int count_s, int nb_rank );
-   void waitallGPU();
-   void sendDataGPU( real* sbuf, int count_s, int nb_rank );
-   void waitGPU(int id);
-   void resetRequest();
-   void barrierGPU();
-   void barrier();
-   //////////////////////////////////////////////////////////////////////////
-   void exchngDataGeo(int* sbuf_t, int* rbuf_t, int* sbuf_b, int* rbuf_b, int count);
-	MPI_Comm getCommunicator();
-	void startTimer();
-	void stopTimer();
-	double getTime();
-	int mapCudaDevice(const int &rank, const int &size, const std::vector<unsigned int> &devices, const int &maxdev);
-protected:
+    static Communicator& getInstance();
+    Communicator(const Communicator&) = delete;
+    Communicator& operator=(const Communicator&) = delete;
+
+    void exchngBottomToTop(float* sbuf, float* rbuf, int count);
+    void exchngTopToBottom(float* sbuf, float* rbuf, int count);
+    void waitAll();
+    void distributeGeometry(unsigned int* dataRoot, unsigned int* dataNode, int dataSizePerNode);
+    int getPID() const;
+    int getNummberOfProcess() const;
+    int getNeighbourTop();
+    int getNeighbourBottom();
+    void exchngData(float* sbuf_t, float* rbuf_t, float* sbuf_b, float* rbuf_b, int count);
+    void exchngDataNB(float* sbuf_t, int count_st, float* rbuf_t, int count_rt, float* sbuf_b, int count_sb, float* rbuf_b, int count_rb);
+    //////////////////////////////////////////////////////////////////////////
+    void exchngDataGPU(real* sbuf, int count_s, real* rbuf, int count_r, int nb_rank);
+    void sendRecvGPU(real* sbuf, int count_s, real* rbuf, int count_r, int nb_rank);
+    void nbRecvDataGPU( real* rbuf, int count_r, int nb_rank );
+    void nbSendDataGPU( real* sbuf, int count_s, int nb_rank );
+    void waitallGPU();
+    void sendDataGPU( real* sbuf, int count_s, int nb_rank );
+    void waitGPU(int id);
+    void resetRequest();
+    void barrierGPU();
+    void barrier();
+    //////////////////////////////////////////////////////////////////////////
+    void exchngDataGeo(int* sbuf_t, int* rbuf_t, int* sbuf_b, int* rbuf_b, int count);
+    MPI_Comm getCommunicator();
+    void startTimer();
+    void stopTimer();
+    double getTime();
+    int mapCudaDevice(const int &rank, const int &size, const std::vector<unsigned int> &devices, const int &maxdev);
 private:
-   static Communicator* instanz;
    int numprocs, PID;
    int nbrbottom, nbrtop; 
    MPI_Comm comm1d, commGPU;
@@ -75,12 +70,10 @@ private:
    double starttime;
    double endtime;
    Communicator();
-   Communicator(const int numberOfProcs);
-   Communicator(const Communicator&);
+   ~Communicator();
 };
 
-} // namespace GPU
-} // namespace VF
+}
 
 #endif
 
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
index 8f89656ac6feb7dfe2644a2b6d604ccec510c3cb..d91e86c3140bb08aa2d8ef28d7cc147b23a2b804 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
@@ -7,7 +7,7 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // X
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -30,7 +30,7 @@ void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborX[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborX[i].f[0],
 							para->getParH(level)->recvProcessNeighborX[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborX[i].rankNeighbor);
 	}
@@ -38,7 +38,7 @@ void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborX[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborX[i].rankNeighbor);
 	//}
@@ -46,13 +46,13 @@ void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
 						  para->getParH(level)->sendProcessNeighborX[i].numberOfFs,
 						  para->getParH(level)->sendProcessNeighborX[i].rankNeighbor);
 	}
@@ -60,13 +60,13 @@ void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -88,7 +88,7 @@ void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -111,7 +111,7 @@ void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborX[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborX[i].f[0],
 							para->getParH(level)->recvProcessNeighborX[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborX[i].rankNeighbor);
 	}
@@ -119,7 +119,7 @@ void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborX[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborX[i].rankNeighbor);
 	//}
@@ -127,13 +127,13 @@ void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
 						  para->getParH(level)->sendProcessNeighborX[i].numberOfFs,
 						  para->getParH(level)->sendProcessNeighborX[i].rankNeighbor);
 	}
@@ -141,13 +141,13 @@ void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -176,7 +176,7 @@ void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Y
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -199,7 +199,7 @@ void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborY[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborY[i].f[0],
 							para->getParH(level)->recvProcessNeighborY[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborY[i].rankNeighbor);
 	}
@@ -207,7 +207,7 @@ void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborY[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborY[i].rankNeighbor);
 	//}
@@ -215,13 +215,13 @@ void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
 						  para->getParH(level)->sendProcessNeighborY[i].numberOfFs,
 			              para->getParH(level)->sendProcessNeighborY[i].rankNeighbor);
 	}
@@ -229,13 +229,13 @@ void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -257,7 +257,7 @@ void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -280,7 +280,7 @@ void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborY[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborY[i].f[0],
 							para->getParH(level)->recvProcessNeighborY[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborY[i].rankNeighbor);
 	}
@@ -288,7 +288,7 @@ void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborY[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborY[i].rankNeighbor);
 	//}
@@ -296,13 +296,13 @@ void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
 						  para->getParH(level)->sendProcessNeighborY[i].numberOfFs,
 			              para->getParH(level)->sendProcessNeighborY[i].rankNeighbor);
 	}
@@ -310,13 +310,13 @@ void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -345,7 +345,7 @@ void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Z
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -368,7 +368,7 @@ void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborZ[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborZ[i].f[0],
 							para->getParH(level)->recvProcessNeighborZ[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborZ[i].rankNeighbor);
 	}
@@ -376,7 +376,7 @@ void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborZ[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborZ[i].rankNeighbor);
 	//}
@@ -384,13 +384,13 @@ void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
 						  para->getParH(level)->sendProcessNeighborZ[i].numberOfFs,
 						  para->getParH(level)->sendProcessNeighborZ[i].rankNeighbor);
 	}
@@ -398,13 +398,13 @@ void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -426,7 +426,7 @@ void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -449,7 +449,7 @@ void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborZ[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborZ[i].f[0],
 							para->getParH(level)->recvProcessNeighborZ[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborZ[i].rankNeighbor);
 	}
@@ -457,7 +457,7 @@ void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborZ[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborZ[i].rankNeighbor);
 	//}
@@ -465,13 +465,13 @@ void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
 						  para->getParH(level)->sendProcessNeighborZ[i].numberOfFs,
 						  para->getParH(level)->sendProcessNeighborZ[i].rankNeighbor);
 	}
@@ -479,13 +479,13 @@ void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -529,7 +529,7 @@ void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //1D domain decomposition
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighbors(level, "send")); i++)
 	{
@@ -547,7 +547,7 @@ void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cuda
 		//////////////////////////////////////////////////////////////////////////
 		cudaManager->cudaCopyProcessNeighborFsDH(level, i);
 		//////////////////////////////////////////////////////////////////////////
-		comm->exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], 
+		comm.exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], 
 							para->getParH(level)->sendProcessNeighbor[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighbor[i].f[0],
 							para->getParH(level)->recvProcessNeighbor[i].numberOfFs,
@@ -574,7 +574,7 @@ void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cuda
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighbors(level, "send")); i++)
 	{
@@ -592,7 +592,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 		//////////////////////////////////////////////////////////////////////////
 		cudaManager->cudaCopyProcessNeighborFsDH(level, i);
 		//////////////////////////////////////////////////////////////////////////
-		comm->exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], 
+		comm.exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], 
 							para->getParH(level)->sendProcessNeighbor[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighbor[i].f[0],
 							para->getParH(level)->recvProcessNeighbor[i].numberOfFs,
@@ -623,7 +623,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //// X
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, int level)
+//void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, int level)
 //{
 //	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 //	{
@@ -641,7 +641,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //		//////////////////////////////////////////////////////////////////////////
 //		para->cudaCopyProcessNeighborXFsDH(level, i);
 //		//////////////////////////////////////////////////////////////////////////
-//		comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], 
+//		comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], 
 //							para->getParH(level)->sendProcessNeighborX[i].numberOfFs,
 //							para->getParH(level)->recvProcessNeighborX[i].f[0],
 //							para->getParH(level)->recvProcessNeighborX[i].numberOfFs,
@@ -663,7 +663,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //	}
 //}
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, int level)
+//void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, int level)
 //{
 //	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 //	{
@@ -681,7 +681,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //		//////////////////////////////////////////////////////////////////////////
 //		para->cudaCopyProcessNeighborXFsDH(level, i);
 //		//////////////////////////////////////////////////////////////////////////
-//		comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], 
+//		comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], 
 //							para->getParH(level)->sendProcessNeighborX[i].numberOfFs,
 //							para->getParH(level)->recvProcessNeighborX[i].f[0],
 //							para->getParH(level)->recvProcessNeighborX[i].numberOfFs,
@@ -710,7 +710,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //// Y
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, int level)
+//void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, int level)
 //{
 //	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 //	{
@@ -728,7 +728,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //		//////////////////////////////////////////////////////////////////////////
 //		para->cudaCopyProcessNeighborYFsDH(level, i);
 //		//////////////////////////////////////////////////////////////////////////
-//		comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], 
+//		comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], 
 //							para->getParH(level)->sendProcessNeighborY[i].numberOfFs,
 //							para->getParH(level)->recvProcessNeighborY[i].f[0],
 //							para->getParH(level)->recvProcessNeighborY[i].numberOfFs,
@@ -750,7 +750,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //	}
 //}
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, int level)
+//void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, int level)
 //{
 //	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 //	{
@@ -768,7 +768,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //		//////////////////////////////////////////////////////////////////////////
 //		para->cudaCopyProcessNeighborYFsDH(level, i);
 //		//////////////////////////////////////////////////////////////////////////
-//		comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], 
+//		comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], 
 //							para->getParH(level)->sendProcessNeighborY[i].numberOfFs,
 //							para->getParH(level)->recvProcessNeighborY[i].f[0],
 //							para->getParH(level)->recvProcessNeighborY[i].numberOfFs,
@@ -797,7 +797,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //// Z
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, int level)
+//void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, int level)
 //{
 //	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 //	{
@@ -815,7 +815,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //		//////////////////////////////////////////////////////////////////////////
 //		para->cudaCopyProcessNeighborZFsDH(level, i);
 //		//////////////////////////////////////////////////////////////////////////
-//		comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], 
+//		comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], 
 //							para->getParH(level)->sendProcessNeighborZ[i].numberOfFs,
 //							para->getParH(level)->recvProcessNeighborZ[i].f[0],
 //							para->getParH(level)->recvProcessNeighborZ[i].numberOfFs,
@@ -837,7 +837,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //	}
 //}
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, int level)
+//void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, int level)
 //{
 //	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 //	{
@@ -855,7 +855,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //		//////////////////////////////////////////////////////////////////////////
 //		para->cudaCopyProcessNeighborZFsDH(level, i);
 //		//////////////////////////////////////////////////////////////////////////
-//		comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], 
+//		comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], 
 //							para->getParH(level)->sendProcessNeighborZ[i].numberOfFs,
 //							para->getParH(level)->recvProcessNeighborZ[i].f[0],
 //							para->getParH(level)->recvProcessNeighborZ[i].numberOfFs,
@@ -932,7 +932,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // X
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -955,7 +955,7 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0],
 							para->getParH(level)->recvProcessNeighborADX[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborADX[i].rankNeighbor);
 	}
@@ -963,7 +963,7 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborADX[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor);
 	//}
@@ -971,13 +971,13 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
 						  para->getParH(level)->sendProcessNeighborADX[i].numberOfFs,
 						  para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor);
 	}
@@ -985,13 +985,13 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -1013,7 +1013,7 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -1036,7 +1036,7 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0],
 							para->getParH(level)->recvProcessNeighborADX[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborADX[i].rankNeighbor);
 	}
@@ -1044,7 +1044,7 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborADX[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor);
 	//}
@@ -1052,13 +1052,13 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
 						  para->getParH(level)->sendProcessNeighborADX[i].numberOfFs,
 						  para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor);
 	}
@@ -1066,13 +1066,13 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -1101,7 +1101,7 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm,
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Y
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -1124,7 +1124,7 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0],
 							para->getParH(level)->recvProcessNeighborADY[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborADY[i].rankNeighbor);
 	}
@@ -1132,7 +1132,7 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborADY[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor);
 	//}
@@ -1140,13 +1140,13 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
 						  para->getParH(level)->sendProcessNeighborADY[i].numberOfFs,
 			              para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor);
 	}
@@ -1154,13 +1154,13 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -1182,7 +1182,7 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -1205,7 +1205,7 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0],
 							para->getParH(level)->recvProcessNeighborADY[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborADY[i].rankNeighbor);
 	}
@@ -1213,7 +1213,7 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborADY[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor);
 	//}
@@ -1221,13 +1221,13 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
 						  para->getParH(level)->sendProcessNeighborADY[i].numberOfFs,
 			              para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor);
 	}
@@ -1235,13 +1235,13 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -1270,7 +1270,7 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm,
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Z
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -1293,7 +1293,7 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0],
 							para->getParH(level)->recvProcessNeighborADZ[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborADZ[i].rankNeighbor);
 	}
@@ -1301,7 +1301,7 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor);
 	//}
@@ -1309,13 +1309,13 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
 						  para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs,
 						  para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor);
 	}
@@ -1323,13 +1323,13 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -1351,7 +1351,7 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -1374,7 +1374,7 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0],
 							para->getParH(level)->recvProcessNeighborADZ[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborADZ[i].rankNeighbor);
 	}
@@ -1382,7 +1382,7 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor);
 	//}
@@ -1390,13 +1390,13 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
 						  para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs,
 						  para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor);
 	}
@@ -1404,13 +1404,13 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -1486,7 +1486,7 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm,
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // X
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -1510,7 +1510,7 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(
+		comm.nbRecvDataGPU(
 			para->getParH(level)->recvProcessNeighborF3X[i].g[0],
 			para->getParH(level)->recvProcessNeighborF3X[i].numberOfGs,
 			para->getParH(level)->recvProcessNeighborF3X[i].rankNeighbor);
@@ -1519,7 +1519,7 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->sendDataGPU(
+		comm.sendDataGPU(
 			para->getParH(level)->sendProcessNeighborF3X[i].g[0],
 			para->getParH(level)->sendProcessNeighborF3X[i].numberOfGs,
 			para->getParH(level)->sendProcessNeighborF3X[i].rankNeighbor);
@@ -1528,13 +1528,13 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -1564,7 +1564,7 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Y
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -1588,7 +1588,7 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(
+		comm.nbRecvDataGPU(
 			para->getParH(level)->recvProcessNeighborF3Y[i].g[0],
 			para->getParH(level)->recvProcessNeighborF3Y[i].numberOfGs,
 			para->getParH(level)->recvProcessNeighborF3Y[i].rankNeighbor);
@@ -1597,7 +1597,7 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->sendDataGPU(
+		comm.sendDataGPU(
 			para->getParH(level)->sendProcessNeighborF3Y[i].g[0],
 			para->getParH(level)->sendProcessNeighborF3Y[i].numberOfGs,
 			para->getParH(level)->sendProcessNeighborF3Y[i].rankNeighbor);
@@ -1606,13 +1606,13 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -1642,7 +1642,7 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Z
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -1666,7 +1666,7 @@ void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(
+		comm.nbRecvDataGPU(
 			para->getParH(level)->recvProcessNeighborF3Z[i].g[0],
 			para->getParH(level)->recvProcessNeighborF3Z[i].numberOfGs,
 			para->getParH(level)->recvProcessNeighborF3Z[i].rankNeighbor);
@@ -1675,7 +1675,7 @@ void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->sendDataGPU(
+		comm.sendDataGPU(
 			para->getParH(level)->sendProcessNeighborF3Z[i].g[0],
 			para->getParH(level)->sendProcessNeighborF3Z[i].numberOfGs,
 			para->getParH(level)->sendProcessNeighborF3Z[i].rankNeighbor);
@@ -1684,13 +1684,13 @@ void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
index 34c9cba801c675bdf4c2cd39daca3be2d7918dbe..82662cdc55e8b0ff5f4afe7d31a6563579b45559 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
@@ -9,31 +9,31 @@
 
 //////////////////////////////////////////////////////////////////////////
 //1D domain decomposition
-extern "C" void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
 //////////////////////////////////////////////////////////////////////////
 //3D domain decomposition
-extern "C" void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
 //////////////////////////////////////////////////////////////////////////
 //3D domain decomposition convection diffusion
-extern "C" void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
 //////////////////////////////////////////////////////////////////////////
 //3D domain decomposition F3 - K18/K20
-extern "C" void exchangeCollDataF3XGPU( Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangeCollDataF3YGPU( Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangeCollDataF3ZGPU( Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangeCollDataF3XGPU( Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangeCollDataF3YGPU( Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangeCollDataF3ZGPU( Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
 //////////////////////////////////////////////////////////////////////////
-extern "C" void barrierGPU(vf::gpu::Communicator* comm);
+extern "C" void barrierGPU(vf::gpu::Communicator& comm);
 //////////////////////////////////////////////////////////////////////////
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/GPU/devCheck.cpp b/src/gpu/VirtualFluids_GPU/GPU/devCheck.cpp
deleted file mode 100644
index dfe57620a06ff116bbd5a0d98828637057e5bc69..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/GPU/devCheck.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-//  _    ___      __              __________      _     __        ______________   __
-// | |  / (_)____/ /___  ______ _/ / ____/ /_  __(_)___/ /____   /  ___/ __  / /  / /
-// | | / / / ___/ __/ / / / __ `/ / /_  / / / / / / __  / ___/  / /___/ /_/ / /  / /
-// | |/ / / /  / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__  )  / /_) / ____/ /__/ / 
-// |___/_/_/   \__/\__,_/\__,_/_/_/   /_/\__,_/_/\__,_/____/   \____/_/    \_____/
-//
-//////////////////////////////////////////////////////////////////////////
-#include "devCheck.h"
-
-#include <stdio.h>
-#include <stdlib.h> 
-
-#include <cuda_runtime.h>
-
-
-int devCheck(int gpudevice)
-{
-	int device_count = 0;
-	int device;  // used with  cudaGetDevice() to verify cudaSetDevice() 
-
-   // get the number of non-emulation devices  detected 
-	cudaGetDeviceCount(&device_count);
-	if (gpudevice > device_count)
-	{
-		printf("gpudevice >=  device_count ... exiting\n");
-		exit(1);
-	}
-	cudaError_t cudareturn;
-	cudaDeviceProp deviceProp;
-
-	// cudaGetDeviceProperties() is also  demonstrated in the deviceQuery/ example
-	// of the sdk projects directory 
-	cudaGetDeviceProperties(&deviceProp, gpudevice);
-	printf("[compute capability] = [%d.%d]\n",
-		deviceProp.major, deviceProp.minor);
-
-	if (deviceProp.major > 999)
-	{
-		printf("warning, CUDA Device  Emulation (CPU) detected, exiting\n");
-		exit(1);
-	}
-
-	// choose a cuda device for kernel  execution 
-	cudareturn = cudaSetDevice(gpudevice);
-	if (cudareturn == cudaErrorInvalidDevice)
-	{
-		perror("cudaSetDevice returned  cudaErrorInvalidDevice");
-	}
-	else
-	{
-		// double check that device was  properly selected 
-		cudaGetDevice(&device);
-		//printf("cudaGetDevice()=%d\n",device); 
-		return device;
-	}
-	return -1;
-}
diff --git a/src/gpu/VirtualFluids_GPU/GPU/devCheck.h b/src/gpu/VirtualFluids_GPU/GPU/devCheck.h
deleted file mode 100644
index 6357e4ac51f35b71fe463646e8ca51f5b231ed9c..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/GPU/devCheck.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef DEVCHECK_H
-#define DEVCHECK_H
-
-int devCheck(int gpudevice); 
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
index 08b71d42e79564d9eac887289a1ae36824095c46..47f689b7b3c88a6c7591454909cc6875384908c1 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
@@ -5,7 +5,7 @@
 
 #include <memory>
 
-#include "Utilities/CudaGrid.h"
+#include <cuda/CudaGrid.h>
 
 class CheckParameterStrategy;
 class Parameter;
@@ -31,7 +31,7 @@ protected:
     std::vector<PreProcessorType> myPreProcessorTypes;
     KernelGroup myKernelGroup;
 
-    vf::gpu::CudaGrid cudaGrid;
+    vf::cuda::CudaGrid cudaGrid;
 };
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu
index 4c82851996646590d8c246df5f940b58a308d52c..b86af5f876286599146190a1244a5bf21e2948fd 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu
@@ -4,7 +4,6 @@
 
 #include "Parameter/Parameter.h"
 #include "../RunLBMKernel.cuh"
-#include "Kernel/Utilities/CudaGrid.h"
 
 #include <lbm/BGK.h>
 
@@ -26,7 +25,7 @@ BGKUnified::BGKUnified(std::shared_ptr<Parameter> para, int level)
 
     myKernelGroup = BasicKernel;
 
-    this->cudaGrid = CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP);
+    this->cudaGrid = cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP);
 }
 
 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
index b6f5d21ccf909f1ce3bcf11a4558f4771d87d021..50f8d32f06ad25a4ab9d6f43ce5d908570d9b332 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
@@ -25,7 +25,7 @@ CumulantK15Unified::CumulantK15Unified(std::shared_ptr<Parameter> para, int leve
 
     myKernelGroup = BasicKernel;
 
-    this->cudaGrid = CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP);
+    this->cudaGrid = cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP);
 }
 
 void CumulantK15Unified::run()
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
index 989fce0c5e797ef90d644845f6c502bee700f6e1..e6ab61260b2c20ea5aec868ed70d714a066f2539 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
@@ -4,7 +4,6 @@
 
 #include "Parameter/Parameter.h"
 #include "../RunLBMKernel.cuh"
-#include "Kernel/Utilities/CudaGrid.h"
 
 #include <lbm/CumulantChimera.h>
 
@@ -25,7 +24,7 @@ CumulantK17Unified::CumulantK17Unified(std::shared_ptr<Parameter> para, int leve
 
     myKernelGroup = BasicKernel;
 
-    this->cudaGrid = CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP);
+    this->cudaGrid = cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP);
 }
 
 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.cpp
deleted file mode 100644
index fa17bf449915eba509dbabbe71f556c19fa43bcf..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-#include "CudaGrid.h"
-
-
-
-namespace vf
-{
-namespace gpu
-{
-
-CudaGrid::CudaGrid(unsigned int numberOfThreads, unsigned int size_matrix)
-{
-    int Grid = (size_matrix / numberOfThreads) + 1;
-    int Grid1, Grid2;
-    if (Grid > 512) {
-        Grid1 = 512;
-        Grid2 = (Grid / Grid1) + 1;
-    } else {
-        Grid1 = 1;
-        Grid2 = Grid;
-    }
-    
-    grid = dim3(Grid1, Grid2);
-    threads = dim3(numberOfThreads, 1, 1);
-}
-
-}
-}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.h b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.h
deleted file mode 100644
index 27a18a58843b0de064009ab0f837518e3bb44b9d..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef GPU_CUDA_GRID_H
-#define GPU_CUDA_GRID_H
-
-
-#include <cuda_runtime.h>
-
-namespace vf
-{
-namespace gpu
-{
-
-
-struct CudaGrid 
-{
-    dim3 threads;
-    dim3 grid;
-
-    CudaGrid(unsigned int numberOfEntities, unsigned int threadsPerBlock);
-    CudaGrid() = default;
-};
-
-}
-}
-
-#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index 78f8742b915f1465a35da47c2c560cd32693ce36..737b4edad35eee4829065b89fc1b12eb2c43ef4d 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -10,7 +10,6 @@
 #include "Communication/ExchangeData27.h"
 #include "Parameter/Parameter.h"
 #include "GPU/GPU_Interface.h"
-#include "GPU/devCheck.h"
 #include "basics/utilities/UbFileOutputASCII.h"
 //////////////////////////////////////////////////////////////////////////
 #include "Output/MeasurePointWriter.hpp"
@@ -46,7 +45,15 @@
 #include "PreProcessor/PreProcessorFactory/PreProcessorFactory.h"
 #include "Kernel/Kernel.h"
 
+#include <cuda/DeviceInfo.h>
 
+#include <logger/Logger.h>
+
+
+Simulation::Simulation(vf::gpu::Communicator& communicator) : communicator(communicator)
+{
+
+}
 
 std::string getFileName(const std::string& fname, int step, int myID)
 {
@@ -77,10 +84,9 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std
    this->gridProvider = gridProvider;
    this->cudaManager = cudaManager;
    gridProvider->initalGridInformations();
-   comm = vf::gpu::Communicator::getInstanz();
    this->para = para;
 
-   devCheck(comm->mapCudaDevice(para->getMyID(), para->getNumprocs(), para->getDevices(), para->getMaxDev()));
+   vf::cuda::verifyAndSetDevice(communicator.mapCudaDevice(para->getMyID(), para->getNumprocs(), para->getDevices(), para->getMaxDev()));
    
    para->initLBMSimulationParameter();
 
@@ -255,7 +261,7 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std
 
    //////////////////////////////////////////////////////////////////////////
    //output << "define the Grid..." ;
-   //defineGrid(para, comm);
+   //defineGrid(para, communicator);
    ////allocateMemory();
    //output << "done.\n";
 
@@ -418,7 +424,7 @@ void Simulation::run()
 	////////////////////////////////////////////////////////////////////////////////
 	for(t=para->getTStart();t<=para->getTEnd();t++)
 	{
-        updateGrid27(para.get(), comm, cudaManager.get(), pm, 0, t, kernels);
+        updateGrid27(para.get(), communicator, cudaManager.get(), pm, 0, t, kernels);
 
 	    ////////////////////////////////////////////////////////////////////////////////
 	    //Particles
@@ -433,7 +439,7 @@ void Simulation::run()
         // run Analyzers for kinetic energy and enstrophy for TGV in 3D
         // these analyzers only work on level 0
 	    ////////////////////////////////////////////////////////////////////////////////
-        if( this->kineticEnergyAnalyzer || this->enstrophyAnalyzer ) exchangeMultiGPU(para.get(), comm, cudaManager.get(), 0);
+        if( this->kineticEnergyAnalyzer || this->enstrophyAnalyzer ) exchangeMultiGPU(para.get(), communicator, cudaManager.get(), 0);
 
 	    if( this->kineticEnergyAnalyzer ) this->kineticEnergyAnalyzer->run(t);
 	    if( this->enstrophyAnalyzer     ) this->enstrophyAnalyzer->run(t);
@@ -627,7 +633,7 @@ void Simulation::run()
 	  ////////////////////////////////////////////////////////////////////////////////
       // File IO
       ////////////////////////////////////////////////////////////////////////////////
-      //comm->startTimer();
+      //communicator.startTimer();
       if(para->getTOut()>0 && t%para->getTOut()==0 && t>para->getTStartOut())
       {
 		  //////////////////////////////////////////////////////////////////////////////////
@@ -673,7 +679,7 @@ void Simulation::run()
             {
 		        //////////////////////////////////////////////////////////////////////////
 		        //exchange data for valid post process
-		        exchangeMultiGPU(para.get(), comm, cudaManager.get(), lev);
+		        exchangeMultiGPU(para.get(), communicator, cudaManager.get(), lev);
                 //////////////////////////////////////////////////////////////////////////
                //if (para->getD3Qxx()==19)
                //{
@@ -1296,7 +1302,4 @@ void Simulation::free()
 		probe->free(para.get(), cudaManager.get());
 	}
 	//////////////////////////////////////////////////////////////////////////
-
-    delete comm;
-
-}
\ No newline at end of file
+}
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.h b/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
index d6c6702c4db8a671d4f6dbfea4c90cdf8f48356d..72c86140258b01aec3b3ed00d59c271f1824d514 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
@@ -13,13 +13,7 @@
 
 #include "VirtualFluids_GPU_export.h"
 
-namespace vf
-{
-namespace gpu
-{
-class Communicator;
-}
-}
+namespace vf::gpu { class Communicator; }
 
 class CudaMemoryManager;
 class Parameter;
@@ -38,6 +32,7 @@ class TrafficMovementFactory;
 class VIRTUALFLUIDS_GPU_EXPORT Simulation
 {
 public:
+	Simulation(vf::gpu::Communicator& communicator);
 	void run();
 	void init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std::shared_ptr<DataWriter> dataWriter, std::shared_ptr<CudaMemoryManager> cudaManager);
 	void free();
@@ -67,7 +62,7 @@ protected:
 
 	LogWriter output;
 
-    vf::gpu::Communicator* comm;
+	vf::gpu::Communicator& communicator;
     SPtr<Parameter> para;
     SPtr<GridProvider> gridProvider;
     SPtr<DataWriter> dataWriter;
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu
index 3a24c3efc6b5cda0121aa483581c51e288d43f4f..283b0dc7bf1570b7fd1bb0c91f53ff4e8b4ca503 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.cu
@@ -4,7 +4,7 @@
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
 
-#include "Kernel/Utilities/CudaGrid.h"
+#include <cuda/CudaGrid.h>
 #include "lbm/constants/NumericConstants.h"
 #include "VirtualFluids_GPU/GPU/GeometryUtils.h"
 
@@ -164,14 +164,14 @@ void ActuatorLine::init(Parameter* para, GridProvider* gridProvider, CudaMemoryM
 }
 
 
-void ActuatorLine::visit(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t)
+void ActuatorLine::interact(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t)
 {
     if (level != this->level) return;
 
     cudaManager->cudaCopyBladeCoordsHtoD(this);
 
     uint numberOfThreads = para->getParH(level)->numberofthreads;
-    vf::gpu::CudaGrid bladeGrid = vf::gpu::CudaGrid(numberOfThreads, this->numberOfNodes);
+    vf::cuda::CudaGrid bladeGrid = vf::cuda::CudaGrid(numberOfThreads, this->numberOfNodes);
 
     interpolateVelocities<<< bladeGrid.grid, bladeGrid.threads >>>(
         para->getParD(this->level)->coordX_SP, para->getParD(this->level)->coordY_SP, para->getParD(this->level)->coordZ_SP,        
@@ -188,7 +188,8 @@ void ActuatorLine::visit(Parameter* para, CudaMemoryManager* cudaManager, int le
 
     cudaManager->cudaCopyBladeForcesHtoD(this);
 
-    vf::gpu::CudaGrid sphereGrid = vf::gpu::CudaGrid(numberOfThreads, this->numberOfIndices);
+    vf::cuda::CudaGrid sphereGrid = vf::cuda::CudaGrid(numberOfThreads, this->numberOfIndices);
+
     applyBodyForces<<<sphereGrid.grid, sphereGrid.threads>>>(
         para->getParD(this->level)->coordX_SP, para->getParD(this->level)->coordY_SP, para->getParD(this->level)->coordZ_SP,        
         para->getParD(this->level)->forceX_SP, para->getParD(this->level)->forceY_SP, para->getParD(this->level)->forceZ_SP,        
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h
index a8c93507f52dc0de24ba5f5ed4d652cd4d7cbb91..767ef29516295f4f52bde099607e56233e6f51ce 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorLine.h
@@ -41,7 +41,7 @@ public:
     virtual ~ActuatorLine(){};
 
     void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager) override;
-    void visit(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t) override;
+    void interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t) override;
     void free(Parameter* para, CudaMemoryManager* cudaManager) override;
     void write(uint t);
 
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h
index 3aed65d8353601ec51d355722c7950235bd6ecbb..78b4d5e9ba148651e78c38758624de69dd08c47d 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PreCollisionInteractor.h
@@ -31,7 +31,7 @@ public:
     }
 
     virtual void init(Parameter *para, GridProvider *gridProvider, CudaMemoryManager *cudaManager) = 0;
-    virtual void visit(Parameter *para, CudaMemoryManager *cudaManager, int level, uint t) = 0;
+    virtual void interact(Parameter *para, CudaMemoryManager *cudaManager, int level, uint t) = 0;
     virtual void free(Parameter *para, CudaMemoryManager *cudaManager) = 0;
 
 protected:
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
index 5a90af053968d773df2623c88a8728299015d4c8..cf03d639add0c883793c6ffad041e7b6da6d98d3 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
@@ -1,6 +1,6 @@
 #include "PlaneProbe.h"
 
-#include "Kernel/Utilities/CudaGrid.h"
+#include <cuda/CudaGrid.h>
 
 #include <cuda.h>
 #include <cuda_runtime.h>
@@ -41,7 +41,7 @@ void PlaneProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::ve
 
 void PlaneProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, int level)
 {
-    vf::gpu::CudaGrid grid = vf::gpu::CudaGrid(para->getParH(level)->numberofthreads, probeStruct->nPoints);
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, probeStruct->nPoints);
     interpQuantities<<<grid.grid, grid.threads>>>(  probeStruct->pointIndicesD, probeStruct->nPoints, probeStruct->vals,
                                                     probeStruct->distXD, probeStruct->distYD, probeStruct->distZD,
                                                     para->getParD(level)->vx_SP, para->getParD(level)->vy_SP, para->getParD(level)->vz_SP, para->getParD(level)->rho_SP, 
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
index bf8875c29252759bef9e557a173559ae035b4e94..76467d8da942cb189516571db66a473e5c4c32d5 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
@@ -4,7 +4,7 @@
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
 
-#include "Kernel/Utilities/CudaGrid.h"
+#include <cuda/CudaGrid.h>
 
 #include "Parameter/Parameter.h"
 #include "DataStructureInitializer/GridProvider.h"
@@ -44,7 +44,7 @@ void PointProbe::findPoints(Parameter* para, GridProvider* gridProvider, std::ve
 
 void PointProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, int level)
 {
-    vf::gpu::CudaGrid grid = vf::gpu::CudaGrid(para->getParH(level)->numberofthreads, probeStruct->nPoints);
+    vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, probeStruct->nPoints);
 
     interpQuantities<<<grid.grid, grid.threads>>>(  probeStruct->pointIndicesD, probeStruct->nPoints, probeStruct->vals,
                                                     probeStruct->distXD, probeStruct->distYD, probeStruct->distZD,
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
index 8c02f7227da3ff4d52758604c5acb7e68c31a261..e31e121ac709f922c08e2eda091ea1f3920ee899 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
@@ -224,7 +224,7 @@ void Probe::addProbeStruct(CudaMemoryManager* cudaManager, std::vector<int>& pro
 }
 
 
-void Probe::visit(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t)
+void Probe::interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t)
 {
 
     if(t>this->tStartAvg)
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h
index d49ddd690e5b8f820e14b882eacf42e456a11a67..d60edaa22d6c4d0b1c8a26ddf4364828ad75f82e 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h
@@ -57,7 +57,7 @@ public:
         assert("Output starts before averaging!" && tStartOut>=tStartAvg);
     }
     void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaManager) override;
-    void visit(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t) override;
+    void interact(Parameter* para, CudaMemoryManager* cudaManager, int level, uint t) override;
     void free(Parameter* para, CudaMemoryManager* cudaManager) override;
 
     SPtr<ProbeStruct> getProbeStruct(int level){ return this->probeParams[level]; }
diff --git a/src/lbm/CMakeLists.txt b/src/lbm/CMakeLists.txt
index 56b03bded71b83d112d994959db0ba1d6245dc63..afa90bdd3f95bb71cf7f1eda6407f9b38766072a 100644
--- a/src/lbm/CMakeLists.txt
+++ b/src/lbm/CMakeLists.txt
@@ -7,6 +7,6 @@ if(BUILD_VF_CPU)
     vf_add_tests()
 endif()
 
-if(BUILD_VF_GPU)
+if(BUILD_VF_GPU OR BUILD_VF_GKS)
     add_subdirectory(cuda)
 endif()
diff --git a/src/logger/CMakeLists.txt b/src/logger/CMakeLists.txt
index 4c6c5294241eb5f3587e126003e7f669592905fa..32cbdae2aaa40f11b12e1f3fe1629f47ba4ca195 100644
--- a/src/logger/CMakeLists.txt
+++ b/src/logger/CMakeLists.txt
@@ -1,2 +1,2 @@
 
-vf_add_library(NAME logger PUBLIC_LINK spdlog)
+vf_add_library(NAME logger PUBLIC_LINK spdlog PRIVATE_LINK project_warnings)
diff --git a/src/logger/Logger.h b/src/logger/Logger.h
index 594decaf5bd85913335e6d1659b6d89cad6d0610..adb7796135a989843ef8de1f778c9901f3ad17c8 100644
--- a/src/logger/Logger.h
+++ b/src/logger/Logger.h
@@ -48,7 +48,7 @@
 #define VF_LOG_TRACE(...) spdlog::trace(__VA_ARGS__)
 #define VF_LOG_DEBUG(...) spdlog::debug(__VA_ARGS__)
 #define VF_LOG_INFO(...) spdlog::info(__VA_ARGS__)
-#define VF_LOG_WARNING(...) spdlog::warning(__VA_ARGS__)
+#define VF_LOG_WARNING(...) spdlog::warn(__VA_ARGS__)
 #define VF_LOG_CRITICAL(...) spdlog::critical(__VA_ARGS__)
 
 
diff --git a/src/mpi/CMakeLists.txt b/src/mpi/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..de1d58f5bd39e14742180b9cc6a77fb640d117f6
--- /dev/null
+++ b/src/mpi/CMakeLists.txt
@@ -0,0 +1,4 @@
+
+# MPICommunicator.h is installed as a public header and declares an MPI_Comm member,
+# so the MPI usage requirements (include dirs, libs) have to propagate to consumers.
+vf_add_library(NAME mpi PUBLIC_LINK logger MPI::MPI_CXX PRIVATE_LINK basics)
diff --git a/src/cpu/VirtualFluidsCore/Parallel/Communicator.cpp b/src/mpi/Communicator.cpp
similarity index 91%
rename from src/cpu/VirtualFluidsCore/Parallel/Communicator.cpp
rename to src/mpi/Communicator.cpp
index b7ea54267edb7126573e512a7593e3f3f4312bd6..937f4d819f67804380d807bfe5ef6048ff507058 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/Communicator.cpp
+++ b/src/mpi/Communicator.cpp
@@ -34,12 +34,23 @@
 #include "Communicator.h"
 #include <basics/utilities/UbException.h>
 
-SPtr<Communicator> Communicator::instance = SPtr<Communicator>();
+namespace vf::mpi
+{
+// Serializes creation of and access to the shared communicator instance.
+std::mutex Communicator::instantiation_mutex;
+// Stays empty until a concrete subclass (e.g. MPICommunicator::getInstance) installs itself.
+std::shared_ptr<Communicator> Communicator::instance;
 //////////////////////////////////////////////////////////////////////////
-SPtr<Communicator> Communicator::getInstance()
+//! \brief Returns the communicator that a concrete subclass has installed beforehand.
+//! \details Raises an UbException via UB_THROW when no instance exists yet.
+std::shared_ptr<Communicator> Communicator::getInstance()
 {
+    // Subclasses assign 'instance' while holding this mutex; take it here too so the read cannot race.
+    std::lock_guard<std::mutex> myLock(instantiation_mutex);
     if (!instance)
         UB_THROW(UbException(UB_EXARGS, "Communicator isn't initialized correctly! You can not create a new instance "
                                         "of abstract Communicator class!"));
     return instance;
 }
+
+} // namespace vf::mpi
diff --git a/src/cpu/VirtualFluidsCore/Parallel/Communicator.h b/src/mpi/Communicator.h
similarity index 91%
rename from src/cpu/VirtualFluidsCore/Parallel/Communicator.h
rename to src/mpi/Communicator.h
index 28ba705480a6291aac3d31b3341f93f73a35f829..bcec064a23801c7b597d91deb601b44a1d9c7a71 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/Communicator.h
+++ b/src/mpi/Communicator.h
@@ -31,20 +31,29 @@
 //! \author Konstantin Kutscher
 //=======================================================================================
 
-#ifndef COMMUNICATOR_H
-#define COMMUNICATOR_H
+#ifndef MPI_COMMUNICATOR_H
+#define MPI_COMMUNICATOR_H
 
 #include <string>
 #include <vector>
+#include <memory>
+#include <sstream>
+#include <mutex>
 
-#include <PointerDefinitions.h>
+
+namespace vf::mpi 
+{
 
 //! \brief An abstract class for communication between processes in parallel computation
 class Communicator
 {
 public:
+    Communicator(const Communicator&) = delete;
+    Communicator & operator=(const Communicator& rhs) = delete;
+    static std::shared_ptr<Communicator> getInstance();
+
     virtual ~Communicator() = default;
-    static SPtr<Communicator> getInstance();
+
     virtual int getBundleID()                      = 0;
     virtual int getNumberOfBundles()               = 0;
     virtual int getProcessID()                     = 0;
@@ -84,9 +93,13 @@ public:
     virtual void broadcast(std::vector<long int> &values) = 0;
 
 protected:
-    Communicator()                     = default;
-    Communicator(const Communicator &) = default;
-    static SPtr<Communicator> instance;
+    Communicator() = default;
+
+    static std::mutex instantiation_mutex;
+
+    static std::shared_ptr<Communicator> instance;
 };
 
+}
+
 #endif
diff --git a/src/cpu/VirtualFluidsCore/Parallel/MPICommunicator.cpp b/src/mpi/MPICommunicator.cpp
similarity index 95%
rename from src/cpu/VirtualFluidsCore/Parallel/MPICommunicator.cpp
rename to src/mpi/MPICommunicator.cpp
index 4e5b5cb79ac01f730d5f1b92a8238605992d3825..4e7a155ef3b78f7daa0a582f84f2ef8af83886c8 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/MPICommunicator.cpp
+++ b/src/mpi/MPICommunicator.cpp
@@ -5,11 +5,22 @@
 
 #include <sstream>
 using namespace std;
+
+namespace vf::mpi 
+{
+std::shared_ptr<Communicator> MPICommunicator::getInstance()
+{
+    // Hold the shared mutex so that only one caller can construct the singleton.
+    const std::lock_guard<std::mutex> creationGuard(instantiation_mutex);
+    if (!instance)
+        instance.reset(new MPICommunicator);
+    return instance;
+}
 //////////////////////////////////////////////////////////////////////////
 MPICommunicator::MPICommunicator()
 {
     // proof if MPI is initialized
-    int mpiInitialized = (int)false;
+    int mpiInitialized = 0; // false
     MPI_Initialized(&mpiInitialized);
     if (!mpiInitialized) {
         MPI_Init(NULL, NULL);
@@ -25,7 +36,7 @@ MPICommunicator::MPICommunicator()
 MPICommunicator::~MPICommunicator()
 {
     // proof if MPI is finalized
-    int _mpiFinalized = (int)false;
+    int _mpiFinalized = 0; // false
     MPI_Finalized(&_mpiFinalized);
     if (!_mpiFinalized) {
         MPI_Finalize();
@@ -33,13 +44,6 @@ MPICommunicator::~MPICommunicator()
     }
 }
 //////////////////////////////////////////////////////////////////////////
-SPtr<Communicator> MPICommunicator::getInstance()
-{
-    if (!Communicator::instance)
-        Communicator::instance = SPtr<Communicator>(new MPICommunicator());
-    return Communicator::instance;
-}
-//////////////////////////////////////////////////////////////////////////
 void MPICommunicator::abort(int errorcode) { MPI_Abort(comm, errorcode); }
 ////////////////////////////////////////////////////////////////////////////
 vector<string> MPICommunicator::gather(const string &str)
@@ -164,4 +168,7 @@ void MPICommunicator::broadcast(float &value) { broadcast<float>(value); }
 void MPICommunicator::broadcast(double &value) { broadcast<double>(value); }
 //////////////////////////////////////////////////////////////////////////
 void MPICommunicator::broadcast(long int &value) { broadcast<long int>(value); }
+
+} // namespace vf::mpi
+
 #endif
diff --git a/src/cpu/VirtualFluidsCore/Parallel/MPICommunicator.h b/src/mpi/MPICommunicator.h
similarity index 96%
rename from src/cpu/VirtualFluidsCore/Parallel/MPICommunicator.h
rename to src/mpi/MPICommunicator.h
index cd63d8f1db9e27ab3572a26693355c88a303104f..96d7e3593acb6d94706c9842f176e96c7a0e8969 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/MPICommunicator.h
+++ b/src/mpi/MPICommunicator.h
@@ -1,7 +1,7 @@
 #if defined VF_MPI
 
-#ifndef MPICOMMUNICATOR_H
-#define MPICOMMUNICATOR_H
+#ifndef MPI_MPICOMMUNICATOR_H
+#define MPI_MPICOMMUNICATOR_H
 
 #include "Communicator.h"
 #include <PointerDefinitions.h>
@@ -11,19 +11,20 @@
 #include <string>
 #include <vector>
 
+namespace vf::mpi 
+{
+
 //! \brief A class uses MPI library to communication.
 //! \details Support MPI communication. Implements singleton pattern.
 //! \author K. Kutscher
-
 class MPICommunicator : public Communicator
 {
-private:
-    MPICommunicator();
-    MPICommunicator(const MPICommunicator &) = default;
-
 public:
+    MPICommunicator(MPICommunicator const&) = delete;
+    MPICommunicator& operator=(MPICommunicator const&) = delete;
+
     ~MPICommunicator() override;
-    static SPtr<Communicator> getInstance();
+    static std::shared_ptr<Communicator> getInstance();
     int getBundleID() override;
     int getNumberOfBundles() override;
     int getProcessID() override;
@@ -75,6 +76,8 @@ public:
     void broadcast(T &value);
 
 private:
+    MPICommunicator();
+
     int numprocs, PID;
     MPI_Comm comm;
     int root;
@@ -201,4 +204,6 @@ void MPICommunicator::broadcast(T &value)
 
 #endif
 
+}
+
 #endif
diff --git a/src/cpu/VirtualFluidsCore/Parallel/NullCommunicator.cpp b/src/mpi/NullCommunicator.cpp
similarity index 93%
rename from src/cpu/VirtualFluidsCore/Parallel/NullCommunicator.cpp
rename to src/mpi/NullCommunicator.cpp
index d3846dfd36a765891936e45c80a62dcd88025c15..0f407d0dccab79b551e8671bcaa150f6aab36789 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/NullCommunicator.cpp
+++ b/src/mpi/NullCommunicator.cpp
@@ -33,9 +33,17 @@
 
 #include "NullCommunicator.h"
 
-NullCommunicator::NullCommunicator() = default;
-//////////////////////////////////////////////////////////////////////////
-NullCommunicator::~NullCommunicator() = default;
+namespace vf::mpi 
+{
+
+// std::shared_ptr<Communicator> NullCommunicator::getInstance()
+// {
+//     std::lock_guard<std::mutex> myLock(instantiation_mutex);
+//     if (!instance){
+//         instance = std::shared_ptr<NullCommunicator>(new NullCommunicator);
+//     }
+//     return instance;
+// }
 //////////////////////////////////////////////////////////////////////////
 int NullCommunicator::getBundleID() { return 0; }
 //////////////////////////////////////////////////////////////////////////
@@ -62,3 +70,6 @@ void NullCommunicator::allGatherInts(std::vector<int> &svalues, std::vector<int>
 void NullCommunicator::sendSerializedObject(std::stringstream &ss, int target) {}
 //////////////////////////////////////////////////////////////////////////
 void NullCommunicator::receiveSerializedObject(std::stringstream &ss, int source) {}
+
+
+}
diff --git a/src/cpu/VirtualFluidsCore/Parallel/NullCommunicator.h b/src/mpi/NullCommunicator.h
similarity index 94%
rename from src/cpu/VirtualFluidsCore/Parallel/NullCommunicator.h
rename to src/mpi/NullCommunicator.h
index 3dad3395a34b5d3a049c3b0bb8d960e924078324..1a3ba3eac5c840d8e6e6c474d7dec9dfe6be6750 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/NullCommunicator.h
+++ b/src/mpi/NullCommunicator.h
@@ -31,20 +31,20 @@
 //! \author Konstantin Kutscher
 //=======================================================================================
 
-#ifndef NullCommunicator_H
-#define NullCommunicator_H
+#ifndef MPI_NullCommunicator_H
+#define MPI_NullCommunicator_H
 
 #include "Communicator.h"
 
-#include <PointerDefinitions.h>
+namespace vf::mpi 
+{
 
 //! \brief A class implements Communicator for shared memory.
 //! \details NullCommunicator is only a place-holder. It is only one process in shared memory.
 class NullCommunicator : public Communicator
 {
 public:
-    NullCommunicator();
-    ~NullCommunicator() override;
+    // static std::shared_ptr<Communicator> getInstance();
     int getBundleID() override;
     int getNumberOfBundles() override;
     int getProcessID() override;
@@ -59,8 +59,9 @@ public:
     void sendSerializedObject(std::stringstream &ss, int target) override;
     void receiveSerializedObject(std::stringstream &ss, int source) override;
 
-protected:
-private:
+    
 };
 
+}
+
 #endif