diff --git a/.clang-format b/.clang-format
index 915697a4a6bf5b2bb3bd74e0593e02915a31d450..60054387527c3cfa59e2dcfa13ebb2ae9a0b184a 100644
--- a/.clang-format
+++ b/.clang-format
@@ -50,7 +50,7 @@ BreakConstructorInitializersBeforeComma: false
 BreakConstructorInitializers: BeforeColon
 BreakAfterJavaFieldAnnotations: false
 BreakStringLiterals: true
-ColumnLimit:     300
+ColumnLimit:     125
 CommentPragmas:  '^ IWYU pragma:'
 CompactNamespaces: false
 ConstructorInitializerAllOnOneLineOrOnePerLine: false
diff --git a/.clang-tidy b/.clang-tidy
index e7d5a5e58c66b679eb607560b1c4073abb0f6d97..3ed9ebc5b5104aa82e28c90c1e6a1d3117b89d79 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -53,5 +53,5 @@ cppcoreguidelines-special-member-functions,
 -cppcoreguidelines-interfaces-global-init
 
 '
-HeaderFilterRegex: '\.h$'
-...
+HeaderFilterRegex: '.*/src/.*'
+WarningsAsErrors: '*' # glob list of checks (same format as Checks); '*' promotes every enabled check to an error
\ No newline at end of file
diff --git a/.clangd b/.clangd
new file mode 100644
index 0000000000000000000000000000000000000000..c6f7db4e4d1eb5d761fd66dab7ff582e640651dd
--- /dev/null
+++ b/.clangd
@@ -0,0 +1,9 @@
+CompileFlags:
+  Add:
+    - -xc++
+    - -std=c++17
+    - --cuda-gpu-arch=sm_70
+    - --cuda-path=/usr/local/cuda
+    - -L/usr/local/cuda/lib64
+    - -I/usr/local/cuda/include
+  Remove: [-ccbin=*,-rdc=*,--generate-code*,--options-file,-forward*,--extended-lambda,-fopenmp]
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 74df1acd7c98139314656e70b2e074adcbb0983b..50c1263289d238837267f21e101afa190cb2f6ed 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -5,15 +5,14 @@
             "extensions": [
                 "mhutchie.git-graph",
                 "donjayamanne.githistory",
+                "ms-python.python",
                 "twxs.cmake",
-                "ms-vscode.cpptools",
-                "visualstudioexptteam.vscodeintellicode",
-                "xaver.clang-format",
-                "notskm.clang-tidy",
+                //"ms-vscode.cpptools",
+                "usernamehw.errorlens", // show warnings in code
                 "streetsidesoftware.code-spell-checker",
-                "llvm-vs-code-extensions.vscode-clangd",
-                "jbenden.c-cpp-flylint",
-                "ms-python.python",
+                "xaver.clang-format",
+                "msekoranja.clang-format-checker", // currently this extension results in a lot of warnings
+                "llvm-vs-code-extensions.vscode-clangd" // code index based on clang using the compile_commands.json
             ]
         }
     },
@@ -21,5 +20,5 @@
     "runArgs": ["--gpus","all",                     // remove this line in case you have no gpus available
                 "--hostname=${localEnv:HOSTNAME}"], // HOSTNAME needs to be known by the vscode environment. It is probably necessary to add "export HOSTNAME=<hostname>" to the config file of your host machine's bash.
 
-    "image": "git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu22_04:1.0"
+    "image": "git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu22_04:1.1"
 }
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 281644f5614054f167c577898d4b8eb4788b2076..0f11fe6fcd5df6e63b4aa08dbb2bb9777a9b4962 100755
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,7 +1,7 @@
 ###############################################################################
 ##                       VirtualFluids CI Pipeline                           ##
 ###############################################################################
-image: git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu22_04:1.0
+image: git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu22_04:1.1
 
 stages:
   - build
@@ -114,18 +114,14 @@ msvc_17:
       - $CI_PROJECT_DIR/$env:BUILD_FOLDER/
 
 
-###############################################################################
-##                             Build Python                                  ##
 ###############################################################################
 gcc_12_python:
-  stage: build_python
-
-  needs: ["gcc_12"]
+  stage: build
 
   cache:
     key: "gcc_12-$CI_COMMIT_REF_SLUG"
     paths:
-      - build
+      - pybuild/
 
   artifacts:
     expire_in: 1 hrs
@@ -138,7 +134,7 @@ gcc_12_python:
 
   script:
     - export SKBUILD_BUILD_DIR="pybuild"
-    - export SKBUILD_CMAKE_ARGS="-DBUILD_VF_CPU=ON;-DBUILD_VF_DOUBLE_ACCURACY=ON;-DCMAKE_CXX_COMPILER_LAUNCHER=ccache;-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache;-DCMAKE_C_COMPILER_LAUNCHER=ccache;-G=Ninja"
+    - export SKBUILD_CMAKE_ARGS="-DBUILD_VF_CPU=ON;-DBUILD_VF_GPU=ON;-DCMAKE_CUDA_ARCHITECTURES=70;-DBUILD_VF_DOUBLE_ACCURACY=ON;-DCMAKE_CXX_COMPILER_LAUNCHER=ccache;-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache;-DCMAKE_C_COMPILER_LAUNCHER=ccache;-G=Ninja"
     - pip install . -v
 
 ###############################################################################
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 431c2f796787e5ddb22df5f93587eeac6362f9cf..fc1d3181ca6ed0900a07dd80bcf99f1cb0f169df 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,4 +1,9 @@
 {
-    "c-cpp-flylint.flexelint.enable": false,
-    "c-cpp-flylint.flawfinder.enable": false
+    "files.watcherExclude": {
+        "**/.git/objects/**": true,
+        "**/.git/subtree-cache/**": true,
+        "**/node_modules/*/**": true,
+        "**/.cache/**": true
+      }
+    
 }
\ No newline at end of file
diff --git a/CMake/cmake_config_files/ARAGORN.config.cmake b/CMake/cmake_config_files/ARAGORN.config.cmake
index 9f33c9977924ab60a1300763f53d04bb842b2f00..7643d267aabce7914000face6553fb38135b7a63 100644
--- a/CMake/cmake_config_files/ARAGORN.config.cmake
+++ b/CMake/cmake_config_files/ARAGORN.config.cmake
@@ -7,7 +7,7 @@
 set(CMAKE_CUDA_ARCHITECTURES 86)     # Nvidia GeForce RTX 3060
 
 # add invidual apps here
-set(GPU_APP "apps/gpu/LBM/")
+set(GPU_APP "apps/gpu/")
 list(APPEND USER_APPS 
     "${GPU_APP}DrivenCavityMultiGPU"
     "${GPU_APP}SphereScaling"
diff --git a/CMake/cmake_config_files/ARAGORNUBUNTU.config.cmake b/CMake/cmake_config_files/ARAGORNUBUNTU.config.cmake
index 3259e13acaade9b896e5e4a82dec90d3f4eb5e89..25224f9a8e6f25cad0b1d90c4d3035ebff3a75c4 100644
--- a/CMake/cmake_config_files/ARAGORNUBUNTU.config.cmake
+++ b/CMake/cmake_config_files/ARAGORNUBUNTU.config.cmake
@@ -9,7 +9,7 @@ set(CMAKE_CUDA_ARCHITECTURES 86)     # Nvidia GeForce RTX 3060
 set(PATH_NUMERICAL_TESTS "D:/out/numericalTests/")
 list(APPEND VF_COMPILER_DEFINITION "PATH_NUMERICAL_TESTS=${PATH_NUMERICAL_TESTS}")
 
-set(GPU_APP "apps/gpu/LBM/")
+set(GPU_APP "apps/gpu/")
 list(APPEND USER_APPS 
     "${GPU_APP}DrivenCavityMultiGPU"
     "${GPU_APP}SphereScaling"
diff --git a/CMake/cmake_config_files/ELDARION.config.cmake b/CMake/cmake_config_files/ELDARION.config.cmake
index 53dc97e213a7445e153157ec76937ea0fec1f678..13abc819e862f9275fa2cd77ee6bb1f2d31375bf 100644
--- a/CMake/cmake_config_files/ELDARION.config.cmake
+++ b/CMake/cmake_config_files/ELDARION.config.cmake
@@ -10,7 +10,7 @@ SET(CMAKE_CUDA_ARCHITECTURES 61)    # GeForce 1080 Ti
 set(PATH_NUMERICAL_TESTS "~/output/numericalTests/")
 list(APPEND VF_COMPILER_DEFINITION "PATH_NUMERICAL_TESTS=${PATH_NUMERICAL_TESTS}")
 
-set(GPU_APP "apps/gpu/LBM/")
+set(GPU_APP "apps/gpu/")
 list(APPEND USER_APPS 
     # "${GPU_APP}DrivenCavityMultiGPU"
     # "${GPU_APP}SphereScaling"
diff --git a/CMake/cmake_config_files/KI4ALLGPU.config.cmake b/CMake/cmake_config_files/KI4ALLGPU.config.cmake
index 111a3b3881d7a07eada02fb1e826e88b64b3591e..7e55f21f5549d10b2f5e93daf51a2b0bcb7f8bc1 100644
--- a/CMake/cmake_config_files/KI4ALLGPU.config.cmake
+++ b/CMake/cmake_config_files/KI4ALLGPU.config.cmake
@@ -6,7 +6,7 @@
 
 set(CMAKE_CUDA_ARCHITECTURES 80)     # Nvidia Tesla A100
 
-set(GPU_APP "apps/gpu/LBM/")
+set(GPU_APP "apps/gpu/")
 list(APPEND USER_APPS 
     "${GPU_APP}ChannelFlow"
     "${GPU_APP}SphereScaling"
diff --git a/CMake/cmake_config_files/MOLLOK.config.cmake b/CMake/cmake_config_files/MOLLOK.config.cmake
index bdaf06f86137f542663bf3a4d603230e92b9260b..fac7be0cffc5ff09b8f0b46dbb21af7afb938a66 100644
--- a/CMake/cmake_config_files/MOLLOK.config.cmake
+++ b/CMake/cmake_config_files/MOLLOK.config.cmake
@@ -12,6 +12,6 @@ set(PATH_NUMERICAL_TESTS "D:/out/numericalTests/")
 list(APPEND VF_COMPILER_DEFINITION "PATH_NUMERICAL_TESTS=${PATH_NUMERICAL_TESTS}")
 
 # add invidual apps here
-list(APPEND USER_APPS "apps/gpu/LBM/WTG_RUB")
-list(APPEND USER_APPS "apps/gpu/LBM/TGV_3D_GridRef")
-list(APPEND USER_APPS "apps/gpu/LBM/SphereRefined")
+list(APPEND USER_APPS "apps/gpu/WTG_RUB")
+list(APPEND USER_APPS "apps/gpu/TGV_3D_GridRef")
+list(APPEND USER_APPS "apps/gpu/SphereRefined")
diff --git a/CMake/cmake_config_files/MULE.config.cmake b/CMake/cmake_config_files/MULE.config.cmake
index 2afbce6cc257fa0b8ff4dd7de580cb50c01369f1..abe277026891774030c9bb6338b94616f54f6f4c 100644
--- a/CMake/cmake_config_files/MULE.config.cmake
+++ b/CMake/cmake_config_files/MULE.config.cmake
@@ -1,4 +1,4 @@
 SET(CMAKE_CUDA_ARCHITECTURES "75")
 
-list(APPEND USER_APPS "apps/gpu/LBM/ActuatorLine")
-list(APPEND USER_APPS "apps/gpu/LBM/SphereScaling")
+list(APPEND USER_APPS "apps/gpu/ActuatorLine")
+list(APPEND USER_APPS "apps/gpu/SphereScaling")
diff --git a/CMake/cmake_config_files/PHOENIX.config.cmake b/CMake/cmake_config_files/PHOENIX.config.cmake
index 2112bd6aa50e9335bc6b23bda0f0e9fda3ef7533..7d0e1d6b6ba2790a2ab24e20d4ab6199ee75d0cf 100644
--- a/CMake/cmake_config_files/PHOENIX.config.cmake
+++ b/CMake/cmake_config_files/PHOENIX.config.cmake
@@ -8,7 +8,7 @@
 ## nvidia
 set(CMAKE_CUDA_ARCHITECTURES 60) # NVIDIA Tesla P100
 
-set(GPU_APP "apps/gpu/LBM/")
+set(GPU_APP "apps/gpu/")
 list(APPEND USER_APPS 
     # "${GPU_APP}DrivenCavityMultiGPU"
     # "${GPU_APP}SphereScaling"
diff --git a/CMake/cmake_config_files/TESLA03.config.cmake b/CMake/cmake_config_files/TESLA03.config.cmake
index e29c7306c5448b97eefed9d7a41871a5e4d3b589..dc0f1253c668dd6fba92c5d1ee33c387af67ee5a 100644
--- a/CMake/cmake_config_files/TESLA03.config.cmake
+++ b/CMake/cmake_config_files/TESLA03.config.cmake
@@ -12,4 +12,4 @@ SET(PATH_NUMERICAL_TESTS "E:/temp/numericalTests/")
 list(APPEND VF_COMPILER_DEFINITION "PATH_NUMERICAL_TESTS=${PATH_NUMERICAL_TESTS}")
 
 # add invidual apps here
-list(APPEND USER_APPS "apps/gpu/LBM/MusselOyster")
+list(APPEND USER_APPS "apps/gpu/MusselOyster")
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d82fe2b93c5103beb6a960ea4bd29914f0d982c4..34edea623975db57b6b3a23b0a1b49721367d67b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -138,6 +138,11 @@ if(BUILD_VF_GPU)
     set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" CACHE STRING "Cuda Architecture (compute capabilitiy)")
 
     set(CMAKE_CUDA_FLAGS_DEBUG " -G" CACHE STRING "" FORCE)
+
+    # We disable the usage of CUDA response files here.
+    # Usually CMake generates CUDA_INCLUDES.rsp, which contains all include paths and is passed in compile_commands.json via the --options-file flag.
+    # clangd cannot parse this .rsp file, therefore we disable it.
+    set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_INCLUDES 0)
 endif()
 
 
@@ -205,7 +210,7 @@ ENDIF()
 
 add_subdirectory(src/logger)
 add_subdirectory(src/basics)
-add_subdirectory(src/mpi)
+add_subdirectory(src/parallel)
 add_subdirectory(src/lbm)
 
 
@@ -216,7 +221,7 @@ if (BUILD_VF_CPU)
     include(cpu.cmake)
 endif()
 if(BUILD_VF_GPU)
-    add_subdirectory(src/cuda)
+    add_subdirectory(src/gpu/cuda_helper)
     include(gpu.cmake)
 endif()
 
diff --git a/Containers/Ubuntu22_04.Dockerfile b/Containers/Ubuntu22_04.Dockerfile
index eff34a26937107a7f5612a598306e83f22de3ab8..8eb107bc5d9ac43c1bcda5b1f638d9f49bcc2c91 100644
--- a/Containers/Ubuntu22_04.Dockerfile
+++ b/Containers/Ubuntu22_04.Dockerfile
@@ -1,7 +1,7 @@
 # VirtualFluids Development Image:
 # Ubuntu 22.04
 
-FROM nvidia/cuda:12.1.1-devel-ubuntu22.04
+FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
 
 # timezone
 ARG TZ
@@ -16,22 +16,25 @@ RUN apt-get update &&   \
     ninja-build         \
     openmpi-bin         \
     libopenmpi-dev      \
-    libomp-15-dev          \
-    clang-15               \
-    clang-format-15        \
-    clang-tidy-15          \
-    clang-tools-15         \
+    libomp-15-dev       \
+    clang-15            \
+    clang-format-15     \
+    clang-tidy-15       \
+    clang-tools-15      \
     python3.11          \
     python3-pip         \
     python3.11-dev      \
     cppcheck            \
-    clangd-12           \
     && update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100 \
     && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100 \
     && update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 100 \
     && update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 100 \
     && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 100 \
-    && update-alternatives --install /usr/bin/clangd clangd /usr/bin/clangd-12 100 \
+    && ln -s clang-tidy-15 /usr/bin/clang-tidy \
+    && ln -s clang-format-15 /usr/bin/clang-format \
+    && wget https://github.com/clangd/clangd/releases/download/16.0.2/clangd-linux-16.0.2.zip && unzip clangd-linux-16.0.2.zip \
+    && mv clangd_16.0.2/bin/clangd /usr/bin/clangd-16 && mv clangd_16.0.2/lib/clang/16 /usr/lib/clang/ \
+    && update-alternatives --install /usr/bin/clangd clangd /usr/bin/clangd-16 100 \
     && pip3 install      \
         cmake==3.26.3    \
         setuptools       \
diff --git a/Containers/Ubuntu22_04_wiFI.Dockerfile b/Containers/Ubuntu22_04_wiFI.Dockerfile
new file mode 100755
index 0000000000000000000000000000000000000000..105fe9dce0116eb4ad459982a008ba36ed620839
--- /dev/null
+++ b/Containers/Ubuntu22_04_wiFI.Dockerfile
@@ -0,0 +1,14 @@
+# VirtualFluids Development Image:
+# Ubuntu 22.04
+
+FROM git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu22_04:1.1 as build
+
+# For wiFI https://source.coderefinery.org/Hkorb/wifi.git
+RUN pip3 install      \
+    pandas            \
+    cupy-cuda12x      \
+    mpi4py            \
+    && git clone --recurse-submodules https://source.coderefinery.org/Hkorb/wifi.git \
+    && cd wifi  \
+    && git checkout develop \
+    && pip3 install -e . >> log_wifi_install.txt
diff --git a/Python/actuator_line/actuator_line.py b/Python/actuator_line/actuator_line.py
index b8e7efb59673c3b3c9206bfda535bdd1f85e451d..00fa21baeae9d075f4ebeb49baa9d13394ae4b94 100644
--- a/Python/actuator_line/actuator_line.py
+++ b/Python/actuator_line/actuator_line.py
@@ -36,7 +36,7 @@ r"""
 import numpy as np
 from pathlib import Path
 from mpi4py import MPI
-from pyfluids.bindings import basics, gpu, logger
+from pyfluids import basics, gpu, logger, communicator
 #%%
 sim_name = "ABL"
 config_file = Path(__file__).parent/"configActuatorLine.txt"
@@ -47,15 +47,13 @@ output_path.mkdir(exist_ok=True)
 #%%
 logger.Logger.initialize_logger()
 
-#%%
-grid_factory = gpu.grid_generator.GridFactory.make()
-grid_builder = gpu.grid_generator.MultipleGridBuilder.make_shared(grid_factory)
-communicator = gpu.Communicator.get_instance()
+grid_builder = gpu.grid_generator.MultipleGridBuilder()
+communicator = communicator.Communicator.get_instance()
 
 config = basics.ConfigurationFile()
 config.load(str(config_file))
 
-para = gpu.Parameter(communicator.get_number_of_process(), communicator.get_pid(), config)
+para = gpu.Parameter(communicator.get_number_of_processes(), communicator.get_process_id(), config)
 bc_factory = gpu.BoundaryConditionFactory()
 
 #%%
diff --git a/Python/boundary_layer/boundary_layer.py b/Python/boundary_layer/boundary_layer.py
index 0ebdbb43894e6bdae55cca1f788a33c3739fb0c6..d2efffeaca521f23f07fe24a0ad9edfeab675488 100644
--- a/Python/boundary_layer/boundary_layer.py
+++ b/Python/boundary_layer/boundary_layer.py
@@ -36,7 +36,7 @@ r"""
 import numpy as np
 from pathlib import Path
 from mpi4py import MPI
-from pyfluids import basics, gpu, logger
+from pyfluids import basics, gpu, logger, communicator
 #%%
 sim_name = "ABL"
 config_file = Path(__file__).parent/"configBoundaryLayer.txt"
@@ -48,14 +48,13 @@ output_path.mkdir(exist_ok=True)
 logger.Logger.initialize_logger()
 
 #%%
-grid_factory = gpu.grid_generator.GridFactory.make()
-grid_builder = gpu.grid_generator.MultipleGridBuilder.make_shared(grid_factory)
-communicator = gpu.Communicator.get_instance()
+grid_builder = gpu.grid_generator.MultipleGridBuilder()
+communicator = communicator.Communicator.get_instance()
 
 config = basics.ConfigurationFile()
 config.load(str(config_file))
 
-para = gpu.Parameter(communicator.get_number_of_process(), communicator.get_pid(), config)
+para = gpu.Parameter(communicator.get_number_of_processes(), communicator.get_process_id(), config)
 bc_factory = gpu.BoundaryConditionFactory()
 
 #%%
diff --git a/apps/cpu/AcousticPulse/ap.cpp b/apps/cpu/AcousticPulse/ap.cpp
index ac69eee6d2d158a2ea8eea27da8bf968b92f5f06..a04952a35cdb9ba5b55b4ed4dc9fb6225e26f5b7 100644
--- a/apps/cpu/AcousticPulse/ap.cpp
+++ b/apps/cpu/AcousticPulse/ap.cpp
@@ -10,7 +10,7 @@ void run()
 {
    try
    {
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       int    numOfThreads = 4;
diff --git a/apps/cpu/BeadPack/beadpack.cpp b/apps/cpu/BeadPack/beadpack.cpp
index d683fc445359e6e2d19a7d6f72c59158d6bf7f98..3da3030bb618812bc4e37db49683b9b6ce22fd84 100644
--- a/apps/cpu/BeadPack/beadpack.cpp
+++ b/apps/cpu/BeadPack/beadpack.cpp
@@ -23,7 +23,7 @@ void sbonepd(const char *configname)
          throw exceptionText;
       }
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (machine == "BOMBADIL")
diff --git a/apps/cpu/BoxBenchmark/bb.cpp b/apps/cpu/BoxBenchmark/bb.cpp
index 0f0c7d6c1ddd066f36f95d8f87be7afae3ddf2c8..3915f90627d7202c65a797960974b5c6f22ac1e2 100644
--- a/apps/cpu/BoxBenchmark/bb.cpp
+++ b/apps/cpu/BoxBenchmark/bb.cpp
@@ -37,7 +37,7 @@ void run(string configname)
 
       //UbLog::reportingLevel() = UbLog::logLevelFromString("DEBUG3");
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/CheckpointConverter/cpc.cpp b/apps/cpu/CheckpointConverter/cpc.cpp
index 4eb526cc75be39153f61cbc4d599a21bcc5394b4..a34e758db522fb3cc72a7bedd6eef6c8cc20a49c 100644
--- a/apps/cpu/CheckpointConverter/cpc.cpp
+++ b/apps/cpu/CheckpointConverter/cpc.cpp
@@ -17,7 +17,7 @@ void run(string configname)
       int    step = config.getValue<int>("step");
       int    numberOfProcesses = config.getValue<int>("numberOfProcesses");
       
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       SPtr<Grid3D> grid(new Grid3D(comm));
diff --git a/apps/cpu/ConvectionOfVortex/cov.cpp b/apps/cpu/ConvectionOfVortex/cov.cpp
index bfe29fc9bb6c18782212f8cb9a080d1815b8c6a1..102a1ad7d16fa7cd343449fd27fab73aae70c92f 100644
--- a/apps/cpu/ConvectionOfVortex/cov.cpp
+++ b/apps/cpu/ConvectionOfVortex/cov.cpp
@@ -12,7 +12,7 @@ void run()
 
    try
    {
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       int    numOfThreads = 4;
diff --git a/apps/cpu/CouetteFlow/cflow.cpp b/apps/cpu/CouetteFlow/cflow.cpp
index 112c0c96bd3c9bde56c3ef53f0047569fec031b2..6c7e28f5abba629a7ad9c59204b70fb3d7e03574 100644
--- a/apps/cpu/CouetteFlow/cflow.cpp
+++ b/apps/cpu/CouetteFlow/cflow.cpp
@@ -40,7 +40,7 @@ void bflow(string configname)
 //      double          Re = config.getValue<double>("Re");
 //      double          Bn = config.getValue<double>("Bn");
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/DHIT/dhit.cpp b/apps/cpu/DHIT/dhit.cpp
index e06db26b8706bad1f94fec432868daabea8d1cce..3143aa1c8f79ceb4c69aca04bac193025a604392 100644
--- a/apps/cpu/DHIT/dhit.cpp
+++ b/apps/cpu/DHIT/dhit.cpp
@@ -29,7 +29,7 @@ void run(string configname)
       double          lambda = config.getDouble("lambda");
       double          initTime = config.getDouble("initTime");
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/DLR-F16-Porous/f16.cpp b/apps/cpu/DLR-F16-Porous/f16.cpp
index 791c1c926dbdf86bf1c2f1634288696248cdb305..08bafaf3df6cdb3c32a3592ea950bd3bc2c42474 100644
--- a/apps/cpu/DLR-F16-Porous/f16.cpp
+++ b/apps/cpu/DLR-F16-Porous/f16.cpp
@@ -95,7 +95,7 @@ void run(string configname)
       
 
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/DLR-F16-Solid/f16.cpp b/apps/cpu/DLR-F16-Solid/f16.cpp
index 2a9893ef28efba2ee163e6e9d8c62ffadd68568a..cf08ef73aac0d8279aa84e62b717700017163a7f 100644
--- a/apps/cpu/DLR-F16-Solid/f16.cpp
+++ b/apps/cpu/DLR-F16-Solid/f16.cpp
@@ -47,7 +47,7 @@ void run(string configname)
       double          timeAvStart       = config.getValue<double>("timeAvStart");
       double          timeAvStop        = config.getValue<double>("timeAvStop");
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/DLR-F16/f16.cpp b/apps/cpu/DLR-F16/f16.cpp
index 17fef2d15f4026d4b7035a896d78664350f6b86c..639a73c3863d6e725ad048e44e0f9be12faf3909 100644
--- a/apps/cpu/DLR-F16/f16.cpp
+++ b/apps/cpu/DLR-F16/f16.cpp
@@ -13,7 +13,7 @@ double rangeRandom1()
 
 void setBC(SPtr<Grid3D> grid, string pathGeo, string fngFileWhole, string zigZagTape, vector<double>  boundingBox, double uLB, double rhoLB, double blockLength, SPtr<BCProcessor> bcProcessor)
 {
-   SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+   SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
    int myid = comm->getProcessID();
    
    std::vector<std::vector<SPtr<Block3D>> > blockVector;
@@ -205,7 +205,7 @@ void run(string configname)
       int             chunk = config.getValue<int>("chunk");
 
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/FallingSphere/FallingSphere.cpp b/apps/cpu/FallingSphere/FallingSphere.cpp
index ba837fcf15b32a65983b633c768aea6250a976a7..0ce12e62e66b45ea3f9f874f52d5f119c7cc0eb4 100644
--- a/apps/cpu/FallingSphere/FallingSphere.cpp
+++ b/apps/cpu/FallingSphere/FallingSphere.cpp
@@ -11,7 +11,7 @@ using namespace std;
 
 int main(int argc, char *argv[])
 {
-    std::shared_ptr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+    std::shared_ptr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
     int myid                                        = comm->getProcessID();
 
 
diff --git a/apps/cpu/FlowAroundCylinder/cylinder.cpp b/apps/cpu/FlowAroundCylinder/cylinder.cpp
index 5956a48a8bdf08d11047400f25302005d080d872..3e5be5e080a5702e9608bf037d492b8cbc809dfc 100644
--- a/apps/cpu/FlowAroundCylinder/cylinder.cpp
+++ b/apps/cpu/FlowAroundCylinder/cylinder.cpp
@@ -35,7 +35,7 @@ void run(string configname)
       vector<int>     blockNx = config.getVector<int>("blockNx");
       real          dx = config.getValue<real>("dx");
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/FlowAroundCylinder/cylinder.cpp.old b/apps/cpu/FlowAroundCylinder/cylinder.cpp.old
index f251ee63514c67cca6df0e998cc196d3cc5a9ec8..774ed812b5fab63538a56744ddd41a2611eadc72 100644
--- a/apps/cpu/FlowAroundCylinder/cylinder.cpp.old
+++ b/apps/cpu/FlowAroundCylinder/cylinder.cpp.old
@@ -15,7 +15,7 @@ void run(const char *cstr)
       int numOfThreads = 1;
       double availMem = 0;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if(machine == "BOMBADIL") 
@@ -385,7 +385,7 @@ void run2(const char *cstr)
       int numOfThreads = 1;
       double availMem = 0;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if(machine == "BOMBADIL") 
diff --git a/apps/cpu/Hagen_Poiseuille_flow/pflow.cpp b/apps/cpu/Hagen_Poiseuille_flow/pflow.cpp
index 5d5e47fddeecaf06b3b23590833ff2dc12d610fb..e7f7bb84a4dece690ea0e93cc2d07b6cdd8932d2 100644
--- a/apps/cpu/Hagen_Poiseuille_flow/pflow.cpp
+++ b/apps/cpu/Hagen_Poiseuille_flow/pflow.cpp
@@ -29,7 +29,7 @@ using namespace std;
 //      double          deltax = config.getDouble("deltax");
 //
 //
-//      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+//      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
 //      int myid = comm->getProcessID();
 //
 //      if (logToFile)
@@ -322,7 +322,7 @@ void pflowdp(string configname)
       double          cpStepStart = config.getValue<double>("cpStepStart");
       bool            newStart = config.getValue<bool>("newStart");
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       LBMReal rhoLB = 0.0;
diff --git a/apps/cpu/Hagen_Poiseuille_flow2/pflow2.cpp b/apps/cpu/Hagen_Poiseuille_flow2/pflow2.cpp
index f298d697f554f002f42438955ecbb3a3308ed219..e278fcb78fddc4f5606256a6805dbda4e344a19d 100644
--- a/apps/cpu/Hagen_Poiseuille_flow2/pflow2.cpp
+++ b/apps/cpu/Hagen_Poiseuille_flow2/pflow2.cpp
@@ -29,7 +29,7 @@ void pflowdp(string configname)
       double          deltax = config.getValue<double>("deltax");
 
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       LBMReal rhoLB = 0.0;
diff --git a/apps/cpu/HerschelBulkleyModel/hbflow.cpp b/apps/cpu/HerschelBulkleyModel/hbflow.cpp
index 6abe6b5c55ce3fb3c54d5dd169a06b47026b9b3e..67ed5404eebc1eeffe1e385ff42dc806c1588301 100644
--- a/apps/cpu/HerschelBulkleyModel/hbflow.cpp
+++ b/apps/cpu/HerschelBulkleyModel/hbflow.cpp
@@ -41,7 +41,7 @@ void bflow(string configname)
 //      double          Bn = config.getValue<double>("Bn");
       real          scaleFactor = config.getValue<real>("scaleFactor");
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/HerschelBulkleySphere/hbsphere.cpp b/apps/cpu/HerschelBulkleySphere/hbsphere.cpp
index 221b106123b5fadffcf60427481800ef0d673b76..90ae44b62f18e412c4f3e0bd8ba88de56b01650a 100644
--- a/apps/cpu/HerschelBulkleySphere/hbsphere.cpp
+++ b/apps/cpu/HerschelBulkleySphere/hbsphere.cpp
@@ -38,7 +38,7 @@ void bflow(string configname)
       real          Bn = config.getValue<real>("Bn");
       vector<real>  sphereCenter = config.getVector<real>("sphereCenter");
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/InterfaceTest/itest.cpp b/apps/cpu/InterfaceTest/itest.cpp
index 723802f6d3160dd79c28c2347ed311d360020d77..e8c93d4e5b3698b75964d63de0c18780a4566552 100644
--- a/apps/cpu/InterfaceTest/itest.cpp
+++ b/apps/cpu/InterfaceTest/itest.cpp
@@ -11,7 +11,7 @@ void run()
 {
    try
    {
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       int    numOfThreads = 4;
diff --git a/apps/cpu/JetBreakup/JetBreakup.cpp b/apps/cpu/JetBreakup/JetBreakup.cpp
index f4b74ca379edaf8840cb6875ca0ff9fc7f296509..53cdc73701a38e8033b8ddf978e5e9dd021280be 100644
--- a/apps/cpu/JetBreakup/JetBreakup.cpp
+++ b/apps/cpu/JetBreakup/JetBreakup.cpp
@@ -61,7 +61,7 @@ void run(string configname)
 
         int caseN = config.getValue<int>("case");
 
-        SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+        SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
         int myid = comm->getProcessID();
 
         if (myid == 0)
diff --git a/apps/cpu/JetBreakup/JetBreakup.cpp.new b/apps/cpu/JetBreakup/JetBreakup.cpp.new
index 953a8dee8caf0e7972b79138c8480f1883ebdfec..1c03deebdc1c6bfa195735c2cdf1dbb4ba0a8212 100644
--- a/apps/cpu/JetBreakup/JetBreakup.cpp.new
+++ b/apps/cpu/JetBreakup/JetBreakup.cpp.new
@@ -58,7 +58,7 @@ void run(string configname)
 
         int caseN = config.getValue<int>("case");
 
-        SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+        SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
         int myid = comm->getProcessID();
 
         if (myid == 0)
diff --git a/apps/cpu/LaminarTubeFlow/ltf.cpp b/apps/cpu/LaminarTubeFlow/ltf.cpp
index 315bee6f263b3d5395499c0ed925c3790d7bfcdf..53927b7378db9bfca0693c1808610e0d6b532d66 100644
--- a/apps/cpu/LaminarTubeFlow/ltf.cpp
+++ b/apps/cpu/LaminarTubeFlow/ltf.cpp
@@ -33,7 +33,7 @@ void run(string configname)
       real          cpStep = config.getValue<real>("cpStep");
       bool            newStart = config.getValue<bool>("newStart");
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/LaminarTubeFlowConv/ltf.cpp b/apps/cpu/LaminarTubeFlowConv/ltf.cpp
index 53cd7c1ac7900118f47e483f867d22de2e3e7974..d7515d44bfd135d95a103aa758f38e376421b01c 100644
--- a/apps/cpu/LaminarTubeFlowConv/ltf.cpp
+++ b/apps/cpu/LaminarTubeFlowConv/ltf.cpp
@@ -30,7 +30,7 @@ void run(int tn)
       int numOfThreads = 1;
       double availMem = 0;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if(machine == "BOMBADIL") 
diff --git a/apps/cpu/LidDrivenCavity/LidDrivenCavity.cpp b/apps/cpu/LidDrivenCavity/LidDrivenCavity.cpp
index 1819ee0f6fe00191f28ddfcab8cce93466047289..01d6262c299f8f49e1e3cfd62ba1d9c39b127080 100644
--- a/apps/cpu/LidDrivenCavity/LidDrivenCavity.cpp
+++ b/apps/cpu/LidDrivenCavity/LidDrivenCavity.cpp
@@ -80,7 +80,7 @@ int main(int  /*argc*/, char*  /*argv*/[])
       double g_maxX3 = 0.5;
 
       // NullCommunicator is a place-holder for interprocess communication
-      SPtr<vf::mpi::Communicator> comm = NullCommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = NullCommunicator::getInstance();
       // new grid object
       SPtr<Grid3D> grid(new Grid3D(comm));
       // set grid spacing
diff --git a/apps/cpu/LiggghtsApp/LiggghtsApp.cpp b/apps/cpu/LiggghtsApp/LiggghtsApp.cpp
index 969243a0516871033c9c4d001d92468dc0164e8c..5b3f27d2ef7f64fc692750d8480dafea4a69a030 100644
--- a/apps/cpu/LiggghtsApp/LiggghtsApp.cpp
+++ b/apps/cpu/LiggghtsApp/LiggghtsApp.cpp
@@ -19,7 +19,7 @@ int main(int argc, char *argv[])
 {
     //Sleep(30000);
 
-    std::shared_ptr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+    std::shared_ptr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
     int myid                                        = comm->getProcessID();
 
 
diff --git a/apps/cpu/Multiphase/Multiphase (Droplet Test).cpp.backup b/apps/cpu/Multiphase/Multiphase (Droplet Test).cpp.backup
index b783a354fd39f66ba82383f2387daf92ecbde758..c0ca32e5ce1980af048c769e01aca95c1df9f0dd 100644
--- a/apps/cpu/Multiphase/Multiphase (Droplet Test).cpp.backup	
+++ b/apps/cpu/Multiphase/Multiphase (Droplet Test).cpp.backup	
@@ -51,7 +51,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/Multiphase (Jet breakup on Phoenix).cpp.backup b/apps/cpu/Multiphase/Multiphase (Jet breakup on Phoenix).cpp.backup
index ebf91e6cb54f1a067535ae512f85e706a2d88980..d43a61d6767b4193f443bcb1fede89cf3f37bc5f 100644
--- a/apps/cpu/Multiphase/Multiphase (Jet breakup on Phoenix).cpp.backup	
+++ b/apps/cpu/Multiphase/Multiphase (Jet breakup on Phoenix).cpp.backup	
@@ -51,7 +51,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/Multiphase.cpp b/apps/cpu/Multiphase/Multiphase.cpp
index 79d969dd93c5bd87b0d327f5275d2c6b9a8a1448..07b5a661f2c6ead2c57d09b2c05869f86058d1ad 100644
--- a/apps/cpu/Multiphase/Multiphase.cpp
+++ b/apps/cpu/Multiphase/Multiphase.cpp
@@ -55,7 +55,7 @@ void run(string configname)
         real beta = 12 * sigma / interfaceWidth;
         real kappa = 1.5 * interfaceWidth * sigma;
 
-        SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+        SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
         int myid                = comm->getProcessID();
 
         if (myid == 0)
diff --git a/apps/cpu/Multiphase/backup/Multiphase (Droplet Test).cpp b/apps/cpu/Multiphase/backup/Multiphase (Droplet Test).cpp
index e6efac95353e04e93aae0a9dfcde0422259a2832..868b4abfa15b990cfdb418066efda3a7e3c8f91c 100644
--- a/apps/cpu/Multiphase/backup/Multiphase (Droplet Test).cpp	
+++ b/apps/cpu/Multiphase/backup/Multiphase (Droplet Test).cpp	
@@ -51,7 +51,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/backup/Multiphase (Final before automation).cpp b/apps/cpu/Multiphase/backup/Multiphase (Final before automation).cpp
index 61d37660015ae6b6134e6d30c83010f75ec362e6..4b51783592d1ce6f89498313424d342b0766b289 100644
--- a/apps/cpu/Multiphase/backup/Multiphase (Final before automation).cpp	
+++ b/apps/cpu/Multiphase/backup/Multiphase (Final before automation).cpp	
@@ -67,7 +67,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/backup/Multiphase (Flow Focusing).cpp b/apps/cpu/Multiphase/backup/Multiphase (Flow Focusing).cpp
index 32548c12b0d559928bf789cd2187cb180213dc65..c79270b365d36b1b2a5c89f14ab9408592a1ecc4 100644
--- a/apps/cpu/Multiphase/backup/Multiphase (Flow Focusing).cpp	
+++ b/apps/cpu/Multiphase/backup/Multiphase (Flow Focusing).cpp	
@@ -51,7 +51,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/backup/Multiphase (Jet breakup on Phoenix).cpp b/apps/cpu/Multiphase/backup/Multiphase (Jet breakup on Phoenix).cpp
index ebf91e6cb54f1a067535ae512f85e706a2d88980..d43a61d6767b4193f443bcb1fede89cf3f37bc5f 100644
--- a/apps/cpu/Multiphase/backup/Multiphase (Jet breakup on Phoenix).cpp	
+++ b/apps/cpu/Multiphase/backup/Multiphase (Jet breakup on Phoenix).cpp	
@@ -51,7 +51,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/backup/Multiphase (T-Junction).cpp b/apps/cpu/Multiphase/backup/Multiphase (T-Junction).cpp
index 93844c4a2f9068debe7759943efb3243da923703..c213e6366119e8cbb8ca0db20e387b07a642c758 100644
--- a/apps/cpu/Multiphase/backup/Multiphase (T-Junction).cpp	
+++ b/apps/cpu/Multiphase/backup/Multiphase (T-Junction).cpp	
@@ -51,7 +51,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/backup/Multiphase (Thermal).cpp b/apps/cpu/Multiphase/backup/Multiphase (Thermal).cpp
index 68e07b43c2cfff23affee121f9d8b39997d0fd8d..4f5417397da1fc043e466f1901ccd727b8831ca4 100644
--- a/apps/cpu/Multiphase/backup/Multiphase (Thermal).cpp	
+++ b/apps/cpu/Multiphase/backup/Multiphase (Thermal).cpp	
@@ -51,7 +51,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/backup/Multiphase (Tube).cpp b/apps/cpu/Multiphase/backup/Multiphase (Tube).cpp
index 492a906b816129de271189ab8d35fe1a4d180b56..84de9055d3024be2409b12afed7540b38b057bc1 100644
--- a/apps/cpu/Multiphase/backup/Multiphase (Tube).cpp	
+++ b/apps/cpu/Multiphase/backup/Multiphase (Tube).cpp	
@@ -42,7 +42,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Multiphase/backup/Multiphase.cpp b/apps/cpu/Multiphase/backup/Multiphase.cpp
index b029e35277729803df53fb0d79d025863e21719b..8a7708c05f1af35028518aa0ca061fc81f3f3de6 100644
--- a/apps/cpu/Multiphase/backup/Multiphase.cpp
+++ b/apps/cpu/Multiphase/backup/Multiphase.cpp
@@ -78,7 +78,7 @@ void run(string configname)
       double beta  = 12*sigma/interfaceThickness;
 	  double kappa = 1.5*interfaceThickness*sigma;
 	  
-	  CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+	  CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/MultiphaseDropletTest/droplet.cpp b/apps/cpu/MultiphaseDropletTest/droplet.cpp
index eaa3e550f873113bf2ffdeba4ed8a962ccae5046..f65c19633886135d8dc3c537d923924294b8d3c6 100644
--- a/apps/cpu/MultiphaseDropletTest/droplet.cpp
+++ b/apps/cpu/MultiphaseDropletTest/droplet.cpp
@@ -51,7 +51,7 @@ void run(string configname)
         bool newStart      = config.getValue<bool>("newStart");
         //double rStep = config.getValue<double>("rStep");
 
-        SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+        SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
         int myid                = comm->getProcessID();
 
         if (myid == 0)
diff --git a/apps/cpu/Nozzle/nozzle.cpp b/apps/cpu/Nozzle/nozzle.cpp
index babee1900c0bd9bb10bca3bccd790e83d25d14b8..bdaec8e1a913a7633b6f918931b71a3d4cb0368e 100644
--- a/apps/cpu/Nozzle/nozzle.cpp
+++ b/apps/cpu/Nozzle/nozzle.cpp
@@ -18,7 +18,7 @@ int main(int argc, char *argv[])
 
     try {
 
-        std::shared_ptr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+        std::shared_ptr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
         int myid = comm->getProcessID();
 
         // bounding box
diff --git a/apps/cpu/OrganPipe/OrganPipe.cpp b/apps/cpu/OrganPipe/OrganPipe.cpp
index b23c159110dce4d1415f4a558d9cd1910e0c2de4..98b8c25050ba459c795ea22b3005686755e69077 100644
--- a/apps/cpu/OrganPipe/OrganPipe.cpp
+++ b/apps/cpu/OrganPipe/OrganPipe.cpp
@@ -8,7 +8,7 @@ void run(string configname)
 {
    try
    {
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (myid == 0) UBLOG(logINFO, "Testcase organ pipe");
diff --git a/apps/cpu/PlateWithPorousInlay/plate.cpp b/apps/cpu/PlateWithPorousInlay/plate.cpp
index 315bacfa954640c8963ef46c3a7c840280a69e06..0334da7c3410cba9f92a2c2396c9702db429c510 100644
--- a/apps/cpu/PlateWithPorousInlay/plate.cpp
+++ b/apps/cpu/PlateWithPorousInlay/plate.cpp
@@ -52,7 +52,7 @@ void run(const char *cstr)
       stringstream logFilename;
       double availMem = 0;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr);
diff --git a/apps/cpu/PoiseuilleFlow/pf1.cpp b/apps/cpu/PoiseuilleFlow/pf1.cpp
index 0e21dea4d252dc141fa5fd75feebe24c0b80042d..dffd5fde6c71dfd0ab647cee2c3e6ee1a81455d0 100644
--- a/apps/cpu/PoiseuilleFlow/pf1.cpp
+++ b/apps/cpu/PoiseuilleFlow/pf1.cpp
@@ -9,7 +9,7 @@ void pf1()
 {
     using namespace vf::lbm::dir;
 
-   SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+   SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
    int myid = comm->getProcessID();
 
    //parameters
diff --git a/apps/cpu/PoiseuilleFlow/pf2.cpp b/apps/cpu/PoiseuilleFlow/pf2.cpp
index c339e06a6c9cac71b4ba9d340ef72027fd999f41..7990d19690f048f444ff234a00557b22f9f5e86f 100644
--- a/apps/cpu/PoiseuilleFlow/pf2.cpp
+++ b/apps/cpu/PoiseuilleFlow/pf2.cpp
@@ -6,7 +6,7 @@
 ////pipe flow with pressure drop
 //void pf2()
 //{
-//   SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+//   SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
 //   int myid = comm->getProcessID();
 //
 //   //parameters
diff --git a/apps/cpu/PoiseuilleFlow/pf3.cpp b/apps/cpu/PoiseuilleFlow/pf3.cpp
index fa01b68521fadb59c4101a82caef30377b3368fc..0442b1c67663f91abb39350cc4287a62efa95e74 100644
--- a/apps/cpu/PoiseuilleFlow/pf3.cpp
+++ b/apps/cpu/PoiseuilleFlow/pf3.cpp
@@ -6,7 +6,7 @@
 ////two plates flow with forcing
 //void pf3()
 //{
-//   SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+//   SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
 //   int myid = comm->getProcessID();
 //
 //   //parameters
diff --git a/apps/cpu/PoiseuilleFlow/pf4.cpp b/apps/cpu/PoiseuilleFlow/pf4.cpp
index 2e419358e1d5b32ea0f9fc542f8d47b80aa6cae8..9568cdd619dec4a6585de167ca2234894962ef10 100644
--- a/apps/cpu/PoiseuilleFlow/pf4.cpp
+++ b/apps/cpu/PoiseuilleFlow/pf4.cpp
@@ -6,7 +6,7 @@
 ////two plates flow with pressure drop
 //void pf4()
 //{
-//   SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+//   SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
 //   int myid = comm->getProcessID();
 //
 //   //parameters
diff --git a/apps/cpu/RisingBubble2D/RisingBubble2D.cpp b/apps/cpu/RisingBubble2D/RisingBubble2D.cpp
index c9a28efc4033ceb0be8cbabc4976055f994c1932..a6f276130e908fa4c066bd7f496a09f06a9cf0d9 100644
--- a/apps/cpu/RisingBubble2D/RisingBubble2D.cpp
+++ b/apps/cpu/RisingBubble2D/RisingBubble2D.cpp
@@ -52,7 +52,7 @@ void run(string configname)
         bool newStart = config.getValue<bool>("newStart");
         // double rStep = config.getValue<double>("rStep");
 
-        std::shared_ptr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+        std::shared_ptr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
         int myid = comm->getProcessID();
 
         if (myid == 0) UBLOG(logINFO, "2D Rising Bubble: Start!");
diff --git a/apps/cpu/TPMSRow/TPMSRow.cpp b/apps/cpu/TPMSRow/TPMSRow.cpp
index 596b79d0525339d38ee18a3062a219459de8f305..09be56c68ed6cc0db2d911b19a21feae2c890d80 100644
--- a/apps/cpu/TPMSRow/TPMSRow.cpp
+++ b/apps/cpu/TPMSRow/TPMSRow.cpp
@@ -47,7 +47,7 @@ void run(string configname)
         bool newStart               = config.getValue<bool>("newStart");
 
         //SPtr<Communicator> comm = MPICommunicator::getInstance();
-        SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+        SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
         int myid                = comm->getProcessID();
         //int numOfProcesses      = comm->getNumberOfProcesses();
 
diff --git a/apps/cpu/ViskomatXL/viskomat.cpp b/apps/cpu/ViskomatXL/viskomat.cpp
index 327f25e599c80b059fe273cfd2a0c526ed4279f7..7db98670e2325825a76b60559d09d818838f9430 100644
--- a/apps/cpu/ViskomatXL/viskomat.cpp
+++ b/apps/cpu/ViskomatXL/viskomat.cpp
@@ -40,7 +40,7 @@ void bflow(string configname)
 
       vf::basics::ConfigurationFile   viscosity;
 
-      std::shared_ptr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      std::shared_ptr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/Wing/wing.cpp b/apps/cpu/Wing/wing.cpp
index ff6cbcfcab3b60669aea19ca6a56077034f0e7dc..d7e4cd77057f3edb63432b25e50aecc285734a7e 100644
--- a/apps/cpu/Wing/wing.cpp
+++ b/apps/cpu/Wing/wing.cpp
@@ -30,7 +30,7 @@ void setup(const char *cstr1, const char *cstr2)
       int refineLevel = UbSystem::stringTo<int>(cf.getValue("refineLevel"));
       int blocknx = UbSystem::stringTo<int>(cf.getValue("blocknx"));
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if(machine == "Bombadil") int dumy=0; 
diff --git a/apps/cpu/aperm/aperm.cpp b/apps/cpu/aperm/aperm.cpp
index 7591afe4d31c3b0c75097dd44a31477e852dbb00..44b48bb3cfd4038da8be18bfa1a0328edb7cb570 100644
--- a/apps/cpu/aperm/aperm.cpp
+++ b/apps/cpu/aperm/aperm.cpp
@@ -59,7 +59,7 @@ void run(string configname)
       double          cpStepStart = config.getDouble("cpStepStart");
       bool            newStart = config.getValue<bool>("newStart");
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/aperm/aperm.cpp.old b/apps/cpu/aperm/aperm.cpp.old
index fd6f916d8155eda8719b3c8cf3382af513206989..3776c454f75d0510a6ec4f033d25f35aa82618d4 100644
--- a/apps/cpu/aperm/aperm.cpp.old
+++ b/apps/cpu/aperm/aperm.cpp.old
@@ -58,7 +58,7 @@ void run(string configname)
       bool            yDir = config.getBool("yDir");
       bool            zDir = config.getBool("zDir");
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/aperm/aperm.cpp.old2 b/apps/cpu/aperm/aperm.cpp.old2
index 8f0cf83e2f8ae1731dd0e76dccb4a50bddc00ba5..ece4410e92f4ac587391ed0921400c4110cfb921 100644
--- a/apps/cpu/aperm/aperm.cpp.old2
+++ b/apps/cpu/aperm/aperm.cpp.old2
@@ -55,7 +55,7 @@ void run(string configname)
       bool            yDir = config.getBool("yDir");
       bool            zDir = config.getBool("zDir");
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/bChannelA/bChannelA.cpp b/apps/cpu/bChannelA/bChannelA.cpp
index 01725f2a3b883c0c0a53c6578dc581eadb0c8dc5..a59d829c35e6d1211d206439803c732f832831b2 100644
--- a/apps/cpu/bChannelA/bChannelA.cpp
+++ b/apps/cpu/bChannelA/bChannelA.cpp
@@ -111,7 +111,7 @@ void run(string configname)
       vector<double>  nupsStep          = config.getVector<double>("nupsStep");
       vector<double>  boundingBox       = config.getVector<double>("boundingBox");
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/bChannelVA/bChannelVA.cpp b/apps/cpu/bChannelVA/bChannelVA.cpp
index 6cfe5dac2557f167864495599074cd3c94da6517..363d02697f0fde6ff18736b07e09e1ac3310bea7 100644
--- a/apps/cpu/bChannelVA/bChannelVA.cpp
+++ b/apps/cpu/bChannelVA/bChannelVA.cpp
@@ -13,7 +13,7 @@ int main(int argc, char* argv[])
    try
    {
       //Sleep(20000);
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       //Pheonix
diff --git a/apps/cpu/bKanal/HLRNb/bKanal.cpp b/apps/cpu/bKanal/HLRNb/bKanal.cpp
index 0c5c46a0cb78354563425685c8346ff81258ccd2..99b21eabf2a3c11b325fc3727e5acb795b394dfc 100644
--- a/apps/cpu/bKanal/HLRNb/bKanal.cpp
+++ b/apps/cpu/bKanal/HLRNb/bKanal.cpp
@@ -27,7 +27,7 @@ void run(const char *cstr)
 
       UbLog::reportingLevel() = logINFO;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
 
diff --git a/apps/cpu/bKanal/bKanal.cpp b/apps/cpu/bKanal/bKanal.cpp
index 94af8f6aa46ddf5f398747805836ba95ce1dbbaf..33994ad706dc50ec1f1532f2b6b31fec84f4a520 100644
--- a/apps/cpu/bKanal/bKanal.cpp
+++ b/apps/cpu/bKanal/bKanal.cpp
@@ -24,7 +24,7 @@ void run(const char *cstr)
 
       UbLog::reportingLevel() = logINFO;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr);
diff --git a/apps/cpu/bKanal/sKanal/bKanal.cpp b/apps/cpu/bKanal/sKanal/bKanal.cpp
index 6a9d3c2c697b04c176bc9c11aa38f7f719e07785..1048554fb5ffc1e91abc97cf2bbf7f4133533063 100644
--- a/apps/cpu/bKanal/sKanal/bKanal.cpp
+++ b/apps/cpu/bKanal/sKanal/bKanal.cpp
@@ -27,7 +27,7 @@ void run(const char *cstr)
 
       UbLog::reportingLevel() = logINFO;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if(machine == "PIPPINNEU") 
diff --git a/apps/cpu/bKanal2/bKanal2.cpp b/apps/cpu/bKanal2/bKanal2.cpp
index 10e6f988085244e2028f28fca4129bc354c49699..ec632612596d14a747b48500d7436bbf4876f081 100644
--- a/apps/cpu/bKanal2/bKanal2.cpp
+++ b/apps/cpu/bKanal2/bKanal2.cpp
@@ -24,7 +24,7 @@ void run(const char *cstr)
 
       UbLog::reportingLevel() = logINFO;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr);
diff --git a/apps/cpu/bKanalAv/bKanal.cpp b/apps/cpu/bKanalAv/bKanal.cpp
index 71ca1ed0464afd67adf8db473ccdbf9487b8acda..27bf3c1a61f1d429e60a057a74479ad306098826 100644
--- a/apps/cpu/bKanalAv/bKanal.cpp
+++ b/apps/cpu/bKanalAv/bKanal.cpp
@@ -27,7 +27,7 @@ void run(const char *cstr)
 
       UbLog::reportingLevel() = logINFO;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
 
diff --git a/apps/cpu/band/band.cpp b/apps/cpu/band/band.cpp
index 370e50341662d21f25407cec428b1c20ee543a37..b454ff1284bbb1d64d657e9c87a51be2cbf06d66 100644
--- a/apps/cpu/band/band.cpp
+++ b/apps/cpu/band/band.cpp
@@ -20,7 +20,7 @@ void run(const char *cstr)
 
       //UbLog::reportingLevel() = logDEBUG5;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr);
diff --git a/apps/cpu/bbone/bbone.cpp b/apps/cpu/bbone/bbone.cpp
index 3eb6c827c6157c4dc6810ffaab402e3e51337c93..558ff4c9848bac363e4e01ad94fcd450745ea3de 100644
--- a/apps/cpu/bbone/bbone.cpp
+++ b/apps/cpu/bbone/bbone.cpp
@@ -33,7 +33,7 @@ void sbonepd(string configname)
       bool            logToFile         = config.getBool("logToFile");
       double          deltaT            = config.getDouble("deltaT");
       
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/block_test/block_test_incompressible.hpp b/apps/cpu/block_test/block_test_incompressible.hpp
index 2ce506c93f4611a3069140703e712dbcca7fe661..61b8d762bdf727816f07833c30e02e86e02c2c20 100644
--- a/apps/cpu/block_test/block_test_incompressible.hpp
+++ b/apps/cpu/block_test/block_test_incompressible.hpp
@@ -29,7 +29,7 @@ void block_test_incompressible(const char *cstr1, const char *cstr2)
       int numOfThreads = UbSystem::stringTo<int>(cf.getValue("numOfThreads"));
       double availMem = 0;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if(machine == "BOMBADIL") 
diff --git a/apps/cpu/bond_benchmark/bonb_b_chanel.cpp b/apps/cpu/bond_benchmark/bonb_b_chanel.cpp
index b5e63c50d5a9ba91abb872319a7e68de9df28b97..7df5a016279b60337a5a24b4c3a5241d7f0555b9 100644
--- a/apps/cpu/bond_benchmark/bonb_b_chanel.cpp
+++ b/apps/cpu/bond_benchmark/bonb_b_chanel.cpp
@@ -29,7 +29,7 @@ void chanel(const char *cstr)
 
       string comm_type = cf.getValue("comm");
       if(comm_type == "MPI")
-         comm = vf::mpi::MPICommunicator::getInstance();
+         comm = vf::parallel::MPICommunicator::getInstance();
       else if(comm_type == "BOND")
          comm = BondCommunicator::getInstance();
       
diff --git a/apps/cpu/bond_benchmark/bond_b.cpp b/apps/cpu/bond_benchmark/bond_b.cpp
index 6d607811a21f4dc111f6b003bf9343c60973207c..e3924595d89afd1f81cd4dde159f42a569e0cd01 100644
--- a/apps/cpu/bond_benchmark/bond_b.cpp
+++ b/apps/cpu/bond_benchmark/bond_b.cpp
@@ -35,7 +35,7 @@ void periodic(const char *cstr1, const char *cstr2)
 
       string comm_type = cf.getValue("comm");
       if(comm_type == "MPI")
-         comm = vf::mpi::MPICommunicator::getInstance();
+         comm = vf::parallel::MPICommunicator::getInstance();
       else if(comm_type == "BOND")
          comm = BondCommunicator::getInstance();
 
diff --git a/apps/cpu/bond_test/bond_test.cpp b/apps/cpu/bond_test/bond_test.cpp
index b7091184ff789dd6ac56e8c085853e5a45c088a0..fd77c285f540034b8c1d432097c45d9d0c6e9a7c 100644
--- a/apps/cpu/bond_test/bond_test.cpp
+++ b/apps/cpu/bond_test/bond_test.cpp
@@ -153,7 +153,7 @@ void simulation(const char *cstr)
       CommunicatorPtr comm;
       string comm_type = cf.getValue("comm");
       if(comm_type == "MPI")
-         comm = vf::mpi::MPICommunicator::getInstance();
+         comm = vf::parallel::MPICommunicator::getInstance();
       else if(comm_type == "BOND")
          comm = BondCommunicator::getInstance();
 
diff --git a/apps/cpu/bone/bone.cpp b/apps/cpu/bone/bone.cpp
index 849241ba26fc515ca2ee4ac3bd127742c0c693e5..17d0eca37975fa2cee87aa87f815d7237673111d 100644
--- a/apps/cpu/bone/bone.cpp
+++ b/apps/cpu/bone/bone.cpp
@@ -18,7 +18,7 @@ void run(const char *cstr1, const char *cstr2)
       stringstream logFilename;
       double availMem = 0;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr1);
diff --git a/apps/cpu/f16Test/f16test.cpp b/apps/cpu/f16Test/f16test.cpp
index 2360d962a59015c0c73d869cd97a68d65d3afeee..32f424bdbcd8c2fa9f9596147757dc63f2c0b684 100644
--- a/apps/cpu/f16Test/f16test.cpp
+++ b/apps/cpu/f16Test/f16test.cpp
@@ -42,7 +42,7 @@ void run(string configname)
       double          refineDistance = config.getDouble("refineDistance");
       vector<double>  nupsStep = config.getVector<double>("nupsStep");
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/insitu_demo/insitu_demo.cpp b/apps/cpu/insitu_demo/insitu_demo.cpp
index 42a1c6b4c636801bbaa50a1027751cd88301edfb..bd5f150191f2b9a63a1b7532d8f91b8f068d089a 100644
--- a/apps/cpu/insitu_demo/insitu_demo.cpp
+++ b/apps/cpu/insitu_demo/insitu_demo.cpp
@@ -15,7 +15,7 @@ void chanel(const char *cstr1)
       double availMem = 0;
 
       //CommunicatorPtr comm = FETOLCommunicator::getInstance();
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
 
       int myid = comm->getProcessID();
       int mybundle = comm->getBundleID();
diff --git a/apps/cpu/levels/levels.cpp b/apps/cpu/levels/levels.cpp
index a5ac8588023b76b3043d333e6056426147fa4689..10672abe42441f69e9536fb78e54efb01503264e 100644
--- a/apps/cpu/levels/levels.cpp
+++ b/apps/cpu/levels/levels.cpp
@@ -14,7 +14,7 @@ void run(string configname)
 
       string machine = QUOTEME(CAB_MACHINE);
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
 
       int myid = comm->getProcessID();
       int mybundle = comm->getBundleID();
diff --git a/apps/cpu/micropart/micropartTestQs3.hpp b/apps/cpu/micropart/micropartTestQs3.hpp
index 14e9a84412a51548b91f668369029afd057241c5..d8c870269b42929d5844bb516c79d091d6ab40ad 100644
--- a/apps/cpu/micropart/micropartTestQs3.hpp
+++ b/apps/cpu/micropart/micropartTestQs3.hpp
@@ -9,7 +9,7 @@ void micropartTestQs3(const char *cstr)
 {
    try
    {
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
       int numprocs = comm->getNumberOfProcesses();
 
diff --git a/apps/cpu/mirror/mirror.cpp b/apps/cpu/mirror/mirror.cpp
index b85d9b249ba60e10584d00ba95eda73da915ef5b..99ba78ff170513b6b8e92681b203f74dbc44e7d8 100644
--- a/apps/cpu/mirror/mirror.cpp
+++ b/apps/cpu/mirror/mirror.cpp
@@ -49,7 +49,7 @@ void run(string configname)
       string          VRES1100_Spiegel_fein = config.getValue<string>("VRES1100_Spiegel_fein");
 
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/mpi_benchmark/mpib.cpp b/apps/cpu/mpi_benchmark/mpib.cpp
index 797efbc7ed5ea87b31b30cfc3b6258a239fbd5d2..8ddf7bde30aaaf2f6707e3ac53e577919c546f40 100644
--- a/apps/cpu/mpi_benchmark/mpib.cpp
+++ b/apps/cpu/mpi_benchmark/mpib.cpp
@@ -8,7 +8,7 @@ using namespace std;
 
 void run(string configname)
 {
-   SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+   SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
    int myid = comm->getProcessID();
 
    // Get the name of the processor
diff --git a/apps/cpu/pChannel/pChannel.cpp b/apps/cpu/pChannel/pChannel.cpp
index 72292679f3ef53eaf81e1f26fa54659f5b002584..d30b28d262fa8d09c1658452eb11496b5c5c61f9 100644
--- a/apps/cpu/pChannel/pChannel.cpp
+++ b/apps/cpu/pChannel/pChannel.cpp
@@ -206,7 +206,7 @@ void run(string configname)
       vector<double>  nupsStep          = config.getVector<double>("nupsStep");
       vector<double>  boundingBox       = config.getVector<double>("boundingBox");
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/pChannel/pChannel.cpp.hlrn b/apps/cpu/pChannel/pChannel.cpp.hlrn
index f25a0c4c2e62d6b2b97ff338d567ef911bdc9d14..812566c96d046cf9da9b72bd388ce0696224df7b 100644
--- a/apps/cpu/pChannel/pChannel.cpp.hlrn
+++ b/apps/cpu/pChannel/pChannel.cpp.hlrn
@@ -52,7 +52,7 @@ void run(string configname)
       double          timeLineTsStop    = config.getDouble("timeLineTsStop");
 
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/pDisk/pdisk.cpp b/apps/cpu/pDisk/pdisk.cpp
index f19e04ff81222e7eb448c1f0236669fb824fad23..fed4f38b64eb8fa8471fc9e0c389fe40f77ffd4a 100644
--- a/apps/cpu/pDisk/pdisk.cpp
+++ b/apps/cpu/pDisk/pdisk.cpp
@@ -39,7 +39,7 @@ void run(string configname)
 
       //UbLog::reportingLevel() = logDEBUG5;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
 
diff --git a/apps/cpu/perm/perm.cpp b/apps/cpu/perm/perm.cpp
index ff0af00b8d10af2715542aaee8adf53abf633db4..4ea9ac93742da1bc65cd266d028bd09bcc195811 100644
--- a/apps/cpu/perm/perm.cpp
+++ b/apps/cpu/perm/perm.cpp
@@ -44,7 +44,7 @@ void perm(string configname)
       double          deltax = config.getValue<double>("deltax");
       bool            writeSampleToFile = config.getValue<bool>("writeSampleToFile");
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/perm/perm.cpp_s b/apps/cpu/perm/perm.cpp_s
index 21db434d5290ee948665f45af8ae1c93a84d9336..e40c55fbf49d157433822462cfa5e0cfec9636a6 100644
--- a/apps/cpu/perm/perm.cpp_s
+++ b/apps/cpu/perm/perm.cpp_s
@@ -23,7 +23,7 @@ void perm(const char *configname)
          throw exceptionText;
       }
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (machine == "BOMBADIL")
diff --git a/apps/cpu/plate/plate.cpp b/apps/cpu/plate/plate.cpp
index 28db0262fa649ea93f8b44cf69821557ad53961e..e4c78c6044106280732f151f9610e3791ff502a4 100644
--- a/apps/cpu/plate/plate.cpp
+++ b/apps/cpu/plate/plate.cpp
@@ -25,7 +25,7 @@ void run(const char *cstr, double endTime)
 
       //UbLog::reportingLevel() = logDEBUG5;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr);
diff --git a/apps/cpu/plate2/plate2.cpp b/apps/cpu/plate2/plate2.cpp
index a908abf5b3652dcdd24c44202950f4962351c735..1fd5a281ebae6906ae7706735d32b4aedbbe8199 100644
--- a/apps/cpu/plate2/plate2.cpp
+++ b/apps/cpu/plate2/plate2.cpp
@@ -18,7 +18,7 @@ void run(const char *cstr1, const char *cstr2)
       stringstream logFilename;
       double availMem = 0;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr1);
diff --git a/apps/cpu/poiseuille_example/poiseuille.cpp b/apps/cpu/poiseuille_example/poiseuille.cpp
index 52fede221deb1dd8ffa800fae711ff8517895054..d2e2c178e247f69d1629374661da9746ad8542bc 100644
--- a/apps/cpu/poiseuille_example/poiseuille.cpp
+++ b/apps/cpu/poiseuille_example/poiseuille.cpp
@@ -25,7 +25,7 @@ int main()
     const auto lbmUnitConverter = std::make_shared<LBMUnitConverter>();
     const auto writer = WbWriterVtkXmlBinary::getInstance();
 
-    const auto communicator = vf::mpi::MPICommunicator::getInstance();
+    const auto communicator = vf::parallel::MPICommunicator::getInstance();
     const auto kernel = std::make_shared<CompressibleCumulant4thOrderViscosityLBMKernel>();
     kernel->setBCProcessor(std::make_shared<BCProcessor>());
     kernel->setForcingX1(1e-6 * lbmUnitConverter->getFactorForceWToLb());
diff --git a/apps/cpu/porplate2/porplate.cpp b/apps/cpu/porplate2/porplate.cpp
index 2414e07732b18cac8a8c7c61b276f007c16826ef..fe93f8fa59d7e9befa63176b253f172ae09e1e87 100644
--- a/apps/cpu/porplate2/porplate.cpp
+++ b/apps/cpu/porplate2/porplate.cpp
@@ -316,7 +316,7 @@ void run(const char *cstr, bool firststart)
       stringstream logFilename;
       double availMem = 0;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr);
diff --git a/apps/cpu/rheometer/rheometer.cpp b/apps/cpu/rheometer/rheometer.cpp
index ca3378bd5d6a4322b66877b25fca89d5ae478aac..68db541bc922d3c269d3f626e19ddd6a1001d071 100644
--- a/apps/cpu/rheometer/rheometer.cpp
+++ b/apps/cpu/rheometer/rheometer.cpp
@@ -43,7 +43,7 @@ void bflow(string configname)
 
       //outputPath = outputPath + "/rheometerBingham_" + config.getValue<string>("resolution") + "_" + config.getValue<string>("OmegaLB");
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/sbone/sbone.cpp b/apps/cpu/sbone/sbone.cpp
index 321396da68d290946c16b36955cc1be98c10cf84..b52aaa3ac6e40ec9550171125141edf28b3fd89d 100644
--- a/apps/cpu/sbone/sbone.cpp
+++ b/apps/cpu/sbone/sbone.cpp
@@ -23,7 +23,7 @@ void sbonepd(const char *configname)
          throw exceptionText;
       }
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if(machine == "BOMBADIL") 
diff --git a/apps/cpu/screw/screw.cpp b/apps/cpu/screw/screw.cpp
index 7ba90a58644e8d19181847dd0825abb57366daf5..ad7c997743a0ad10b4301765aaf73392f61f4bcc 100644
--- a/apps/cpu/screw/screw.cpp
+++ b/apps/cpu/screw/screw.cpp
@@ -29,7 +29,7 @@ int main(int argc, char* argv[])
       int             restartStep  = config.getValue<int>("restartStep");
 
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       SPtr<LBMUnitConverter> conv = SPtr<LBMUnitConverter>(new LBMUnitConverter());
diff --git a/apps/cpu/sphere/sphere.cpp b/apps/cpu/sphere/sphere.cpp
index 1f0f5c116988f9d4bb8d068768d057db376a42d0..5411449c79fcbf0bece01517eb81c4b7f1d4d52f 100644
--- a/apps/cpu/sphere/sphere.cpp
+++ b/apps/cpu/sphere/sphere.cpp
@@ -11,7 +11,7 @@ void run(string configname)
 
    try
    {
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
 
       int myid = comm->getProcessID();
 
diff --git a/apps/cpu/stick/stick.cpp b/apps/cpu/stick/stick.cpp
index 62efec8098241d440a2b2292ca5018fea915fe4e..8bbc820001c2e5e5b87c268d95bfce4f5323df70 100644
--- a/apps/cpu/stick/stick.cpp
+++ b/apps/cpu/stick/stick.cpp
@@ -19,7 +19,7 @@ void main()
       int numOfThreads = 4;
       double availMem = 10e9;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       double dx = 1;
diff --git a/apps/cpu/teperm/teperm.cpp b/apps/cpu/teperm/teperm.cpp
index 9c4c1585ffaee5dce85eb6d1d9d0de993e033d7b..78e57d8ef8200c8b0552d7b2670ca43c973e9182 100644
--- a/apps/cpu/teperm/teperm.cpp
+++ b/apps/cpu/teperm/teperm.cpp
@@ -63,7 +63,7 @@ void run(string configname)
       int             chunk = config.getValue<int>("chunk");
 
 
-      SPtr<vf::mpi::Communicator> comm = vf::mpi::MPICommunicator::getInstance();
+      SPtr<vf::parallel::Communicator> comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       if (logToFile)
diff --git a/apps/cpu/town/town.cpp b/apps/cpu/town/town.cpp
index ccaf90f8d277df9e16a5e3592eafa649e142d235..7fcb83b314188d93a3778d06b439c5ff8e384d25 100644
--- a/apps/cpu/town/town.cpp
+++ b/apps/cpu/town/town.cpp
@@ -18,7 +18,7 @@ void run(const char *cstr1, const char *cstr2)
       stringstream logFilename;
       double availMem = 0;
 
-      CommunicatorPtr comm = vf::mpi::MPICommunicator::getInstance();
+      CommunicatorPtr comm = vf::parallel::MPICommunicator::getInstance();
       int myid = comm->getProcessID();
 
       string machine = string(cstr1);
diff --git a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp b/apps/gpu/ActuatorLine/ActuatorLine.cpp
similarity index 97%
rename from apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
rename to apps/gpu/ActuatorLine/ActuatorLine.cpp
index 40499be535d4025b7d9d5142a2ea718aa63590f3..2586a5da937c9cad0c94a268d046194f1e16301b 100644
--- a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
+++ b/apps/gpu/ActuatorLine/ActuatorLine.cpp
@@ -31,28 +31,25 @@
 //! \author Henry Korb, Henrik Asmuth
 //=======================================================================================
 #define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <sstream>
-#include <iostream>
-#include <stdexcept>
-#include <fstream>
+#include <cmath>
 #include <exception>
+#include <fstream>
+#include <iostream>
 #include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "DataTypes.h"
-#include "PointerDefinitions.h"
-
-#include "StringUtilities/StringUtil.h"
-
-
-
+#include <basics/DataTypes.h>
+#include <basics/PointerDefinitions.h>
+#include <basics/StringUtilities/StringUtil.h>
 #include <basics/config/ConfigurationFile.h>
 
 #include <logger/Logger.h>
 
+#include <parallel/MPICommunicator.h>
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -69,7 +66,6 @@
 //////////////////////////////////////////////////////////////////////////
 
 #include "VirtualFluids_GPU/LBM/Simulation.h"
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
@@ -105,7 +101,7 @@ std::string simulationName("ActuatorLine");
 
 void multipleLevel(const std::string& configPath)
 {
-    vf::gpu::Communicator& communicator = vf::gpu::MpiCommunicator::getInstance();
+    vf::parallel::Communicator &communicator = *vf::parallel::MPICommunicator::getInstance();
 
     vf::basics::ConfigurationFile config;
     config.load(configPath);
@@ -134,7 +130,7 @@ void multipleLevel(const std::string& configPath)
     const float tStartOutProbe      =  config.getValue<real>("tStartOutProbe");
     const float tOutProbe           =  config.getValue<real>("tOutProbe");
         
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcesses(), communicator.getProcessID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
     GridScalingFactory scalingFactory  = GridScalingFactory();
 
diff --git a/apps/gpu/LBM/ActuatorLine/CMakeLists.txt b/apps/gpu/ActuatorLine/CMakeLists.txt
similarity index 81%
rename from apps/gpu/LBM/ActuatorLine/CMakeLists.txt
rename to apps/gpu/ActuatorLine/CMakeLists.txt
index e0ff4e06e83a957be6966a7322ff06a0d068d18a..c437ac81a0e23fadd925ca81af596f81ff59f820 100644
--- a/apps/gpu/LBM/ActuatorLine/CMakeLists.txt
+++ b/apps/gpu/ActuatorLine/CMakeLists.txt
@@ -1,6 +1,6 @@
 PROJECT(ActuatorLine LANGUAGES CUDA CXX)
 
-vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES ActuatorLine.cpp)
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator FILES ActuatorLine.cpp)
 
 set_source_files_properties(ActuatorLine.cpp PROPERTIES LANGUAGE CUDA)
 
diff --git a/apps/gpu/LBM/ActuatorLine/configActuatorLine.txt b/apps/gpu/ActuatorLine/configActuatorLine.txt
similarity index 100%
rename from apps/gpu/LBM/ActuatorLine/configActuatorLine.txt
rename to apps/gpu/ActuatorLine/configActuatorLine.txt
diff --git a/apps/gpu/LBM/Basel/3rdPartyLinking.cmake b/apps/gpu/Basel/3rdPartyLinking.cmake
similarity index 100%
rename from apps/gpu/LBM/Basel/3rdPartyLinking.cmake
rename to apps/gpu/Basel/3rdPartyLinking.cmake
diff --git a/apps/gpu/LBM/Basel/CMakeLists.txt b/apps/gpu/Basel/CMakeLists.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/CMakeLists.txt
rename to apps/gpu/Basel/CMakeLists.txt
diff --git a/apps/gpu/LBM/Basel/CMakePackage.cmake b/apps/gpu/Basel/CMakePackage.cmake
similarity index 100%
rename from apps/gpu/LBM/Basel/CMakePackage.cmake
rename to apps/gpu/Basel/CMakePackage.cmake
diff --git a/apps/gpu/LBM/Basel/main.cpp b/apps/gpu/Basel/main.cpp
similarity index 100%
rename from apps/gpu/LBM/Basel/main.cpp
rename to apps/gpu/Basel/main.cpp
diff --git a/apps/gpu/LBM/Basel/resources/Junctions.txt b/apps/gpu/Basel/resources/Junctions.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/Junctions.txt
rename to apps/gpu/Basel/resources/Junctions.txt
diff --git a/apps/gpu/LBM/Basel/resources/Sinks.txt b/apps/gpu/Basel/resources/Sinks.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/Sinks.txt
rename to apps/gpu/Basel/resources/Sinks.txt
diff --git a/apps/gpu/LBM/Basel/resources/Sources.txt b/apps/gpu/Basel/resources/Sources.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/Sources.txt
rename to apps/gpu/Basel/resources/Sources.txt
diff --git a/apps/gpu/LBM/Basel/resources/Streets.txt b/apps/gpu/Basel/resources/Streets.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/Streets.txt
rename to apps/gpu/Basel/resources/Streets.txt
diff --git a/apps/gpu/LBM/Basel/resources/allStreets/Junctions.txt b/apps/gpu/Basel/resources/allStreets/Junctions.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/allStreets/Junctions.txt
rename to apps/gpu/Basel/resources/allStreets/Junctions.txt
diff --git a/apps/gpu/LBM/Basel/resources/allStreets/Sinks.txt b/apps/gpu/Basel/resources/allStreets/Sinks.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/allStreets/Sinks.txt
rename to apps/gpu/Basel/resources/allStreets/Sinks.txt
diff --git a/apps/gpu/LBM/Basel/resources/allStreets/Sources.txt b/apps/gpu/Basel/resources/allStreets/Sources.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/allStreets/Sources.txt
rename to apps/gpu/Basel/resources/allStreets/Sources.txt
diff --git a/apps/gpu/LBM/Basel/resources/allStreets/Streets.txt b/apps/gpu/Basel/resources/allStreets/Streets.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/allStreets/Streets.txt
rename to apps/gpu/Basel/resources/allStreets/Streets.txt
diff --git a/apps/gpu/LBM/Basel/resources/allStreetsDouble/Junctions.txt b/apps/gpu/Basel/resources/allStreetsDouble/Junctions.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/allStreetsDouble/Junctions.txt
rename to apps/gpu/Basel/resources/allStreetsDouble/Junctions.txt
diff --git a/apps/gpu/LBM/Basel/resources/allStreetsDouble/Sinks.txt b/apps/gpu/Basel/resources/allStreetsDouble/Sinks.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/allStreetsDouble/Sinks.txt
rename to apps/gpu/Basel/resources/allStreetsDouble/Sinks.txt
diff --git a/apps/gpu/LBM/Basel/resources/allStreetsDouble/Sources.txt b/apps/gpu/Basel/resources/allStreetsDouble/Sources.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/allStreetsDouble/Sources.txt
rename to apps/gpu/Basel/resources/allStreetsDouble/Sources.txt
diff --git a/apps/gpu/LBM/Basel/resources/allStreetsDouble/Streets.txt b/apps/gpu/Basel/resources/allStreetsDouble/Streets.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/allStreetsDouble/Streets.txt
rename to apps/gpu/Basel/resources/allStreetsDouble/Streets.txt
diff --git a/apps/gpu/LBM/Basel/resources/allStreetsOneClosed/Junctions.txt b/apps/gpu/Basel/resources/allStreetsOneClosed/Junctions.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/allStreetsOneClosed/Junctions.txt
rename to apps/gpu/Basel/resources/allStreetsOneClosed/Junctions.txt
diff --git a/apps/gpu/LBM/Basel/resources/allStreetsOneClosed/Sinks.txt b/apps/gpu/Basel/resources/allStreetsOneClosed/Sinks.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/allStreetsOneClosed/Sinks.txt
rename to apps/gpu/Basel/resources/allStreetsOneClosed/Sinks.txt
diff --git a/apps/gpu/LBM/Basel/resources/allStreetsOneClosed/Sources.txt b/apps/gpu/Basel/resources/allStreetsOneClosed/Sources.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/allStreetsOneClosed/Sources.txt
rename to apps/gpu/Basel/resources/allStreetsOneClosed/Sources.txt
diff --git a/apps/gpu/LBM/Basel/resources/allStreetsOneClosed/Streets.txt b/apps/gpu/Basel/resources/allStreetsOneClosed/Streets.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/allStreetsOneClosed/Streets.txt
rename to apps/gpu/Basel/resources/allStreetsOneClosed/Streets.txt
diff --git a/apps/gpu/LBM/Basel/resources/allStreetsQuadruple/Junctions.txt b/apps/gpu/Basel/resources/allStreetsQuadruple/Junctions.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/allStreetsQuadruple/Junctions.txt
rename to apps/gpu/Basel/resources/allStreetsQuadruple/Junctions.txt
diff --git a/apps/gpu/LBM/Basel/resources/allStreetsQuadruple/Sinks.txt b/apps/gpu/Basel/resources/allStreetsQuadruple/Sinks.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/allStreetsQuadruple/Sinks.txt
rename to apps/gpu/Basel/resources/allStreetsQuadruple/Sinks.txt
diff --git a/apps/gpu/LBM/Basel/resources/allStreetsQuadruple/Sources.txt b/apps/gpu/Basel/resources/allStreetsQuadruple/Sources.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/allStreetsQuadruple/Sources.txt
rename to apps/gpu/Basel/resources/allStreetsQuadruple/Sources.txt
diff --git a/apps/gpu/LBM/Basel/resources/allStreetsQuadruple/Streets.txt b/apps/gpu/Basel/resources/allStreetsQuadruple/Streets.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/allStreetsQuadruple/Streets.txt
rename to apps/gpu/Basel/resources/allStreetsQuadruple/Streets.txt
diff --git a/apps/gpu/LBM/Basel/resources/fourStreets/Junctions.txt b/apps/gpu/Basel/resources/fourStreets/Junctions.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/fourStreets/Junctions.txt
rename to apps/gpu/Basel/resources/fourStreets/Junctions.txt
diff --git a/apps/gpu/LBM/Basel/resources/fourStreets/Sinks.txt b/apps/gpu/Basel/resources/fourStreets/Sinks.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/fourStreets/Sinks.txt
rename to apps/gpu/Basel/resources/fourStreets/Sinks.txt
diff --git a/apps/gpu/LBM/Basel/resources/fourStreets/Sources.txt b/apps/gpu/Basel/resources/fourStreets/Sources.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/fourStreets/Sources.txt
rename to apps/gpu/Basel/resources/fourStreets/Sources.txt
diff --git a/apps/gpu/LBM/Basel/resources/fourStreets/Streets.txt b/apps/gpu/Basel/resources/fourStreets/Streets.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/fourStreets/Streets.txt
rename to apps/gpu/Basel/resources/fourStreets/Streets.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Junctions1.txt b/apps/gpu/Basel/resources/testStreets/Junctions1.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Junctions1.txt
rename to apps/gpu/Basel/resources/testStreets/Junctions1.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Junctions2.txt b/apps/gpu/Basel/resources/testStreets/Junctions2.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Junctions2.txt
rename to apps/gpu/Basel/resources/testStreets/Junctions2.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Junctions3.txt b/apps/gpu/Basel/resources/testStreets/Junctions3.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Junctions3.txt
rename to apps/gpu/Basel/resources/testStreets/Junctions3.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Junctions4.txt b/apps/gpu/Basel/resources/testStreets/Junctions4.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Junctions4.txt
rename to apps/gpu/Basel/resources/testStreets/Junctions4.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Junctions5.txt b/apps/gpu/Basel/resources/testStreets/Junctions5.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Junctions5.txt
rename to apps/gpu/Basel/resources/testStreets/Junctions5.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Junctions6.txt b/apps/gpu/Basel/resources/testStreets/Junctions6.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Junctions6.txt
rename to apps/gpu/Basel/resources/testStreets/Junctions6.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Sinks1.txt b/apps/gpu/Basel/resources/testStreets/Sinks1.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Sinks1.txt
rename to apps/gpu/Basel/resources/testStreets/Sinks1.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Sinks2.txt b/apps/gpu/Basel/resources/testStreets/Sinks2.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Sinks2.txt
rename to apps/gpu/Basel/resources/testStreets/Sinks2.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Sinks3.txt b/apps/gpu/Basel/resources/testStreets/Sinks3.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Sinks3.txt
rename to apps/gpu/Basel/resources/testStreets/Sinks3.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Sinks4.txt b/apps/gpu/Basel/resources/testStreets/Sinks4.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Sinks4.txt
rename to apps/gpu/Basel/resources/testStreets/Sinks4.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Sinks5.txt b/apps/gpu/Basel/resources/testStreets/Sinks5.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Sinks5.txt
rename to apps/gpu/Basel/resources/testStreets/Sinks5.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Sinks6.txt b/apps/gpu/Basel/resources/testStreets/Sinks6.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Sinks6.txt
rename to apps/gpu/Basel/resources/testStreets/Sinks6.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Sources1.txt b/apps/gpu/Basel/resources/testStreets/Sources1.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Sources1.txt
rename to apps/gpu/Basel/resources/testStreets/Sources1.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Sources2.txt b/apps/gpu/Basel/resources/testStreets/Sources2.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Sources2.txt
rename to apps/gpu/Basel/resources/testStreets/Sources2.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Sources3.txt b/apps/gpu/Basel/resources/testStreets/Sources3.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Sources3.txt
rename to apps/gpu/Basel/resources/testStreets/Sources3.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Sources4.txt b/apps/gpu/Basel/resources/testStreets/Sources4.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Sources4.txt
rename to apps/gpu/Basel/resources/testStreets/Sources4.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Sources5.txt b/apps/gpu/Basel/resources/testStreets/Sources5.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Sources5.txt
rename to apps/gpu/Basel/resources/testStreets/Sources5.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Sources6.txt b/apps/gpu/Basel/resources/testStreets/Sources6.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Sources6.txt
rename to apps/gpu/Basel/resources/testStreets/Sources6.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Streets1.txt b/apps/gpu/Basel/resources/testStreets/Streets1.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Streets1.txt
rename to apps/gpu/Basel/resources/testStreets/Streets1.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Streets2.txt b/apps/gpu/Basel/resources/testStreets/Streets2.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Streets2.txt
rename to apps/gpu/Basel/resources/testStreets/Streets2.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Streets3.txt b/apps/gpu/Basel/resources/testStreets/Streets3.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Streets3.txt
rename to apps/gpu/Basel/resources/testStreets/Streets3.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Streets4.txt b/apps/gpu/Basel/resources/testStreets/Streets4.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Streets4.txt
rename to apps/gpu/Basel/resources/testStreets/Streets4.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Streets5.txt b/apps/gpu/Basel/resources/testStreets/Streets5.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Streets5.txt
rename to apps/gpu/Basel/resources/testStreets/Streets5.txt
diff --git a/apps/gpu/LBM/Basel/resources/testStreets/Streets6.txt b/apps/gpu/Basel/resources/testStreets/Streets6.txt
similarity index 100%
rename from apps/gpu/LBM/Basel/resources/testStreets/Streets6.txt
rename to apps/gpu/Basel/resources/testStreets/Streets6.txt
diff --git a/apps/gpu/LBM/BaselMultiGPU/3rdPartyLinking.cmake b/apps/gpu/BaselMultiGPU/3rdPartyLinking.cmake
similarity index 100%
rename from apps/gpu/LBM/BaselMultiGPU/3rdPartyLinking.cmake
rename to apps/gpu/BaselMultiGPU/3rdPartyLinking.cmake
diff --git a/apps/gpu/LBM/BaselMultiGPU/CMakeLists.txt b/apps/gpu/BaselMultiGPU/CMakeLists.txt
similarity index 100%
rename from apps/gpu/LBM/BaselMultiGPU/CMakeLists.txt
rename to apps/gpu/BaselMultiGPU/CMakeLists.txt
diff --git a/apps/gpu/LBM/BaselMultiGPU/CMakePackage.cmake b/apps/gpu/BaselMultiGPU/CMakePackage.cmake
similarity index 100%
rename from apps/gpu/LBM/BaselMultiGPU/CMakePackage.cmake
rename to apps/gpu/BaselMultiGPU/CMakePackage.cmake
diff --git a/apps/gpu/LBM/BaselMultiGPU/main.cpp b/apps/gpu/BaselMultiGPU/main.cpp
similarity index 100%
rename from apps/gpu/LBM/BaselMultiGPU/main.cpp
rename to apps/gpu/BaselMultiGPU/main.cpp
diff --git a/apps/gpu/LBM/BaselMultiGPU/resources/ExampleStreets.txt b/apps/gpu/BaselMultiGPU/resources/ExampleStreets.txt
similarity index 100%
rename from apps/gpu/LBM/BaselMultiGPU/resources/ExampleStreets.txt
rename to apps/gpu/BaselMultiGPU/resources/ExampleStreets.txt
diff --git a/apps/gpu/LBM/BaselMultiGPU/resources/Junctions.txt b/apps/gpu/BaselMultiGPU/resources/Junctions.txt
similarity index 100%
rename from apps/gpu/LBM/BaselMultiGPU/resources/Junctions.txt
rename to apps/gpu/BaselMultiGPU/resources/Junctions.txt
diff --git a/apps/gpu/LBM/BaselMultiGPU/resources/Sinks.txt b/apps/gpu/BaselMultiGPU/resources/Sinks.txt
similarity index 100%
rename from apps/gpu/LBM/BaselMultiGPU/resources/Sinks.txt
rename to apps/gpu/BaselMultiGPU/resources/Sinks.txt
diff --git a/apps/gpu/LBM/BaselMultiGPU/resources/Sources.txt b/apps/gpu/BaselMultiGPU/resources/Sources.txt
similarity index 100%
rename from apps/gpu/LBM/BaselMultiGPU/resources/Sources.txt
rename to apps/gpu/BaselMultiGPU/resources/Sources.txt
diff --git a/apps/gpu/LBM/BaselNU/3rdPartyLinking.cmake b/apps/gpu/BaselNU/3rdPartyLinking.cmake
similarity index 100%
rename from apps/gpu/LBM/BaselNU/3rdPartyLinking.cmake
rename to apps/gpu/BaselNU/3rdPartyLinking.cmake
diff --git a/apps/gpu/LBM/BaselNU/CMakeLists.txt b/apps/gpu/BaselNU/CMakeLists.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/CMakeLists.txt
rename to apps/gpu/BaselNU/CMakeLists.txt
diff --git a/apps/gpu/LBM/BaselNU/CMakePackage.cmake b/apps/gpu/BaselNU/CMakePackage.cmake
similarity index 100%
rename from apps/gpu/LBM/BaselNU/CMakePackage.cmake
rename to apps/gpu/BaselNU/CMakePackage.cmake
diff --git a/apps/gpu/LBM/BaselNU/main.cpp b/apps/gpu/BaselNU/main.cpp
similarity index 100%
rename from apps/gpu/LBM/BaselNU/main.cpp
rename to apps/gpu/BaselNU/main.cpp
diff --git a/apps/gpu/LBM/BaselNU/resources/Junctions.txt b/apps/gpu/BaselNU/resources/Junctions.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/Junctions.txt
rename to apps/gpu/BaselNU/resources/Junctions.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/Sinks.txt b/apps/gpu/BaselNU/resources/Sinks.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/Sinks.txt
rename to apps/gpu/BaselNU/resources/Sinks.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/Sources.txt b/apps/gpu/BaselNU/resources/Sources.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/Sources.txt
rename to apps/gpu/BaselNU/resources/Sources.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/Streets.txt b/apps/gpu/BaselNU/resources/Streets.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/Streets.txt
rename to apps/gpu/BaselNU/resources/Streets.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/allStreets/Junctions.txt b/apps/gpu/BaselNU/resources/allStreets/Junctions.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/allStreets/Junctions.txt
rename to apps/gpu/BaselNU/resources/allStreets/Junctions.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/allStreets/Sinks.txt b/apps/gpu/BaselNU/resources/allStreets/Sinks.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/allStreets/Sinks.txt
rename to apps/gpu/BaselNU/resources/allStreets/Sinks.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/allStreets/Sources.txt b/apps/gpu/BaselNU/resources/allStreets/Sources.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/allStreets/Sources.txt
rename to apps/gpu/BaselNU/resources/allStreets/Sources.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/allStreets/Streets.txt b/apps/gpu/BaselNU/resources/allStreets/Streets.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/allStreets/Streets.txt
rename to apps/gpu/BaselNU/resources/allStreets/Streets.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/allStreetsDouble/Junctions.txt b/apps/gpu/BaselNU/resources/allStreetsDouble/Junctions.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/allStreetsDouble/Junctions.txt
rename to apps/gpu/BaselNU/resources/allStreetsDouble/Junctions.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/allStreetsDouble/Sinks.txt b/apps/gpu/BaselNU/resources/allStreetsDouble/Sinks.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/allStreetsDouble/Sinks.txt
rename to apps/gpu/BaselNU/resources/allStreetsDouble/Sinks.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/allStreetsDouble/Sources.txt b/apps/gpu/BaselNU/resources/allStreetsDouble/Sources.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/allStreetsDouble/Sources.txt
rename to apps/gpu/BaselNU/resources/allStreetsDouble/Sources.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/allStreetsDouble/Streets.txt b/apps/gpu/BaselNU/resources/allStreetsDouble/Streets.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/allStreetsDouble/Streets.txt
rename to apps/gpu/BaselNU/resources/allStreetsDouble/Streets.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/allStreetsOneClosed/Junctions.txt b/apps/gpu/BaselNU/resources/allStreetsOneClosed/Junctions.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/allStreetsOneClosed/Junctions.txt
rename to apps/gpu/BaselNU/resources/allStreetsOneClosed/Junctions.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/allStreetsOneClosed/Sinks.txt b/apps/gpu/BaselNU/resources/allStreetsOneClosed/Sinks.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/allStreetsOneClosed/Sinks.txt
rename to apps/gpu/BaselNU/resources/allStreetsOneClosed/Sinks.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/allStreetsOneClosed/Sources.txt b/apps/gpu/BaselNU/resources/allStreetsOneClosed/Sources.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/allStreetsOneClosed/Sources.txt
rename to apps/gpu/BaselNU/resources/allStreetsOneClosed/Sources.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/allStreetsOneClosed/Streets.txt b/apps/gpu/BaselNU/resources/allStreetsOneClosed/Streets.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/allStreetsOneClosed/Streets.txt
rename to apps/gpu/BaselNU/resources/allStreetsOneClosed/Streets.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/allStreetsQuadruple/Junctions.txt b/apps/gpu/BaselNU/resources/allStreetsQuadruple/Junctions.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/allStreetsQuadruple/Junctions.txt
rename to apps/gpu/BaselNU/resources/allStreetsQuadruple/Junctions.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/allStreetsQuadruple/Sinks.txt b/apps/gpu/BaselNU/resources/allStreetsQuadruple/Sinks.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/allStreetsQuadruple/Sinks.txt
rename to apps/gpu/BaselNU/resources/allStreetsQuadruple/Sinks.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/allStreetsQuadruple/Sources.txt b/apps/gpu/BaselNU/resources/allStreetsQuadruple/Sources.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/allStreetsQuadruple/Sources.txt
rename to apps/gpu/BaselNU/resources/allStreetsQuadruple/Sources.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/allStreetsQuadruple/Streets.txt b/apps/gpu/BaselNU/resources/allStreetsQuadruple/Streets.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/allStreetsQuadruple/Streets.txt
rename to apps/gpu/BaselNU/resources/allStreetsQuadruple/Streets.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/fourStreets/Junctions.txt b/apps/gpu/BaselNU/resources/fourStreets/Junctions.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/fourStreets/Junctions.txt
rename to apps/gpu/BaselNU/resources/fourStreets/Junctions.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/fourStreets/Sinks.txt b/apps/gpu/BaselNU/resources/fourStreets/Sinks.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/fourStreets/Sinks.txt
rename to apps/gpu/BaselNU/resources/fourStreets/Sinks.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/fourStreets/Sources.txt b/apps/gpu/BaselNU/resources/fourStreets/Sources.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/fourStreets/Sources.txt
rename to apps/gpu/BaselNU/resources/fourStreets/Sources.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/fourStreets/Streets.txt b/apps/gpu/BaselNU/resources/fourStreets/Streets.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/fourStreets/Streets.txt
rename to apps/gpu/BaselNU/resources/fourStreets/Streets.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Junctions1.txt b/apps/gpu/BaselNU/resources/testStreets/Junctions1.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Junctions1.txt
rename to apps/gpu/BaselNU/resources/testStreets/Junctions1.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Junctions2.txt b/apps/gpu/BaselNU/resources/testStreets/Junctions2.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Junctions2.txt
rename to apps/gpu/BaselNU/resources/testStreets/Junctions2.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Junctions3.txt b/apps/gpu/BaselNU/resources/testStreets/Junctions3.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Junctions3.txt
rename to apps/gpu/BaselNU/resources/testStreets/Junctions3.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Junctions4.txt b/apps/gpu/BaselNU/resources/testStreets/Junctions4.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Junctions4.txt
rename to apps/gpu/BaselNU/resources/testStreets/Junctions4.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Junctions5.txt b/apps/gpu/BaselNU/resources/testStreets/Junctions5.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Junctions5.txt
rename to apps/gpu/BaselNU/resources/testStreets/Junctions5.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Junctions6.txt b/apps/gpu/BaselNU/resources/testStreets/Junctions6.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Junctions6.txt
rename to apps/gpu/BaselNU/resources/testStreets/Junctions6.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Sinks1.txt b/apps/gpu/BaselNU/resources/testStreets/Sinks1.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Sinks1.txt
rename to apps/gpu/BaselNU/resources/testStreets/Sinks1.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Sinks2.txt b/apps/gpu/BaselNU/resources/testStreets/Sinks2.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Sinks2.txt
rename to apps/gpu/BaselNU/resources/testStreets/Sinks2.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Sinks3.txt b/apps/gpu/BaselNU/resources/testStreets/Sinks3.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Sinks3.txt
rename to apps/gpu/BaselNU/resources/testStreets/Sinks3.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Sinks4.txt b/apps/gpu/BaselNU/resources/testStreets/Sinks4.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Sinks4.txt
rename to apps/gpu/BaselNU/resources/testStreets/Sinks4.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Sinks5.txt b/apps/gpu/BaselNU/resources/testStreets/Sinks5.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Sinks5.txt
rename to apps/gpu/BaselNU/resources/testStreets/Sinks5.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Sinks6.txt b/apps/gpu/BaselNU/resources/testStreets/Sinks6.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Sinks6.txt
rename to apps/gpu/BaselNU/resources/testStreets/Sinks6.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Sources1.txt b/apps/gpu/BaselNU/resources/testStreets/Sources1.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Sources1.txt
rename to apps/gpu/BaselNU/resources/testStreets/Sources1.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Sources2.txt b/apps/gpu/BaselNU/resources/testStreets/Sources2.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Sources2.txt
rename to apps/gpu/BaselNU/resources/testStreets/Sources2.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Sources3.txt b/apps/gpu/BaselNU/resources/testStreets/Sources3.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Sources3.txt
rename to apps/gpu/BaselNU/resources/testStreets/Sources3.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Sources4.txt b/apps/gpu/BaselNU/resources/testStreets/Sources4.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Sources4.txt
rename to apps/gpu/BaselNU/resources/testStreets/Sources4.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Sources5.txt b/apps/gpu/BaselNU/resources/testStreets/Sources5.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Sources5.txt
rename to apps/gpu/BaselNU/resources/testStreets/Sources5.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Sources6.txt b/apps/gpu/BaselNU/resources/testStreets/Sources6.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Sources6.txt
rename to apps/gpu/BaselNU/resources/testStreets/Sources6.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Streets1.txt b/apps/gpu/BaselNU/resources/testStreets/Streets1.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Streets1.txt
rename to apps/gpu/BaselNU/resources/testStreets/Streets1.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Streets2.txt b/apps/gpu/BaselNU/resources/testStreets/Streets2.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Streets2.txt
rename to apps/gpu/BaselNU/resources/testStreets/Streets2.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Streets3.txt b/apps/gpu/BaselNU/resources/testStreets/Streets3.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Streets3.txt
rename to apps/gpu/BaselNU/resources/testStreets/Streets3.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Streets4.txt b/apps/gpu/BaselNU/resources/testStreets/Streets4.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Streets4.txt
rename to apps/gpu/BaselNU/resources/testStreets/Streets4.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Streets5.txt b/apps/gpu/BaselNU/resources/testStreets/Streets5.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Streets5.txt
rename to apps/gpu/BaselNU/resources/testStreets/Streets5.txt
diff --git a/apps/gpu/LBM/BaselNU/resources/testStreets/Streets6.txt b/apps/gpu/BaselNU/resources/testStreets/Streets6.txt
similarity index 100%
rename from apps/gpu/LBM/BaselNU/resources/testStreets/Streets6.txt
rename to apps/gpu/BaselNU/resources/testStreets/Streets6.txt
diff --git a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp b/apps/gpu/BoundaryLayer/BoundaryLayer.cpp
similarity index 97%
rename from apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
rename to apps/gpu/BoundaryLayer/BoundaryLayer.cpp
index 4d53d26b817227000f258f5b8424a2f6963bcf41..652b7785e05960140b1fcc275c07489ae206e048 100644
--- a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
+++ b/apps/gpu/BoundaryLayer/BoundaryLayer.cpp
@@ -31,28 +31,27 @@
 //! \author Henry Korb, Henrik Asmuth
 //=======================================================================================
 #define _USE_MATH_DEFINES
-#include <math.h>
-#include <string>
-#include <sstream>
-#include <iostream>
-#include <stdexcept>
-#include <fstream>
+#include <cmath>
 #include <exception>
+#include <fstream>
+#include <iostream>
 #include <memory>
 #include <numeric>
+#include <sstream>
+#include <stdexcept>
+#include <string>
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "DataTypes.h"
-#include "PointerDefinitions.h"
-
-#include "StringUtilities/StringUtil.h"
-
+#include <basics/DataTypes.h>
+#include <basics/PointerDefinitions.h>
+#include <basics/StringUtilities/StringUtil.h>
 #include <basics/config/ConfigurationFile.h>
-#include "basics/constants/NumericConstants.h"
+#include <basics/constants/NumericConstants.h>
 
 #include <logger/Logger.h>
 
+#include <parallel/MPICommunicator.h>
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -71,7 +70,6 @@
 //////////////////////////////////////////////////////////////////////////
 
 #include "VirtualFluids_GPU/LBM/Simulation.h"
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
@@ -102,19 +100,18 @@ using namespace vf::basics::constant;
 void multipleLevel(const std::string& configPath)
 {
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    vf::gpu::Communicator& communicator = vf::gpu::MpiCommunicator::getInstance();
+    vf::parallel::Communicator &communicator = *vf::parallel::MPICommunicator::getInstance();
 
     vf::basics::ConfigurationFile config;
     config.load(configPath);
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////^
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcesses(), communicator.getProcessID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
     GridScalingFactory scalingFactory  = GridScalingFactory();
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     
-    const int  nProcs = communicator.getNumberOfProcess();
-    const uint procID = vf::gpu::MpiCommunicator::getInstance().getPID();
+    const int nProcs = communicator.getNumberOfProcesses();
+    const uint procID = communicator.getProcessID();
     std::vector<uint> devices(10);
     std::iota(devices.begin(), devices.end(), 0);
     para->setDevices(devices);
@@ -422,7 +419,6 @@ void multipleLevel(const std::string& configPath)
         SPtr<PrecursorWriter> precursorWriter = std::make_shared<PrecursorWriter>("precursor", para->getOutputPath()+precursorDirectory, posXPrecursor, 0, L_y, 0, L_z, tStartPrecursor/dt, nTWritePrecursor, useDistributions? OutputVariable::Distributions: OutputVariable::Velocities, 1000);
         para->addProbe(precursorWriter);
     }
-
     auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
     auto gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
 
diff --git a/apps/gpu/LBM/BoundaryLayer/CMakeLists.txt b/apps/gpu/BoundaryLayer/CMakeLists.txt
similarity index 81%
rename from apps/gpu/LBM/BoundaryLayer/CMakeLists.txt
rename to apps/gpu/BoundaryLayer/CMakeLists.txt
index 801b634803943d48abda690935df0867eb3418d2..248c7a1616610715d38bcbff129b226da2cfd1f2 100644
--- a/apps/gpu/LBM/BoundaryLayer/CMakeLists.txt
+++ b/apps/gpu/BoundaryLayer/CMakeLists.txt
@@ -1,6 +1,6 @@
 PROJECT(BoundaryLayer LANGUAGES CUDA CXX)
 
-vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES BoundaryLayer.cpp)
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator FILES BoundaryLayer.cpp)
 
 set_source_files_properties(BoundaryLayer.cpp PROPERTIES LANGUAGE CUDA)
 
diff --git a/apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt b/apps/gpu/BoundaryLayer/configBoundaryLayer.txt
similarity index 100%
rename from apps/gpu/LBM/BoundaryLayer/configBoundaryLayer.txt
rename to apps/gpu/BoundaryLayer/configBoundaryLayer.txt
diff --git a/apps/gpu/LBM/ChannelFlow/CMakeLists.txt b/apps/gpu/ChannelFlow/CMakeLists.txt
similarity index 81%
rename from apps/gpu/LBM/ChannelFlow/CMakeLists.txt
rename to apps/gpu/ChannelFlow/CMakeLists.txt
index f5b1bfd40dc723a7c050091d6d1c82d5f9addbd5..3884074e05097b392d96a47287d5e5cad4c0d6f3 100644
--- a/apps/gpu/LBM/ChannelFlow/CMakeLists.txt
+++ b/apps/gpu/ChannelFlow/CMakeLists.txt
@@ -1,6 +1,6 @@
 PROJECT(ChannelFlow LANGUAGES CUDA CXX)
 
-vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES ChannelFlow.cpp)
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator FILES ChannelFlow.cpp)
 
 set_source_files_properties(ChannelFlow.cpp PROPERTIES LANGUAGE CUDA)
 
diff --git a/apps/gpu/LBM/ChannelFlow/ChannelFlow.cpp b/apps/gpu/ChannelFlow/ChannelFlow.cpp
similarity index 91%
rename from apps/gpu/LBM/ChannelFlow/ChannelFlow.cpp
rename to apps/gpu/ChannelFlow/ChannelFlow.cpp
index fb208bdc8356a202e0113736b3957ecd243f3b02..239d5ff75d3f9e5fc293c660153d398175b07756 100644
--- a/apps/gpu/LBM/ChannelFlow/ChannelFlow.cpp
+++ b/apps/gpu/ChannelFlow/ChannelFlow.cpp
@@ -41,15 +41,12 @@
 #include <stdexcept>
 #include <string>
 
-#include "mpi.h"
-
 //////////////////////////////////////////////////////////////////////////
 
-#include "DataTypes.h"
-
-#include "PointerDefinitions.h"
-#include "config/ConfigurationFile.h"
-#include <logger/Logger.h>
+#include <basics/DataTypes.h>
+#include <basics/PointerDefinitions.h>
+#include <basics/StringUtilities/StringUtil.h>
+#include <basics/config/ConfigurationFile.h>
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -64,7 +61,6 @@
 //////////////////////////////////////////////////////////////////////////
 
 #include "VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h"
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
@@ -75,6 +71,8 @@
 
 //////////////////////////////////////////////////////////////////////////
 
+#include <parallel/MPICommunicator.h>
+
 int main(int argc, char *argv[])
 {
     try {
@@ -94,21 +92,21 @@ int main(int argc, char *argv[])
         // setup simulation parameters (without config file)
         //////////////////////////////////////////////////////////////////////////
 
-        vf::gpu::Communicator &communicator = vf::gpu::MpiCommunicator::getInstance();
-        const int numberOfProcesses = communicator.getNumberOfProcess();
-        SPtr<Parameter> para = std::make_shared<Parameter>(numberOfProcesses, communicator.getPID());
+        vf::parallel::Communicator &communicator = *vf::parallel::MPICommunicator::getInstance();
+        const int numberOfProcesses = communicator.getNumberOfProcesses();
+        const auto processID = communicator.getProcessID();
+        SPtr<Parameter> para = std::make_shared<Parameter>(numberOfProcesses, processID);
         std::vector<uint> devices(10);
         std::iota(devices.begin(), devices.end(), 0);
         para->setDevices(devices);
-        para->setMaxDev(communicator.getNumberOfProcess());
+        para->setMaxDev(communicator.getNumberOfProcesses());
         BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
         //////////////////////////////////////////////////////////////////////////
         // setup logger
         //////////////////////////////////////////////////////////////////////////
-
         vf::logging::Logger::changeLogPath("output/vflog_process" +
-                                           std::to_string(vf::gpu::MpiCommunicator::getInstance().getPID()) + ".txt");
+                                           std::to_string(processID) + ".txt");
         vf::logging::Logger::initializeLogger();
 
         //////////////////////////////////////////////////////////////////////////
@@ -150,7 +148,6 @@ int main(int argc, char *argv[])
         para->setOutputPrefix("ChannelFlow");
         para->setMainKernel(vf::CollisionKernel::Compressible::K17CompressibleNavierStokes);
 
-        const uint generatePart = vf::gpu::MpiCommunicator::getInstance().getPID();
         real overlap = (real)8.0 * dx;
 
         if (numberOfProcesses > 1) {
@@ -159,15 +156,15 @@ int main(int argc, char *argv[])
             // add coarse grids
             //////////////////////////////////////////////////////////////////////////
 
-            real subdomainMinX = channelWidth * generatePart;
+            real subdomainMinX = channelWidth * processID;
             real subdomainMinXoverlap = subdomainMinX;
             real subdomainMaxX = subdomainMinX + channelWidth;
             real subdomainMaxXoverlap = subdomainMaxX;
 
-            if (generatePart != 0)
+            if (processID != 0)
                 subdomainMinXoverlap -= overlap;
 
-            if (generatePart != numberOfProcesses - 1)
+            if (processID != numberOfProcesses - 1)
                 subdomainMaxXoverlap += overlap;
 
             auto gridBuilder = std::make_shared<MultipleGridBuilder>();
@@ -192,14 +189,14 @@ int main(int argc, char *argv[])
             // configure communication neighbors
             //////////////////////////////////////////////////////////////////////////
 
-            if (generatePart != 0) {
+            if (processID != 0) {
                 gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
-                gridBuilder->setCommunicationProcess(CommunicationDirections::MX, generatePart - 1);
+                gridBuilder->setCommunicationProcess(CommunicationDirections::MX, processID - 1);
             }
 
-            if (generatePart != numberOfProcesses - 1) {
+            if (processID != numberOfProcesses - 1) {
                 gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
-                gridBuilder->setCommunicationProcess(CommunicationDirections::PX, generatePart + 1);
+                gridBuilder->setCommunicationProcess(CommunicationDirections::PX, processID + 1);
             }
 
             //////////////////////////////////////////////////////////////////////////
@@ -208,10 +205,10 @@ int main(int argc, char *argv[])
 
             gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
-            if (generatePart == 0) {
+            if (processID == 0) {
                 gridBuilder->setVelocityBoundaryCondition(SideType::MX, velocityLB, 0.0, 0.0);
             }
-            if (generatePart == numberOfProcesses - 1) {
+            if (processID == numberOfProcesses - 1) {
                 gridBuilder->setPressureBoundaryCondition(SideType::PX,
                                                           0.0); // set pressure boundary condition last
                 bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::OutflowNonReflective);
diff --git a/apps/gpu/DrivenCavity/CMakeLists.txt b/apps/gpu/DrivenCavity/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8646f29c13d689eaabee4274418620636643c592
--- /dev/null
+++ b/apps/gpu/DrivenCavity/CMakeLists.txt
@@ -0,0 +1,5 @@
+PROJECT(DrivenCavity LANGUAGES CXX)
+
+#LIST(APPEND CS_COMPILER_FLAGS_CXX "-DOMPI_SKIP_MPICXX" )
+
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator FILES DrivenCavity.cpp)
diff --git a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp b/apps/gpu/DrivenCavity/DrivenCavity.cpp
similarity index 98%
rename from apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
rename to apps/gpu/DrivenCavity/DrivenCavity.cpp
index 89d0f430c21f6433ebd8e1473db77d53712a0547..a1a37f3502b996de3378ebd98be0c4124460c029 100644
--- a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
+++ b/apps/gpu/DrivenCavity/DrivenCavity.cpp
@@ -46,6 +46,8 @@
 
 #include <logger/Logger.h>
 
+#include <parallel/MPICommunicator.h>
+
 //////////////////////////////////////////////////////////////////////////
 
 #include "GridGenerator/grid/BoundaryConditions/Side.h"
@@ -57,7 +59,6 @@
 
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
 #include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
@@ -155,7 +156,7 @@ int main()
         // set copy mesh to simulation
         //////////////////////////////////////////////////////////////////////////
 
-        vf::gpu::Communicator &communicator = vf::gpu::MpiCommunicator::getInstance();
+        vf::parallel::Communicator &communicator = *vf::parallel::MPICommunicator::getInstance();
 
         auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
         SPtr<GridProvider> gridGenerator =
diff --git a/apps/gpu/LBM/DrivenCavity/configDrivenCavity.txt b/apps/gpu/DrivenCavity/configDrivenCavity.txt
similarity index 100%
rename from apps/gpu/LBM/DrivenCavity/configDrivenCavity.txt
rename to apps/gpu/DrivenCavity/configDrivenCavity.txt
diff --git a/apps/gpu/DrivenCavityMultiGPU/CMakeLists.txt b/apps/gpu/DrivenCavityMultiGPU/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6460abb1cb56b770cf91f57239eeb8262f34595a
--- /dev/null
+++ b/apps/gpu/DrivenCavityMultiGPU/CMakeLists.txt
@@ -0,0 +1,3 @@
+PROJECT(DrivenCavityMultiGPU LANGUAGES CXX)
+
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator FILES DrivenCavityMultiGPU.cpp)
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp b/apps/gpu/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
similarity index 97%
rename from apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
rename to apps/gpu/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
index e97c7562142915c02e85bac9b7a6aed23072459e..f6d8a675a63e95f97f585352e092b4a63501cda5 100755
--- a/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
+++ b/apps/gpu/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
@@ -1,14 +1,12 @@
 #define _USE_MATH_DEFINES
 #include <exception>
+#include <filesystem>
 #include <fstream>
 #include <iostream>
 #include <memory>
 #include <sstream>
 #include <stdexcept>
 #include <string>
-#include <filesystem>
-
-#include "mpi.h"
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -35,7 +33,6 @@
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
@@ -52,7 +49,7 @@
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 
 //////////////////////////////////////////////////////////////////////////
-
+#include <parallel/MPICommunicator.h>
 #include "utilities/communication.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -61,11 +58,11 @@
 
 void runVirtualFluids(const vf::basics::ConfigurationFile& config)
 {
-    vf::gpu::Communicator& communicator = vf::gpu::MpiCommunicator::getInstance();
+    vf::parallel::Communicator &communicator = *vf::parallel::MPICommunicator::getInstance();
 
     auto gridBuilder = std::make_shared<MultipleGridBuilder>();
 
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcesses(), communicator.getProcessID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
     GridScalingFactory scalingFactory = GridScalingFactory();
 
@@ -142,7 +139,7 @@ void runVirtualFluids(const vf::basics::ConfigurationFile& config)
 
         if (para->getNumprocs() > 1) {
 
-            const uint generatePart = vf::gpu::MpiCommunicator::getInstance().getPID();
+            const uint generatePart = communicator.getProcessID();
             real overlap            = (real)8.0 * dxGrid;
             gridBuilder->setNumberOfLayers(10, 8);
 
@@ -150,7 +147,7 @@ void runVirtualFluids(const vf::basics::ConfigurationFile& config)
             const real ySplit = 0.0;
             const real zSplit = 0.0;
 
-            if (communicator.getNumberOfProcess() == 2) {
+            if (communicator.getNumberOfProcesses() == 2) {
 
                 if (generatePart == 0) {
                     gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xGridMax, yGridMax, zSplit + overlap,
@@ -197,7 +194,7 @@ void runVirtualFluids(const vf::basics::ConfigurationFile& config)
                 gridBuilder->setVelocityBoundaryCondition(SideType::PX, 0.0, 0.0, 0.0);
                 gridBuilder->setVelocityBoundaryCondition(SideType::PY, 0.0, 0.0, 0.0);
                 //////////////////////////////////////////////////////////////////////////
-            } else if (communicator.getNumberOfProcess() == 4) {
+            } else if (communicator.getNumberOfProcesses() == 4) {
 
                 if (generatePart == 0) {
                     gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xSplit + overlap, yGridMax,
@@ -281,7 +278,7 @@ void runVirtualFluids(const vf::basics::ConfigurationFile& config)
                     gridBuilder->setVelocityBoundaryCondition(SideType::PX, 0.0, 0.0, 0.0);
                 }
                 //////////////////////////////////////////////////////////////////////////
-            } else if (communicator.getNumberOfProcess() == 8) {
+            } else if (communicator.getNumberOfProcesses() == 8) {
 
                 if (generatePart == 0) {
                     gridBuilder->addCoarseGrid(xGridMin, yGridMin, zGridMin, xSplit + overlap, ySplit + overlap,
@@ -514,7 +511,7 @@ int main(int argc, char *argv[])
 
         try {
             VF_LOG_TRACE("For the default config path to work, execute the app from the project root.");
-            vf::basics::ConfigurationFile config = vf::basics::loadConfig(argc, argv, "./apps/gpu/LBM/DrivenCavityMultiGPU/configDrivenCavityMultiGPU.txt");
+            vf::basics::ConfigurationFile config = vf::basics::loadConfig(argc, argv, "./apps/gpu/DrivenCavityMultiGPU/configDrivenCavityMultiGPU.txt");
             runVirtualFluids(config);
 
             //////////////////////////////////////////////////////////////////////////
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/configDrivenCavityMultiGPU.txt b/apps/gpu/DrivenCavityMultiGPU/configDrivenCavityMultiGPU.txt
similarity index 100%
rename from apps/gpu/LBM/DrivenCavityMultiGPU/configDrivenCavityMultiGPU.txt
rename to apps/gpu/DrivenCavityMultiGPU/configDrivenCavityMultiGPU.txt
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix1GPU.txt b/apps/gpu/DrivenCavityMultiGPU/configPhoenix1GPU.txt
similarity index 100%
rename from apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix1GPU.txt
rename to apps/gpu/DrivenCavityMultiGPU/configPhoenix1GPU.txt
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix2GPU.txt b/apps/gpu/DrivenCavityMultiGPU/configPhoenix2GPU.txt
similarity index 100%
rename from apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix2GPU.txt
rename to apps/gpu/DrivenCavityMultiGPU/configPhoenix2GPU.txt
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU_regressionTest.txt b/apps/gpu/DrivenCavityMultiGPU/configPhoenix4GPU_regressionTest.txt
similarity index 100%
rename from apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU_regressionTest.txt
rename to apps/gpu/DrivenCavityMultiGPU/configPhoenix4GPU_regressionTest.txt
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix8GPU.txt b/apps/gpu/DrivenCavityMultiGPU/configPhoenix8GPU.txt
similarity index 100%
rename from apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix8GPU.txt
rename to apps/gpu/DrivenCavityMultiGPU/configPhoenix8GPU.txt
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix8GPU_regressionTest.txt b/apps/gpu/DrivenCavityMultiGPU/configPhoenix8GPU_regressionTest.txt
similarity index 100%
rename from apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix8GPU_regressionTest.txt
rename to apps/gpu/DrivenCavityMultiGPU/configPhoenix8GPU_regressionTest.txt
diff --git a/apps/gpu/DrivenCavityUniform/CMakeLists.txt b/apps/gpu/DrivenCavityUniform/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..78d6d693a54b251b8c3361ac908f6099f4d7ccf0
--- /dev/null
+++ b/apps/gpu/DrivenCavityUniform/CMakeLists.txt
@@ -0,0 +1,5 @@
+PROJECT(DrivenCavityUniform LANGUAGES CXX)
+
+#LIST(APPEND CS_COMPILER_FLAGS_CXX "-DOMPI_SKIP_MPICXX" )
+
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator FILES DrivenCavity.cpp)
diff --git a/apps/gpu/LBM/DrivenCavityUniform/DrivenCavity.cpp b/apps/gpu/DrivenCavityUniform/DrivenCavity.cpp
similarity index 96%
rename from apps/gpu/LBM/DrivenCavityUniform/DrivenCavity.cpp
rename to apps/gpu/DrivenCavityUniform/DrivenCavity.cpp
index d56d2da82cd29c5894594f13c37daafe287fd05c..70937e24aba53120ce20e3316324fd9f73d181e9 100644
--- a/apps/gpu/LBM/DrivenCavityUniform/DrivenCavity.cpp
+++ b/apps/gpu/DrivenCavityUniform/DrivenCavity.cpp
@@ -41,13 +41,13 @@
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "DataTypes.h"
-#include <logger/Logger.h>
-
-#include "PointerDefinitions.h"
+#include <basics/DataTypes.h>
+#include <basics/PointerDefinitions.h>
 
 #include <logger/Logger.h>
 
+#include <parallel/MPICommunicator.h>
+
 //////////////////////////////////////////////////////////////////////////
 
 #include "GridGenerator/grid/BoundaryConditions/Side.h"
@@ -59,22 +59,21 @@
 
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
 #include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 #include "VirtualFluids_GPU/LBM/Simulation.h"
 #include "VirtualFluids_GPU/Output/FileWriter.h"
 #include "VirtualFluids_GPU/Parameter/Parameter.h"
-#include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
 #include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
 
+
 //////////////////////////////////////////////////////////////////////////
 
 int main()
 {
     try {
-         vf::logging::Logger::initializeLogger();
+        vf::logging::Logger::initializeLogger();
         //////////////////////////////////////////////////////////////////////////
         // Simulation parameters
         //////////////////////////////////////////////////////////////////////////
@@ -160,13 +159,12 @@ int main()
         // set copy mesh to simulation
         //////////////////////////////////////////////////////////////////////////
 
-        vf::gpu::Communicator &communicator = vf::gpu::MpiCommunicator::getInstance();
+        vf::parallel::Communicator &communicator = *vf::parallel::MPICommunicator::getInstance();
 
         auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
         SPtr<GridProvider> gridGenerator =
             GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
 
-
         //////////////////////////////////////////////////////////////////////////
         // run simulation
         //////////////////////////////////////////////////////////////////////////
diff --git a/apps/gpu/LBM/DrivenCavityUniform/configDrivenCavity.txt b/apps/gpu/DrivenCavityUniform/configDrivenCavity.txt
similarity index 100%
rename from apps/gpu/LBM/DrivenCavityUniform/configDrivenCavity.txt
rename to apps/gpu/DrivenCavityUniform/configDrivenCavity.txt
diff --git a/apps/gpu/LBM/DrivenCavity/CMakeLists.txt b/apps/gpu/LBM/DrivenCavity/CMakeLists.txt
deleted file mode 100644
index 8384e1bc6fcfa3fd2514434b620b266e96b3626a..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/DrivenCavity/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-PROJECT(DrivenCavity LANGUAGES CUDA CXX)
-
-#LIST(APPEND CS_COMPILER_FLAGS_CXX "-DOMPI_SKIP_MPICXX" )
-
-vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES DrivenCavity.cpp)
-
-set_source_files_properties(DrivenCavity.cpp PROPERTIES LANGUAGE CUDA)
-
-set_target_properties(DrivenCavity PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/CMakeLists.txt b/apps/gpu/LBM/DrivenCavityMultiGPU/CMakeLists.txt
deleted file mode 100644
index 51b8db1edf126ebe7e2f3d5808496121270433c5..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/DrivenCavityMultiGPU/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-PROJECT(DrivenCavityMultiGPU LANGUAGES CUDA CXX)
-
-vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES DrivenCavityMultiGPU.cpp)
-
-set_source_files_properties(DrivenCavityMultiGPU.cpp PROPERTIES LANGUAGE CUDA)
-
-set_target_properties(DrivenCavityMultiGPU PROPERTIES 
-	CUDA_SEPARABLE_COMPILATION ON)
\ No newline at end of file
diff --git a/apps/gpu/LBM/DrivenCavityUniform/CMakeLists.txt b/apps/gpu/LBM/DrivenCavityUniform/CMakeLists.txt
deleted file mode 100644
index 40b4f08d7500c56efae7378df6398d065e4ecbfb..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/DrivenCavityUniform/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-PROJECT(DrivenCavityUniform LANGUAGES CUDA CXX)
-
-#LIST(APPEND CS_COMPILER_FLAGS_CXX "-DOMPI_SKIP_MPICXX" )
-
-vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES DrivenCavity.cpp)
-
-set_source_files_properties(DrivenCavity.cpp PROPERTIES LANGUAGE CUDA)
-
-set_target_properties(DrivenCavityUniform PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
-
diff --git a/apps/gpu/LBM/MusselOyster/CMakeLists.txt b/apps/gpu/LBM/MusselOyster/CMakeLists.txt
deleted file mode 100644
index 595d9ff7250d984f80e8d0d54dad0b11ae7e71e2..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/MusselOyster/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-PROJECT(MusselOyster LANGUAGES CUDA CXX)
-
-vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES MusselOyster.cpp)
-
-set_source_files_properties(MusselOyster.cpp PROPERTIES LANGUAGE CUDA)
-
-set_target_properties(MusselOyster PROPERTIES 
-	CUDA_SEPARABLE_COMPILATION ON)
\ No newline at end of file
diff --git a/apps/gpu/LBM/SphereGPU/CMakeLists.txt b/apps/gpu/LBM/SphereGPU/CMakeLists.txt
deleted file mode 100644
index 6a8f3cb1b86b149852abf4365e15fd4df9e8c0cf..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/SphereGPU/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-PROJECT(SphereGPU LANGUAGES CUDA CXX)
-
-vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES Sphere.cpp)
-
-set_source_files_properties(Sphere.cpp PROPERTIES LANGUAGE CUDA)
-
-set_target_properties(SphereGPU PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
diff --git a/apps/gpu/LBM/lbmTest/3rdPartyLinking.cmake b/apps/gpu/LBM/lbmTest/3rdPartyLinking.cmake
deleted file mode 100644
index 6dd6ba1bc73e73dfbf01a3cc36aaeb3664e1c04c..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/lbmTest/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,13 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Boost/Link.cmake)
-linkBoost(${targetName} "serialization")
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-linkMetis(${targetName})
-
-if(HULC.BUILD_JSONCPP)
-  include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-  linkJsonCpp(${targetName})
-endif()
\ No newline at end of file
diff --git a/apps/gpu/LBM/lbmTest/CMakeLists.txt b/apps/gpu/LBM/lbmTest/CMakeLists.txt
deleted file mode 100644
index 6ef6241b52aa9b5dc73a19b82f2b9fd5248c77c5..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/lbmTest/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR}) 
-
-set(linkDirectories "")
-set(libsToLink VirtualFluids_GPU GridGenerator)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src" "${CMAKE_SOURCE_DIR}/src/VirtualFluids_GPU" "${CMAKE_SOURCE_DIR}/src/GridGenerator" "${CMAKE_SOURCE_DIR}/src/VirtualFluidsBasics")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${lbmAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
\ No newline at end of file
diff --git a/apps/gpu/LBM/lbmTest/CMakePackage.cmake b/apps/gpu/LBM/lbmTest/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/lbmTest/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/LBM/lbmTest/main.cpp b/apps/gpu/LBM/lbmTest/main.cpp
deleted file mode 100644
index 90640a3813a25e8249c7668a6e6c7779b0f01e4d..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/lbmTest/main.cpp
+++ /dev/null
@@ -1,819 +0,0 @@
-//#define MPI_LOGGING
-
-//Martin Branch
-
-#include <mpi.h>
-#if defined( MPI_LOGGING )
-	#include <mpe.h>
-#endif
-
-#include <string>
-#include <iostream>
-#include <stdexcept>
-#include <fstream>
-#define _USE_MATH_DEFINES
-#include <math.h>
-
-#include "metis.h"
-
-#include "Input/Input.h"
-#include "StringUtilities/StringUtil.h"
-
-#include "VirtualFluids_GPU/LBM/Simulation.h"
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
-#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
-#include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
-#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
-#include "VirtualFluids_GPU/Parameter/Parameter.h"
-#include "VirtualFluids_GPU/Output/FileWriter.h"
-#include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
-
-#include "global.h"
-
-#include "geometries/Sphere/Sphere.h"
-#include "geometries/VerticalCylinder/VerticalCylinder.h"
-#include "geometries/Cuboid/Cuboid.h"
-#include "geometries/TriangularMesh/TriangularMesh.h"
-#include "geometries/Conglomerate/Conglomerate.h"
-#include "geometries/TriangularMesh/TriangularMeshStrategy.h"
-
-#include "grid/GridBuilder/LevelGridBuilder.h"
-#include "grid/GridBuilder/MultipleGridBuilder.h"
-#include "grid/BoundaryConditions/Side.h"
-#include "grid/BoundaryConditions/BoundaryCondition.h"
-#include "grid/GridFactory.h"
-
-#include "io/SimulationFileWriter/SimulationFileWriter.h"
-#include "io/GridVTKWriter/GridVTKWriter.h"
-#include "io/STLReaderWriter/STLReader.h"
-#include "io/STLReaderWriter/STLWriter.h"
-
-#include "utilities/math/Math.h"
-#include "utilities/communication.h"
-#include "utilities/transformator/TransformatorImp.h"
-
-std::string getGridPath(std::shared_ptr<Parameter> para, std::string Gridpath)
-{
-    if (para->getNumprocs() == 1)
-        return Gridpath + "/";
-    
-    return Gridpath + "/" + StringUtil::toString(para->getMyID()) + "/";
-}
-
-void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input> &input)
-{
-	Communicator* comm = Communicator::getInstanz();
-
-	para->setMaxDev(StringUtil::toInt(input->getValue("NumberOfDevices")));
-	para->setNumprocs(comm->getNumberOfProcess());
-	para->setDevices(StringUtil::toUintVector(input->getValue("Devices")));
-	para->setMyID(comm->getPID());
-	
-	std::string _path = input->getValue("Path");
-    std::string _prefix = input->getValue("Prefix");
-    std::string _gridpath = input->getValue("GridPath");
-    std::string gridPath = getGridPath(para, _gridpath);
-    para->setOutputPath(_path);
-    para->setOutputPrefix(_prefix);
-    para->setPathAndFilename(_path + "/" + _prefix);
-    para->setPrintFiles(false);
-    para->setPrintFiles(StringUtil::toBool(input->getValue("WriteGrid")));
-    para->setGeometryValues(StringUtil::toBool(input->getValue("GeometryValues")));
-    para->setCalc2ndOrderMoments(StringUtil::toBool(input->getValue("calc2ndOrderMoments")));
-    para->setCalc3rdOrderMoments(StringUtil::toBool(input->getValue("calc3rdOrderMoments")));
-    para->setCalcHighOrderMoments(StringUtil::toBool(input->getValue("calcHigherOrderMoments")));
-    para->setReadGeo(StringUtil::toBool(input->getValue("ReadGeometry")));
-    para->setCalcMedian(StringUtil::toBool(input->getValue("calcMedian")));
-    para->setConcFile(StringUtil::toBool(input->getValue("UseConcFile")));
-    para->setUseMeasurePoints(StringUtil::toBool(input->getValue("UseMeasurePoints")));
-    para->setUseWale(StringUtil::toBool(input->getValue("UseWale")));
-    para->setSimulatePorousMedia(StringUtil::toBool(input->getValue("SimulatePorousMedia")));
-    para->setD3Qxx(StringUtil::toInt(input->getValue("D3Qxx")));
-    para->setTimestepEnd(StringUtil::toInt(input->getValue("TimeEnd")));
-    para->setTimestepOut(StringUtil::toInt(input->getValue("TimeOut")));
-    para->setTimestepStartOut(StringUtil::toInt(input->getValue("TimeStartOut")));
-    para->setTimeCalcMedStart(StringUtil::toInt(input->getValue("TimeStartCalcMedian")));
-    para->setTimeCalcMedEnd(StringUtil::toInt(input->getValue("TimeEndCalcMedian")));
-    para->setPressInID(StringUtil::toInt(input->getValue("PressInID")));
-    para->setPressOutID(StringUtil::toInt(input->getValue("PressOutID")));
-    para->setPressInZ(StringUtil::toInt(input->getValue("PressInZ")));
-    para->setPressOutZ(StringUtil::toInt(input->getValue("PressOutZ")));
-    //////////////////////////////////////////////////////////////////////////
-    para->setDiffOn(StringUtil::toBool(input->getValue("DiffOn")));
-    para->setDiffMod(StringUtil::toInt(input->getValue("DiffMod")));
-    para->setDiffusivity(StringUtil::toFloat(input->getValue("Diffusivity")));
-    para->setTemperatureInit(StringUtil::toFloat(input->getValue("Temp")));
-    para->setTemperatureBC(StringUtil::toFloat(input->getValue("TempBC")));
-    //////////////////////////////////////////////////////////////////////////
-    para->setViscosityLB(StringUtil::toFloat(input->getValue("Viscosity_LB")));
-    para->setVelocityLB(StringUtil::toFloat(input->getValue("Velocity_LB")));
-    para->setViscosityRatio(StringUtil::toFloat(input->getValue("Viscosity_Ratio_World_to_LB")));
-    para->setVelocityRatio(StringUtil::toFloat(input->getValue("Velocity_Ratio_World_to_LB")));
-    para->setDensityRatio(StringUtil::toFloat(input->getValue("Density_Ratio_World_to_LB")));
-    para->setPressRatio(StringUtil::toFloat(input->getValue("Delta_Press")));
-    para->setRealX(StringUtil::toFloat(input->getValue("SliceRealX")));
-    para->setRealY(StringUtil::toFloat(input->getValue("SliceRealY")));
-    para->setFactorPressBC(StringUtil::toFloat(input->getValue("dfpbc")));
-    para->setGeometryFileC(input->getValue("GeometryC"));
-    para->setGeometryFileM(input->getValue("GeometryM"));
-    para->setGeometryFileF(input->getValue("GeometryF"));
-    //////////////////////////////////////////////////////////////////////////
-    para->setgeoVec(gridPath + input->getValue("geoVec"));
-    para->setcoordX(gridPath + input->getValue("coordX"));
-    para->setcoordY(gridPath + input->getValue("coordY"));
-    para->setcoordZ(gridPath + input->getValue("coordZ"));
-    para->setneighborX(gridPath + input->getValue("neighborX"));
-    para->setneighborY(gridPath + input->getValue("neighborY"));
-    para->setneighborZ(gridPath + input->getValue("neighborZ"));
-    para->setscaleCFC(gridPath + input->getValue("scaleCFC"));
-    para->setscaleCFF(gridPath + input->getValue("scaleCFF"));
-    para->setscaleFCC(gridPath + input->getValue("scaleFCC"));
-    para->setscaleFCF(gridPath + input->getValue("scaleFCF"));
-    para->setscaleOffsetCF(gridPath + input->getValue("scaleOffsetCF"));
-    para->setscaleOffsetFC(gridPath + input->getValue("scaleOffsetFC"));
-    para->setgeomBoundaryBcQs(gridPath + input->getValue("geomBoundaryBcQs"));
-    para->setgeomBoundaryBcValues(gridPath + input->getValue("geomBoundaryBcValues"));
-    para->setinletBcQs(gridPath + input->getValue("inletBcQs"));
-    para->setinletBcValues(gridPath + input->getValue("inletBcValues"));
-    para->setoutletBcQs(gridPath + input->getValue("outletBcQs"));
-    para->setoutletBcValues(gridPath + input->getValue("outletBcValues"));
-    para->settopBcQs(gridPath + input->getValue("topBcQs"));
-    para->settopBcValues(gridPath + input->getValue("topBcValues"));
-    para->setbottomBcQs(gridPath + input->getValue("bottomBcQs"));
-    para->setbottomBcValues(gridPath + input->getValue("bottomBcValues"));
-    para->setfrontBcQs(gridPath + input->getValue("frontBcQs"));
-    para->setfrontBcValues(gridPath + input->getValue("frontBcValues"));
-    para->setbackBcQs(gridPath + input->getValue("backBcQs"));
-    para->setbackBcValues(gridPath + input->getValue("backBcValues"));
-    para->setnumberNodes(gridPath + input->getValue("numberNodes"));
-    para->setLBMvsSI(gridPath + input->getValue("LBMvsSI"));
-    //////////////////////////////gridPath + ////////////////////////////////////////////
-    para->setmeasurePoints(gridPath + input->getValue("measurePoints"));
-    para->setpropellerValues(gridPath + input->getValue("propellerValues"));
-    para->setclockCycleForMP(StringUtil::toFloat(input->getValue("measureClockCycle")));
-    para->settimestepForMP(StringUtil::toInt(input->getValue("measureTimestep")));
-    para->setcpTop(gridPath + input->getValue("cpTop"));
-    para->setcpBottom(gridPath + input->getValue("cpBottom"));
-    para->setcpBottom2(gridPath + input->getValue("cpBottom2"));
-    para->setConcentration(gridPath + input->getValue("Concentration"));
-    //////////////////////////////////////////////////////////////////////////
-    //Normals - Geometry
-    para->setgeomBoundaryNormalX(gridPath + input->getValue("geomBoundaryNormalX"));
-    para->setgeomBoundaryNormalY(gridPath + input->getValue("geomBoundaryNormalY"));
-    para->setgeomBoundaryNormalZ(gridPath + input->getValue("geomBoundaryNormalZ"));
-    //Normals - Inlet
-    para->setInflowBoundaryNormalX(gridPath + input->getValue("inletBoundaryNormalX"));
-    para->setInflowBoundaryNormalY(gridPath + input->getValue("inletBoundaryNormalY"));
-    para->setInflowBoundaryNormalZ(gridPath + input->getValue("inletBoundaryNormalZ"));
-    //Normals - Outlet
-    para->setOutflowBoundaryNormalX(gridPath + input->getValue("outletBoundaryNormalX"));
-    para->setOutflowBoundaryNormalY(gridPath + input->getValue("outletBoundaryNormalY"));
-    para->setOutflowBoundaryNormalZ(gridPath + input->getValue("outletBoundaryNormalZ"));
-    //////////////////////////////////////////////////////////////////////////
-    //Forcing
-    para->setForcing(StringUtil::toFloat(input->getValue("ForcingX")), StringUtil::toFloat(input->getValue("ForcingY")), StringUtil::toFloat(input->getValue("ForcingZ")));
-    //////////////////////////////////////////////////////////////////////////
-    //Particles
-    para->setCalcParticles(StringUtil::toBool(input->getValue("calcParticles")));
-    para->setParticleBasicLevel(StringUtil::toInt(input->getValue("baseLevel")));
-    para->setParticleInitLevel(StringUtil::toInt(input->getValue("initLevel")));
-    para->setNumberOfParticles(StringUtil::toInt(input->getValue("numberOfParticles")));
-    para->setneighborWSB(gridPath + input->getValue("neighborWSB"));
-    para->setStartXHotWall(StringUtil::toDouble(input->getValue("startXHotWall")));
-    para->setEndXHotWall(StringUtil::toDouble(input->getValue("endXHotWall")));
-    //////////////////////////////////////////////////////////////////////////
-    //for Multi GPU
-    if (para->getNumprocs() > 1)
-    {
-        ////////////////////////////////////////////////////////////////////////////
-        ////1D domain decomposition
-        //std::vector<std::string> sendProcNeighbors;
-        //std::vector<std::string> recvProcNeighbors;
-        //for (int i = 0; i<para->getNumprocs();i++)
-        //{
-        // sendProcNeighbors.push_back(gridPath + StringUtil::toString(i) + "s.dat");
-        // recvProcNeighbors.push_back(gridPath + StringUtil::toString(i) + "r.dat");
-        //}
-        //para->setPossNeighborFiles(sendProcNeighbors, "send");
-        //para->setPossNeighborFiles(recvProcNeighbors, "recv");
-        //////////////////////////////////////////////////////////////////////////
-        //3D domain decomposition
-        std::vector<std::string> sendProcNeighborsX, sendProcNeighborsY, sendProcNeighborsZ;
-        std::vector<std::string> recvProcNeighborsX, recvProcNeighborsY, recvProcNeighborsZ;
-        for (int i = 0; i < para->getNumprocs(); i++)
-        {
-            sendProcNeighborsX.push_back(gridPath + StringUtil::toString(i) + "Xs.dat");
-            sendProcNeighborsY.push_back(gridPath + StringUtil::toString(i) + "Ys.dat");
-            sendProcNeighborsZ.push_back(gridPath + StringUtil::toString(i) + "Zs.dat");
-            recvProcNeighborsX.push_back(gridPath + StringUtil::toString(i) + "Xr.dat");
-            recvProcNeighborsY.push_back(gridPath + StringUtil::toString(i) + "Yr.dat");
-            recvProcNeighborsZ.push_back(gridPath + StringUtil::toString(i) + "Zr.dat");
-        }
-        para->setPossNeighborFilesX(sendProcNeighborsX, "send");
-        para->setPossNeighborFilesY(sendProcNeighborsY, "send");
-        para->setPossNeighborFilesZ(sendProcNeighborsZ, "send");
-        para->setPossNeighborFilesX(recvProcNeighborsX, "recv");
-        para->setPossNeighborFilesY(recvProcNeighborsY, "recv");
-        para->setPossNeighborFilesZ(recvProcNeighborsZ, "recv");
-    }
-    //////////////////////////////////////////////////////////////////////////
-    //para->setkFull(             input->getValue( "kFull" ));
-    //para->setgeoFull(           input->getValue( "geoFull" ));
-    //para->setnoSlipBcPos(       input->getValue( "noSlipBcPos" ));
-    //para->setnoSlipBcQs(          input->getValue( "noSlipBcQs" ));
-    //para->setnoSlipBcValues(      input->getValue( "noSlipBcValues" ));
-    //para->setnoSlipBcValue(     input->getValue( "noSlipBcValue" ));
-    //para->setslipBcPos(         input->getValue( "slipBcPos" ));
-    //para->setslipBcQs(          input->getValue( "slipBcQs" ));
-    //para->setslipBcValue(       input->getValue( "slipBcValue" ));
-    //para->setpressBcPos(        input->getValue( "pressBcPos" ));
-    //para->setpressBcQs(           input->getValue( "pressBcQs" ));
-    //para->setpressBcValues(       input->getValue( "pressBcValues" ));
-    //para->setpressBcValue(      input->getValue( "pressBcValue" ));
-    //para->setvelBcQs(             input->getValue( "velBcQs" ));
-    //para->setvelBcValues(         input->getValue( "velBcValues" ));
-    //para->setpropellerCylinder( input->getValue( "propellerCylinder" ));
-    //para->setpropellerQs(		 input->getValue( "propellerQs"      ));
-    //para->setwallBcQs(            input->getValue( "wallBcQs"         ));
-    //para->setwallBcValues(        input->getValue( "wallBcValues"     ));
-    //para->setperiodicBcQs(        input->getValue( "periodicBcQs"     ));
-    //para->setperiodicBcValues(    input->getValue( "periodicBcValues" ));
-    //cout << "Try this: " << para->getgeomBoundaryBcValues() << endl;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    //Restart
-    para->setTimeDoCheckPoint(StringUtil::toInt(input->getValue("TimeDoCheckPoint")));
-    para->setTimeDoRestart(StringUtil::toInt(input->getValue("TimeDoRestart")));
-    para->setDoCheckPoint(StringUtil::toBool(input->getValue("DoCheckPoint")));
-    para->setDoRestart(StringUtil::toBool(input->getValue("DoRestart")));
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    para->setMaxLevel(StringUtil::toInt(input->getValue("NOGL")));
-    para->setGridX(StringUtil::toIntVector(input->getValue("GridX")));                           
-    para->setGridY(StringUtil::toIntVector(input->getValue("GridY")));                           
-    para->setGridZ(StringUtil::toIntVector(input->getValue("GridZ")));                  
-    para->setDistX(StringUtil::toIntVector(input->getValue("DistX")));                  
-    para->setDistY(StringUtil::toIntVector(input->getValue("DistY")));                  
-    para->setDistZ(StringUtil::toIntVector(input->getValue("DistZ")));                
-
-    para->setNeedInterface(std::vector<bool>{true, true, true, true, true, true});
-}
-
-
-
-void multipleLevel(const std::string& configPath)
-{
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    //gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::RAYCASTING);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-    //gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_UNDER_TRIANGLE);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-    
-    SPtr<Parameter> para = Parameter::make();
-    BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
-    SPtr<GridProvider> gridGenerator;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    bool useGridGenerator = true;
-
-    if(useGridGenerator){
-
-        enum testCase{
-			TGV,
-			TGV3D,
-			Sphere,
-			DrivAer,
-            DLC,
-            MultiGPU
-        };
-
-        int testcase = TGV3D;
-        
-		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-		if (testcase == TGV)
-		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-		{
-			real dx = 1.0;
-			real vx = 0.049;
-			//////////////////////////////////////////////////////////////////////////
-			//32
-			gridBuilder->addCoarseGrid(-24, -2, -16,
-										24,  2,  16, dx);
-			//////////////////////////////////////////////////////////////////////////
-			gridBuilder->setPeriodicBoundaryCondition(true, true, true);
-			//////////////////////////////////////////////////////////////////////////
-			gridBuilder->buildGrids(true); 
-			//////////////////////////////////////////////////////////////////////////
-			SPtr<Grid> grid = gridBuilder->getGrid(gridBuilder->getNumberOfLevels() - 1);
-			//////////////////////////////////////////////////////////////////////////
-			gridBuilder->writeGridsToVtk("E:/temp/TaylorGreenVortex/results/32/TGV32turned_Grid");
-			gridBuilder->writeArrows("E:/temp/TaylorGreenVortex/results/32/TGV32turned_Grid_arrow");
-			//////////////////////////////////////////////////////////////////////////
-			SimulationFileWriter::write("E:/temp/TaylorGreenVortex/grids/turned/gridUni48x4x32/", gridBuilder, FILEFORMAT::BINARY);
-			//////////////////////////////////////////////////////////////////////////
-			return;
-			gridGenerator = GridGenerator::make(gridBuilder, para);
-		}
-
-		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-		if (testcase == TGV3D)
-		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-		{
-			const real PI = 3.141592653589793238462643383279;
-
-			real dx = 2.0 * PI / 32.0; // 32^3 nodes
-			//real dx = 2.0 * PI / 64.0; // 64^3 nodes
-			//real dx = 2.0 * PI / 128.0; // 128^3 nodes
-			//real dx = 2.0 * PI / 256.0; // 128^3 nodes
-			real vx = 0.049;
-
-			gridBuilder->addCoarseGrid(-PI, -PI, -PI,
-										PI,  PI,  PI, dx);
-
-			gridBuilder->setPeriodicBoundaryCondition(true, true, true);
-
-			gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
-			//////////////////////////////////////////////////////////////////////////
-			SPtr<Grid> grid = gridBuilder->getGrid(gridBuilder->getNumberOfLevels() - 1);
-			//////////////////////////////////////////////////////////////////////////
-			//32
-			gridBuilder->writeGridsToVtk("E:/temp/TaylorGreenVortex/results3D/32/TGV3D_Grid");
-			gridBuilder->writeArrows("E:/temp/TaylorGreenVortex/results3D/32/TGV3D_Grid_arrow");
-			SimulationFileWriter::write("E:/temp/TaylorGreenVortex/grids3D/gridTGV3D/32/", gridBuilder, FILEFORMAT::BINARY); //FILEFORMAT::ASCII
-			//256
-		    //gridBuilder->writeGridsToVtk("E:/temp/TaylorGreenVortex/results3D/256/TGV3D_Grid");
-			//gridBuilder->writeArrows("E:/temp/TaylorGreenVortex/results3D/256/TGV3D_Grid_arrow");
-			//SimulationFileWriter::write("E:/temp/TaylorGreenVortex/grids3D/gridTGV3D/256/", gridBuilder, FILEFORMAT::BINARY); //FILEFORMAT::ASCII
-
-			//return;
-
-			gridGenerator = GridGenerator::make(gridBuilder, para);
-		}
-
-		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        if( testcase == Sphere)
-        //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        {
-            real dx = 0.2;
-            real vx = 0.05;
-
-            auto SphereSTL = std::make_shared<TriangularMesh>("E:/temp/GridSphere/2018/STL/SphereNotOptimal.stl");
-
-            gridBuilder->addCoarseGrid(- 5.0, -5.0, -5.0,
-                                        10.0,  5.0,  5.0, dx);  // DrivAer
-
-            gridBuilder->setNumberOfLayers(10,8);
-            gridBuilder->addGrid(SphereSTL, 2);
-        
-            gridBuilder->addGeometry(SphereSTL);
-
-            gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-
-            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
-            //////////////////////////////////////////////////////////////////////////
-            gridBuilder->setVelocityBoundaryCondition(SideType::PY, vx , 0.0, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::MY, vx , 0.0, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vx , 0.0, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vx , 0.0, 0.0);
-
-            gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::MX, vx, 0.0, 0.0);
-
-            gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
-
-            bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityCompressible);
-            bcFactory.setGeometryBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipCompressible);
-            bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::PressureNonEquilibriumCompressible);
-            //////////////////////////////////////////////////////////////////////////
-            SPtr<Grid> grid = gridBuilder->getGrid(gridBuilder->getNumberOfLevels() - 1);
-            //////////////////////////////////////////////////////////////////////////
-
-            gridBuilder->writeGridsToVtk("E:/temp/GridSphere/2018/grids/outSphere/SphereBig3_Grid");
-            gridBuilder->writeArrows    ("E:/temp/GridSphere/2018/grids/outSphere/SphereBig3_Grid_arrow");
-
-            SimulationFileWriter::write("E:/temp/GridSphere/2018/grids/gridSphere/3lev_dxC_0_2/", gridBuilder, FILEFORMAT::BINARY); //FILEFORMAT::ASCII
-
-            //return;
-
-            gridGenerator = GridGenerator::make(gridBuilder, para);
-        }
-
-        //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        if( testcase == DrivAer )
-        //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        {
-            real dx = 0.2;
-            real vx = 0.05;
-
-            auto DrivAerSTL = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/DrivAer_Fastback_Coarse.stl");
-            //auto triangularMesh = std::make_shared<TriangularMesh>("M:/TestGridGeneration/STL/DrivAer_NoSTLGroups.stl");
-            //auto triangularMesh = std::make_shared<TriangularMesh>("M:/TestGridGeneration/STL/DrivAer_Coarse.stl");
-            //auto DrivAerSTL = std::make_shared<TriangularMesh>("stl/DrivAer_Fastback_Coarse.stl");
-
-            auto DrivAerRefBoxSTL = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/DrivAer_REF_BOX_Adrea.stl");
-            //auto DrivAerRefBoxSTL = std::make_shared<TriangularMesh>("stl/DrivAer_REF_BOX_Adrea.stl");
-
-            real z0 = 0.318+0.5*dx;
-
-            gridBuilder->addCoarseGrid(- 5.0, -5.0, 0.0 - z0,
-                                        15.0,  5.0, 5.0 - z0, dx);  // DrivAer
-
-            //Object* floorBox = std::make_shared<Cuboid>( -0.3, -1, -1, 4.0, 1, 0.2 );
-            //Object* wakeBox  = std::make_shared<Cuboid>(  3.5, -1, -1, 5.5, 1, 0.8 );
-
-            //Conglomerate* refRegion = new Conglomerate();
-
-            //refRegion->add(floorBox);
-            //refRegion->add(wakeBox);
-            //refRegion->add(DrivAerRefBoxSTL);
-
-            gridBuilder->setNumberOfLayers(10,8);
-            gridBuilder->addGrid(DrivAerRefBoxSTL, 2);
-        
-            //gridBuilder->setNumberOfLayers(10,8);
-            //gridBuilder->addGrid(DrivAerSTL, 5);
-
-            gridBuilder->addGeometry(DrivAerSTL);
-
-            gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-
-            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
-
-            //////////////////////////////////////////////////////////////////////////
-
-            gridBuilder->setVelocityBoundaryCondition(SideType::PY, vx , 0.0, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::MY, vx , 0.0, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vx , 0.0, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vx , 0.0, 0.0);
-
-            gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::MX, vx, 0.0, 0.0);
-
-            gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
-
-            bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityAndPressureCompressible);
-            bcFactory.setGeometryBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityCompressible);
-            bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::OutflowNonReflective);
-
-            //////////////////////////////////////////////////////////////////////////
-
-            SPtr<Grid> grid = gridBuilder->getGrid(gridBuilder->getNumberOfLevels() - 1);
-
-            gridBuilder->getGeometryBoundaryCondition(gridBuilder->getNumberOfLevels() - 1)->setTangentialVelocityForPatch( grid, 4, 0.0075, -2.0, 0.0,
-                                                                                                                                     0.0075,  2.0, 0.0, -vx, 0.318);
-            gridBuilder->getGeometryBoundaryCondition(gridBuilder->getNumberOfLevels() - 1)->setTangentialVelocityForPatch( grid, 3, 2.793 , -2.0, 0.0,
-                                                                                                                                     2.793 ,  2.0, 0.0, -vx, 0.318);
-
-            //////////////////////////////////////////////////////////////////////////
-
-            gridBuilder->writeGridsToVtk("F:/Work/Computations/gridGenerator/grid/DrivAer_Grid");
-            gridBuilder->writeArrows    ("F:/Work/Computations/gridGenerator/grid/DrivAer_Grid_arrow");
-
-            //SimulationFileWriter::write("D:/GRIDGENERATION/files/", gridBuilder, FILEFORMAT::ASCII);
-            //SimulationFileWriter::write("C:/Users/lenz/Desktop/Work/gridGenerator/grid/", gridBuilder, FILEFORMAT::ASCII);
-            SimulationFileWriter::write("F:/Work/Computations/gridGenerator/grid/", gridBuilder, FILEFORMAT::BINARY);
-            //SimulationFileWriter::write("grid/", gridBuilder, FILEFORMAT::ASCII);
-
-            return;
-
-            gridGenerator = GridGenerator::make(gridBuilder, para);
-        }
-
-        //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        if( testcase == DLC )
-        //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        {
-			real velocityRatio = 594.093427;
-
-			real dx = 0.2;
-			real vx = 0.065272188;
-
-			real z0 = 0.24395 + 0.5*dx;
-
-            std::vector<uint> ignorePatches = { 152, 153, 154 };
-
-            //auto VW370_SERIE_STL = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/VW370_SERIE.stl", ignorePatches);
-            auto VW370_SERIE_STL = std::make_shared<TriangularMesh>("stl/VW370_SERIE.stl", ignorePatches);
-
-            //auto DLC_RefBox = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox.stl");
-
-            //auto DLC_RefBox_1 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_4m.stl");
-            //auto DLC_RefBox_2 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_3m.stl");
-            //auto DLC_RefBox_3 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_2m.stl");
-            //auto DLC_RefBox_4 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC_RefBox_withWake/DLC_RefBox_withWake_1m.stl");
-
-            //auto DLC_RefBox_Level_3 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC/DLC_RefBox_Level_3.stl");
-            //auto DLC_RefBox_Level_4 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC/DLC_RefBox_Level_4.stl");
-            //auto DLC_RefBox_Level_5 = std::make_shared<TriangularMesh>("C:/Users/lenz/Desktop/Work/gridGenerator/stl/DLC/DLC_RefBox_Level_5.stl");
-
-            auto DLC_RefBox_Level_3 = std::make_shared<TriangularMesh>("stl/DLC/DLC_RefBox_Level_3.stl");
-            auto DLC_RefBox_Level_4 = std::make_shared<TriangularMesh>("stl/DLC/DLC_RefBox_Level_4.stl");
-            auto DLC_RefBox_Level_5 = std::make_shared<TriangularMesh>("stl/DLC/DLC_RefBox_Level_5.stl");
-
-            //auto VW370_SERIE_STL = std::make_shared<TriangularMesh>("stl/VW370_SERIE.stl", ignorePatches);
-            //auto DLC_RefBox = std::make_shared<TriangularMesh>("stl/DLC_RefBox.lnx.stl");
-            //auto DLC_RefBox_4 = std::make_shared<TriangularMesh>("stl/DLC_RefBox_withWake/DLC_RefBox_withWake_1m.lnx.stl");
-
-            gridBuilder->addCoarseGrid(-30.0, -20.0,  0.0 - z0,
-                                        50.0,  20.0, 25.0 - z0, dx);
-            
-            gridBuilder->setNumberOfLayers(10,8);
-            gridBuilder->addGrid( std::make_shared<Cuboid>( - 6.6, -6, -0.7, 20.6 , 6, 5.3  ), 1 );
-            gridBuilder->addGrid( std::make_shared<Cuboid>( -3.75, -3, -0.7, 11.75, 3, 2.65 ), 2 );
-
-            gridBuilder->setNumberOfLayers(10,8);
-            gridBuilder->addGrid(DLC_RefBox_Level_3, 3);
-            gridBuilder->addGrid(DLC_RefBox_Level_4, 4);
-        
-            Conglomerate* refinement = new Conglomerate();
-            refinement->add(DLC_RefBox_Level_5);
-            refinement->add(VW370_SERIE_STL);
-
-            gridBuilder->setNumberOfLayers(10,8);
-            gridBuilder->addGrid(refinement, 5);
-
-            gridBuilder->addGeometry(VW370_SERIE_STL);
-
-            gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-
-            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
-
-            //////////////////////////////////////////////////////////////////////////
-
-            gridBuilder->setVelocityBoundaryCondition(SideType::PY, vx , 0.0, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::MY, vx , 0.0, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vx , 0.0, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vx , 0.0, 0.0);
-
-            gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::MX, vx, 0.0, 0.0);
-
-            gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
-
-            bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityAndPressureCompressible);
-            bcFactory.setGeometryBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipCompressible);
-            bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::OutflowNonReflective);
-
-            //////////////////////////////////////////////////////////////////////////
-
-            SPtr<Grid> grid = gridBuilder->getGrid(gridBuilder->getNumberOfLevels() - 1);
-
-            real wheelsFrontX = -0.081;
-            real wheelsRearX  =  2.5486;
-
-            real wheelsFrontZ =  0.0504;
-            real wheelsRearZ  =  0.057;
-
-            real wheelsRadius =  0.318;
-
-			real wheelRotationFrequency = 1170.74376 / 60.0;
-
-			real wheelTangentialVelocity = -2.0 * M_PI * wheelsRadius * wheelRotationFrequency / velocityRatio;
-
-            std::vector<uint> frontWheelPatches = { 71, 86, 87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97, 159 };
-            std::vector<uint> rearWheelPatches  = { 82, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 160 };
-
-            for( uint patch : frontWheelPatches ){
-                gridBuilder->getGeometryBoundaryCondition(gridBuilder->getNumberOfLevels() - 1)->setTangentialVelocityForPatch( grid, patch, wheelsFrontX, -2.0, wheelsFrontZ,
-                                                                                                                                             wheelsFrontX,  2.0, wheelsFrontZ, 
-					                                                                                                                         wheelTangentialVelocity, wheelsRadius);
-            }
-
-            for( uint patch : rearWheelPatches ){
-                gridBuilder->getGeometryBoundaryCondition(gridBuilder->getNumberOfLevels() - 1)->setTangentialVelocityForPatch( grid, patch, wheelsRearX , -2.0, wheelsRearZ ,
-                                                                                                                                             wheelsRearX ,  2.0, wheelsRearZ , 
-					                                                                                                                         wheelTangentialVelocity, wheelsRadius);
-            }
-
-            //////////////////////////////////////////////////////////////////////////
-
-            //gridBuilder->writeGridsToVtk("C:/Users/lenz/Desktop/Work/gridGenerator/grid/DLC_Grid");
-            //gridBuilder->writeArrows    ("C:/Users/lenz/Desktop/Work/gridGenerator/grid/DLC_Grid_arrow");
-
-            gridBuilder->writeGridsToVtk("grid/DLC_Grid");
-            gridBuilder->writeArrows    ("grid/DLC_Grid_arrow");
-
-            //SimulationFileWriter::write("D:/GRIDGENERATION/files/", gridBuilder, FILEFORMAT::ASCII);
-            //SimulationFileWriter::write("C:/Users/lenz/Desktop/Work/gridGenerator/grid/", gridBuilder, FILEFORMAT::ASCII);
-            SimulationFileWriter::write("grid/", gridBuilder, FILEFORMAT::ASCII);
-
-            gridGenerator = GridGenerator::make(gridBuilder, para);
-        }
-
-        //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        if( testcase == MultiGPU )
-        //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        {
-            //const uint generatePart = 1;
-            const uint generatePart = Communicator::getInstanz()->getPID();
-            
-            std::ofstream logFile2;
-            
-            if( generatePart == 0 )
-                logFile2.open( "F:/Work/Computations/gridGenerator/grid/0/gridGeneratorLog.txt" );
-                //logFile2.open( "grid/0/gridGeneratorLog.txt" );
-            
-            if( generatePart == 1 )
-                logFile2.open( "F:/Work/Computations/gridGenerator/grid/1/gridGeneratorLog.txt" );
-                //logFile2.open( "grid/1/gridGeneratorLog.txt" );
-
-            real dx = 1.0 / 40.0;
-            real vx = 0.05;
-
-            auto triangularMesh = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/ShpereNotOptimal.stl");
-            //auto triangularMesh = std::make_shared<TriangularMesh>("stl/ShpereNotOptimal.lnx.stl");
-
-            // all
-            //gridBuilder->addCoarseGrid(-2, -2, -2,  
-            //                            4,  2,  2, dx);
-
-            real overlap = 10.0 * dx;
-
-            if( generatePart == 0 )
-                gridBuilder->addCoarseGrid(-2.0          , -2.0, -2.0,  
-                                            0.5 + overlap,  2.0,  2.0, dx);
-
-            if( generatePart == 1 )
-                gridBuilder->addCoarseGrid( 0.5 - overlap, -2.0, -2.0,  
-                                            4.0          ,  2.0,  2.0, dx);
-
-
-            gridBuilder->setNumberOfLayers(10,8);
-            gridBuilder->addGrid(triangularMesh, 1);
-
-            gridBuilder->addGeometry(triangularMesh);
-            
-            if( generatePart == 0 )
-                gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( -2.0, 0.5, 
-                                                                             -2.0, 2.0, 
-                                                                             -2.0, 2.0 ) );
-            
-            if( generatePart == 1 )
-                gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>(  0.5, 4.0, 
-                                                                             -2.0, 2.0, 
-                                                                             -2.0, 2.0 ) );
-
-            gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-
-            gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
-            
-            if( generatePart == 0 ){
-                gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
-                gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 1);
-            }
-            
-            if( generatePart == 1 ){
-                gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
-                gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 0);
-            }
-
-            //////////////////////////////////////////////////////////////////////////
-
-            gridBuilder->setVelocityBoundaryCondition(SideType::PY, vx , 0.0, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::MY, vx , 0.0, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vx , 0.0, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vx , 0.0, 0.0);
-
-            if (generatePart == 0) {
-                gridBuilder->setVelocityBoundaryCondition(SideType::MX, vx, 0.0, 0.0);
-            }
-            if (generatePart == 1) {
-                gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
-            }
-
-            gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
-
-            bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityCompressible);
-            bcFactory.setGeometryBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipCompressible);
-            bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::PressureNonEquilibriumCompressible);
-            //////////////////////////////////////////////////////////////////////////
-
-            if (generatePart == 0) {
-                //gridBuilder->writeGridsToVtk("F:/Work/Computations/gridGenerator/grid/0/Test_");
-                //gridBuilder->writeArrows    ("F:/Work/Computations/gridGenerator/grid/0/Test_Arrow");
-            }
-            if (generatePart == 1) {
-                //gridBuilder->writeGridsToVtk("F:/Work/Computations/gridGenerator/grid/1/Test_");
-                //gridBuilder->writeArrows    ("F:/Work/Computations/gridGenerator/grid/1/Test_Arrow");
-            }
-
-            if (generatePart == 0)
-                SimulationFileWriter::write("F:/Work/Computations/gridGenerator/grid/0/", gridBuilder, FILEFORMAT::ASCII);
-                //SimulationFileWriter::write("grid/0/", gridBuilder, FILEFORMAT::ASCII);
-            if (generatePart == 1)
-                SimulationFileWriter::write("F:/Work/Computations/gridGenerator/grid/1/", gridBuilder, FILEFORMAT::ASCII);
-                //SimulationFileWriter::write("grid/1/", gridBuilder, FILEFORMAT::ASCII);
-
-            //return;
-
-            gridGenerator = GridGenerator::make(gridBuilder, para);
-        }
-
-    }
-    else
-    {
-        gridGenerator = GridReader::make(FileFormat::BINARY, para);
-        //gridGenerator = GridReader::make(FileFormat::ASCII, para);
-    }
-
-    logFile.close();
-
-    //return;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-
-    std::ifstream stream;
-    stream.open(configPath.c_str(), std::ios::in);
-    if (stream.fail())
-        throw std::runtime_error("can not open config file!");
-
-    UPtr<input::Input> input = input::Input::makeInput(stream, "config");
-
-    setParameters(para, input);
-
-    Simulation sim;
-    SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
-    sim.init(para, gridGenerator, fileWriter);
-    sim.run();
-}
-
-
-int main( int argc, char* argv[])
-{
-     MPI_Init(&argc, &argv);
-    std::string str, str2; 
-    if ( argv != NULL )
-    {
-        str = static_cast<std::string>(argv[0]);
-        if (argc > 1)
-        {
-            str2 = static_cast<std::string>(argv[1]);
-            try
-            {
-                multipleLevel(str2);
-            }
-            catch (const std::exception& e)
-            {
-                //MPI_Abort(MPI_COMM_WORLD, -1);
-            }
-            catch (...)
-            {
-                std::cout << "unknown exeption" << std::endl;
-            }
-        }
-        else
-        {
-            try
-            {
-                //multipleLevel("C:/Users/schoen/Desktop/bin/3D/VirtualFluidsGpuCodes/Sphere/configSphere.txt");
-				multipleLevel("C:/Users/schoen/Desktop/bin/3D/VirtualFluidsGpuCodes/TGV3D/configTGV3D.txt");
-			}
-            catch (const std::exception& e)
-            {
-                std::cout << e.what() << std::flush;
-                //MPI_Abort(MPI_COMM_WORLD, -1);
-            }
-            catch (const std::bad_alloc e)
-            {
-                std::cout << e.what() << std::flush;
-                //MPI_Abort(MPI_COMM_WORLD, -1);
-            }
-            catch (...)
-            {
-                std::cout << "unknown exeption" << std::endl;
-            }
-
-            std::cout << "\nConfiguration file must be set!: lbmgm <config file>" << std::endl << std::flush;
-            //MPI_Abort(MPI_COMM_WORLD, -1);
-        }
-    }
-
-
-   /*
-   MPE_Init_log() & MPE_Finish_log() are NOT needed when
-   liblmpe.a is linked with this program.  In that case,
-   MPI_Init() would have called MPE_Init_log() already.
-   */
-#if defined( MPI_LOGGING )
-   MPE_Init_log();
-#endif
-
-#if defined( MPI_LOGGING )
-   if ( argv != NULL )
-      MPE_Finish_log( argv[0] );
-   if ( str != "" )
-      MPE_Finish_log( str.c_str() );
-   else
-      MPE_Finish_log( "TestLog" );
-#endif
-
-   MPI_Finalize();
-   return 0;
-}
diff --git a/apps/gpu/LBM/metisTest/3rdPartyLinking.cmake b/apps/gpu/LBM/metisTest/3rdPartyLinking.cmake
deleted file mode 100644
index 6dd6ba1bc73e73dfbf01a3cc36aaeb3664e1c04c..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/metisTest/3rdPartyLinking.cmake
+++ /dev/null
@@ -1,13 +0,0 @@
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/MPI/Link.cmake)
-linkMPI(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Cuda/Link.cmake)
-linkCuda(${targetName})
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Boost/Link.cmake)
-linkBoost(${targetName} "serialization")
-include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/Metis/Link.cmake)
-linkMetis(${targetName})
-
-if(HULC.BUILD_JSONCPP)
-  include (${CMAKE_SOURCE_DIR}/${cmakeMacroPath}/JsonCpp/Link.cmake)
-  linkJsonCpp(${targetName})
-endif()
\ No newline at end of file
diff --git a/apps/gpu/LBM/metisTest/CMakeLists.txt b/apps/gpu/LBM/metisTest/CMakeLists.txt
deleted file mode 100644
index 6ef6241b52aa9b5dc73a19b82f2b9fd5248c77c5..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/metisTest/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-setTargetNameToFolderName(${CMAKE_CURRENT_LIST_DIR}) 
-
-set(linkDirectories "")
-set(libsToLink VirtualFluids_GPU GridGenerator)
-set(includeDirectories "${CMAKE_SOURCE_DIR}/src" "${CMAKE_SOURCE_DIR}/src/VirtualFluids_GPU" "${CMAKE_SOURCE_DIR}/src/GridGenerator" "${CMAKE_SOURCE_DIR}/src/VirtualFluidsBasics")
-
-#glob files and save in MY_SRCS
-include(CMakePackage.cmake)
-
-buildExe(${targetName} "${MY_SRCS}" "${linkDirectories}" "${libsToLink}" "${includeDirectories}")
-groupTarget(${targetName} ${lbmAppFolder})
-
-# Specify the linking to 3rdParty libs
-include(3rdPartyLinking.cmake)
\ No newline at end of file
diff --git a/apps/gpu/LBM/metisTest/CMakePackage.cmake b/apps/gpu/LBM/metisTest/CMakePackage.cmake
deleted file mode 100644
index 5d39e3804dbd180790629111449a7dc918292430..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/metisTest/CMakePackage.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-#FILE ENDINGS
-resetFileEndingsToCollect()
-addCAndCPPFileTypes()
-addFileEndingToCollect("*.cu")
-addFileEndingToCollect("*.cuh")
-
-#GLOB SOURCE FILES IN MY_SRCS
-unset(MY_SRCS)
-includeRecursiveAllFilesFrom(${targetName} ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
diff --git a/apps/gpu/LBM/metisTest/main.cpp b/apps/gpu/LBM/metisTest/main.cpp
deleted file mode 100644
index 9edfb9e853f1c4fff6872b15e93d4de94bd35095..0000000000000000000000000000000000000000
--- a/apps/gpu/LBM/metisTest/main.cpp
+++ /dev/null
@@ -1,829 +0,0 @@
-//#define MPI_LOGGING
-
-
-#include <mpi.h>
-#if defined( MPI_LOGGING )
-	#include <mpe.h>
-#endif
-
-#include <string>
-#include <iostream>
-#include <stdexcept>
-#include <fstream>
-#define _USE_MATH_DEFINES
-#include <math.h>
-
-#include "metis.h"
-
-#include "Input/Input.h"
-#include "StringUtilities/StringUtil.h"
-
-#include "VirtualFluids_GPU/LBM/Simulation.h"
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
-#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
-#include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
-#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
-#include "VirtualFluids_GPU/Parameter/Parameter.h"
-#include "VirtualFluids_GPU/Output/FileWriter.h"
-
-#include "global.h"
-
-#include "geometries/Sphere/Sphere.h"
-#include "geometries/VerticalCylinder/VerticalCylinder.h"
-#include "geometries/Cuboid/Cuboid.h"
-#include "geometries/TriangularMesh/TriangularMesh.h"
-#include "geometries/Conglomerate/Conglomerate.h"
-#include "geometries/TriangularMesh/TriangularMeshStrategy.h"
-
-#include "grid/GridBuilder/LevelGridBuilder.h"
-#include "grid/GridBuilder/MultipleGridBuilder.h"
-#include "grid/BoundaryConditions/Side.h"
-#include "grid/BoundaryConditions/BoundaryCondition.h"
-#include "grid/GridFactory.h"
-
-#include "io/SimulationFileWriter/SimulationFileWriter.h"
-#include "io/GridVTKWriter/GridVTKWriter.h"
-#include "io/STLReaderWriter/STLReader.h"
-#include "io/STLReaderWriter/STLWriter.h"
-
-#include "utilities/math/Math.h"
-#include "utilities/communication.h"
-#include "utilities/transformator/TransformatorImp.h"
-
-using namespace vf::gpu;
-
-std::string getGridPath(std::shared_ptr<Parameter> para, std::string Gridpath)
-{
-    if (para->getNumprocs() == 1)
-        return Gridpath + "/";
-    
-    return Gridpath + "/" + StringUtil::toString(para->getMyID()) + "/";
-}
-
-void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input> &input)
-{
-	Communicator* comm = Communicator::getInstanz();
-
-	para->setMaxDev(StringUtil::toInt(input->getValue("NumberOfDevices")));
-	para->setNumprocs(comm->getNumberOfProcess());
-	para->setDevices(StringUtil::toUintVector(input->getValue("Devices")));
-	para->setMyID(comm->getPID());
-	
-	std::string _path = input->getValue("Path");
-    std::string _prefix = input->getValue("Prefix");
-    std::string _gridpath = input->getValue("GridPath");
-    std::string gridPath = getGridPath(para, _gridpath);
-    para->setOutputPath(_path);
-    para->setOutputPrefix(_prefix);
-    para->setPathAndFilename(_path + "/" + _prefix);
-    para->setPrintFiles(false);
-    para->setPrintFiles(StringUtil::toBool(input->getValue("WriteGrid")));
-    para->setGeometryValues(StringUtil::toBool(input->getValue("GeometryValues")));
-    para->setCalc2ndOrderMoments(StringUtil::toBool(input->getValue("calc2ndOrderMoments")));
-    para->setCalc3rdOrderMoments(StringUtil::toBool(input->getValue("calc3rdOrderMoments")));
-    para->setCalcHighOrderMoments(StringUtil::toBool(input->getValue("calcHigherOrderMoments")));
-    para->setReadGeo(StringUtil::toBool(input->getValue("ReadGeometry")));
-    para->setCalcMedian(StringUtil::toBool(input->getValue("calcMedian")));
-    para->setConcFile(StringUtil::toBool(input->getValue("UseConcFile")));
-    para->setUseMeasurePoints(StringUtil::toBool(input->getValue("UseMeasurePoints")));
-    para->setUseWale(StringUtil::toBool(input->getValue("UseWale")));
-    para->setSimulatePorousMedia(StringUtil::toBool(input->getValue("SimulatePorousMedia")));
-    para->setD3Qxx(StringUtil::toInt(input->getValue("D3Qxx")));
-    para->setTimestepEnd(StringUtil::toInt(input->getValue("TimeEnd")));
-    para->setTimestepOut(StringUtil::toInt(input->getValue("TimeOut")));
-    para->setTimestepStartOut(StringUtil::toInt(input->getValue("TimeStartOut")));
-    para->setTimeCalcMedStart(StringUtil::toInt(input->getValue("TimeStartCalcMedian")));
-    para->setTimeCalcMedEnd(StringUtil::toInt(input->getValue("TimeEndCalcMedian")));
-    para->setPressInID(StringUtil::toInt(input->getValue("PressInID")));
-    para->setPressOutID(StringUtil::toInt(input->getValue("PressOutID")));
-    para->setPressInZ(StringUtil::toInt(input->getValue("PressInZ")));
-    para->setPressOutZ(StringUtil::toInt(input->getValue("PressOutZ")));
-    //////////////////////////////////////////////////////////////////////////
-    para->setDiffOn(StringUtil::toBool(input->getValue("DiffOn")));
-    para->setDiffMod(StringUtil::toInt(input->getValue("DiffMod")));
-    para->setDiffusivity(StringUtil::toFloat(input->getValue("Diffusivity")));
-    para->setTemperatureInit(StringUtil::toFloat(input->getValue("Temp")));
-    para->setTemperatureBC(StringUtil::toFloat(input->getValue("TempBC")));
-    //////////////////////////////////////////////////////////////////////////
-    para->setViscosityLB(StringUtil::toFloat(input->getValue("Viscosity_LB")));
-    para->setVelocityLB(StringUtil::toFloat(input->getValue("Velocity_LB")));
-    para->setViscosityRatio(StringUtil::toFloat(input->getValue("Viscosity_Ratio_World_to_LB")));
-    para->setVelocityRatio(StringUtil::toFloat(input->getValue("Velocity_Ratio_World_to_LB")));
-    para->setDensityRatio(StringUtil::toFloat(input->getValue("Density_Ratio_World_to_LB")));
-    para->setPressRatio(StringUtil::toFloat(input->getValue("Delta_Press")));
-    para->setRealX(StringUtil::toFloat(input->getValue("SliceRealX")));
-    para->setRealY(StringUtil::toFloat(input->getValue("SliceRealY")));
-    para->setFactorPressBC(StringUtil::toFloat(input->getValue("dfpbc")));
-    para->setGeometryFileC(input->getValue("GeometryC"));
-    para->setGeometryFileM(input->getValue("GeometryM"));
-    para->setGeometryFileF(input->getValue("GeometryF"));
-    //////////////////////////////////////////////////////////////////////////
-    para->setgeoVec(gridPath + input->getValue("geoVec"));
-    para->setcoordX(gridPath + input->getValue("coordX"));
-    para->setcoordY(gridPath + input->getValue("coordY"));
-    para->setcoordZ(gridPath + input->getValue("coordZ"));
-    para->setneighborX(gridPath + input->getValue("neighborX"));
-    para->setneighborY(gridPath + input->getValue("neighborY"));
-    para->setneighborZ(gridPath + input->getValue("neighborZ"));
-    para->setscaleCFC(gridPath + input->getValue("scaleCFC"));
-    para->setscaleCFF(gridPath + input->getValue("scaleCFF"));
-    para->setscaleFCC(gridPath + input->getValue("scaleFCC"));
-    para->setscaleFCF(gridPath + input->getValue("scaleFCF"));
-    para->setscaleOffsetCF(gridPath + input->getValue("scaleOffsetCF"));
-    para->setscaleOffsetFC(gridPath + input->getValue("scaleOffsetFC"));
-    para->setgeomBoundaryBcQs(gridPath + input->getValue("geomBoundaryBcQs"));
-    para->setgeomBoundaryBcValues(gridPath + input->getValue("geomBoundaryBcValues"));
-    para->setinletBcQs(gridPath + input->getValue("inletBcQs"));
-    para->setinletBcValues(gridPath + input->getValue("inletBcValues"));
-    para->setoutletBcQs(gridPath + input->getValue("outletBcQs"));
-    para->setoutletBcValues(gridPath + input->getValue("outletBcValues"));
-    para->settopBcQs(gridPath + input->getValue("topBcQs"));
-    para->settopBcValues(gridPath + input->getValue("topBcValues"));
-    para->setbottomBcQs(gridPath + input->getValue("bottomBcQs"));
-    para->setbottomBcValues(gridPath + input->getValue("bottomBcValues"));
-    para->setfrontBcQs(gridPath + input->getValue("frontBcQs"));
-    para->setfrontBcValues(gridPath + input->getValue("frontBcValues"));
-    para->setbackBcQs(gridPath + input->getValue("backBcQs"));
-    para->setbackBcValues(gridPath + input->getValue("backBcValues"));
-    para->setnumberNodes(gridPath + input->getValue("numberNodes"));
-    para->setLBMvsSI(gridPath + input->getValue("LBMvsSI"));
-    //////////////////////////////gridPath + ////////////////////////////////////////////
-    para->setmeasurePoints(gridPath + input->getValue("measurePoints"));
-    para->setpropellerValues(gridPath + input->getValue("propellerValues"));
-    para->setclockCycleForMP(StringUtil::toFloat(input->getValue("measureClockCycle")));
-    para->settimestepForMP(StringUtil::toInt(input->getValue("measureTimestep")));
-    para->setcpTop(gridPath + input->getValue("cpTop"));
-    para->setcpBottom(gridPath + input->getValue("cpBottom"));
-    para->setcpBottom2(gridPath + input->getValue("cpBottom2"));
-    para->setConcentration(gridPath + input->getValue("Concentration"));
-    //////////////////////////////////////////////////////////////////////////
-    //Normals - Geometry
-    para->setgeomBoundaryNormalX(gridPath + input->getValue("geomBoundaryNormalX"));
-    para->setgeomBoundaryNormalY(gridPath + input->getValue("geomBoundaryNormalY"));
-    para->setgeomBoundaryNormalZ(gridPath + input->getValue("geomBoundaryNormalZ"));
-    //Normals - Inlet
-    para->setInflowBoundaryNormalX(gridPath + input->getValue("inletBoundaryNormalX"));
-    para->setInflowBoundaryNormalY(gridPath + input->getValue("inletBoundaryNormalY"));
-    para->setInflowBoundaryNormalZ(gridPath + input->getValue("inletBoundaryNormalZ"));
-    //Normals - Outlet
-    para->setOutflowBoundaryNormalX(gridPath + input->getValue("outletBoundaryNormalX"));
-    para->setOutflowBoundaryNormalY(gridPath + input->getValue("outletBoundaryNormalY"));
-    para->setOutflowBoundaryNormalZ(gridPath + input->getValue("outletBoundaryNormalZ"));
-    //////////////////////////////////////////////////////////////////////////
-    //Forcing
-    para->setForcing(StringUtil::toFloat(input->getValue("ForcingX")), StringUtil::toFloat(input->getValue("ForcingY")), StringUtil::toFloat(input->getValue("ForcingZ")));
-    //////////////////////////////////////////////////////////////////////////
-    //Particles
-    para->setCalcParticles(StringUtil::toBool(input->getValue("calcParticles")));
-    para->setParticleBasicLevel(StringUtil::toInt(input->getValue("baseLevel")));
-    para->setParticleInitLevel(StringUtil::toInt(input->getValue("initLevel")));
-    para->setNumberOfParticles(StringUtil::toInt(input->getValue("numberOfParticles")));
-    para->setneighborWSB(gridPath + input->getValue("neighborWSB"));
-    para->setStartXHotWall(StringUtil::toDouble(input->getValue("startXHotWall")));
-    para->setEndXHotWall(StringUtil::toDouble(input->getValue("endXHotWall")));
-    //////////////////////////////////////////////////////////////////////////
-    //for Multi GPU
-    if (para->getNumprocs() > 1)
-    {
-        ////////////////////////////////////////////////////////////////////////////
-        ////1D domain decomposition
-        //std::vector<std::string> sendProcNeighbors;
-        //std::vector<std::string> recvProcNeighbors;
-        //for (int i = 0; i<para->getNumprocs();i++)
-        //{
-        // sendProcNeighbors.push_back(gridPath + StringUtil::toString(i) + "s.dat");
-        // recvProcNeighbors.push_back(gridPath + StringUtil::toString(i) + "r.dat");
-        //}
-        //para->setPossNeighborFiles(sendProcNeighbors, "send");
-        //para->setPossNeighborFiles(recvProcNeighbors, "recv");
-        //////////////////////////////////////////////////////////////////////////
-        //3D domain decomposition
-        std::vector<std::string> sendProcNeighborsX, sendProcNeighborsY, sendProcNeighborsZ;
-        std::vector<std::string> recvProcNeighborsX, recvProcNeighborsY, recvProcNeighborsZ;
-        for (int i = 0; i < para->getNumprocs(); i++)
-        {
-            sendProcNeighborsX.push_back(gridPath + StringUtil::toString(i) + "Xs.dat");
-            sendProcNeighborsY.push_back(gridPath + StringUtil::toString(i) + "Ys.dat");
-            sendProcNeighborsZ.push_back(gridPath + StringUtil::toString(i) + "Zs.dat");
-            recvProcNeighborsX.push_back(gridPath + StringUtil::toString(i) + "Xr.dat");
-            recvProcNeighborsY.push_back(gridPath + StringUtil::toString(i) + "Yr.dat");
-            recvProcNeighborsZ.push_back(gridPath + StringUtil::toString(i) + "Zr.dat");
-        }
-        para->setPossNeighborFilesX(sendProcNeighborsX, "send");
-        para->setPossNeighborFilesY(sendProcNeighborsY, "send");
-        para->setPossNeighborFilesZ(sendProcNeighborsZ, "send");
-        para->setPossNeighborFilesX(recvProcNeighborsX, "recv");
-        para->setPossNeighborFilesY(recvProcNeighborsY, "recv");
-        para->setPossNeighborFilesZ(recvProcNeighborsZ, "recv");
-    }
-    //////////////////////////////////////////////////////////////////////////
-    //para->setkFull(             input->getValue( "kFull" ));
-    //para->setgeoFull(           input->getValue( "geoFull" ));
-    //para->setnoSlipBcPos(       input->getValue( "noSlipBcPos" ));
-    //para->setnoSlipBcQs(          input->getValue( "noSlipBcQs" ));
-    //para->setnoSlipBcValues(      input->getValue( "noSlipBcValues" ));
-    //para->setnoSlipBcValue(     input->getValue( "noSlipBcValue" ));
-    //para->setslipBcPos(         input->getValue( "slipBcPos" ));
-    //para->setslipBcQs(          input->getValue( "slipBcQs" ));
-    //para->setslipBcValue(       input->getValue( "slipBcValue" ));
-    //para->setpressBcPos(        input->getValue( "pressBcPos" ));
-    //para->setpressBcQs(           input->getValue( "pressBcQs" ));
-    //para->setpressBcValues(       input->getValue( "pressBcValues" ));
-    //para->setpressBcValue(      input->getValue( "pressBcValue" ));
-    //para->setvelBcQs(             input->getValue( "velBcQs" ));
-    //para->setvelBcValues(         input->getValue( "velBcValues" ));
-    //para->setpropellerCylinder( input->getValue( "propellerCylinder" ));
-    //para->setpropellerQs(		 input->getValue( "propellerQs"      ));
-    //para->setwallBcQs(            input->getValue( "wallBcQs"         ));
-    //para->setwallBcValues(        input->getValue( "wallBcValues"     ));
-    //para->setperiodicBcQs(        input->getValue( "periodicBcQs"     ));
-    //para->setperiodicBcValues(    input->getValue( "periodicBcValues" ));
-    //cout << "Try this: " << para->getgeomBoundaryBcValues() << endl;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    //Restart
-    para->setTimeDoCheckPoint(StringUtil::toInt(input->getValue("TimeDoCheckPoint")));
-    para->setTimeDoRestart(StringUtil::toInt(input->getValue("TimeDoRestart")));
-    para->setDoCheckPoint(StringUtil::toBool(input->getValue("DoCheckPoint")));
-    para->setDoRestart(StringUtil::toBool(input->getValue("DoRestart")));
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    para->setMaxLevel(StringUtil::toInt(input->getValue("NOGL")));
-    para->setGridX(StringUtil::toIntVector(input->getValue("GridX")));                           
-    para->setGridY(StringUtil::toIntVector(input->getValue("GridY")));                           
-    para->setGridZ(StringUtil::toIntVector(input->getValue("GridZ")));                  
-    para->setDistX(StringUtil::toIntVector(input->getValue("DistX")));                  
-    para->setDistY(StringUtil::toIntVector(input->getValue("DistY")));                  
-    para->setDistZ(StringUtil::toIntVector(input->getValue("DistZ")));                
-
-    para->setNeedInterface(std::vector<bool>{true, true, true, true, true, true});
-}
-
-
-
-void multipleLevel(const std::string& configPath)
-{
-    auto gridFactory = GridFactory::make();
-    gridFactory->setGridStrategy(Device::CPU);
-    //gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::RAYCASTING);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-    //gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_UNDER_TRIANGLE);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-    
-    SPtr<Parameter> para = Parameter::make();
-    SPtr<GridProvider> gridGenerator;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    bool useGridGenerator = true;
-
-    if(useGridGenerator){
-        
-        //const uint generatePart = 1;
-        const uint generatePart = Communicator::getInstanz()->getPID();
-            
-        real dx = 1.0 / 20.0;
-        real vx = 0.05;
-
-        auto triangularMesh = std::make_shared<TriangularMesh>("F:/Work/Computations/gridGenerator/stl/ShpereNotOptimal.stl");
-        //auto triangularMesh = std::make_shared<TriangularMesh>("stl/ShpereNotOptimal.lnx.stl");
-
-        // all
-        //gridBuilder->addCoarseGrid(-2, -2, -2,  
-        //                            4,  2,  2, dx);
-
-        real overlap = 10.0 * dx;
-
-        gridBuilder->addCoarseGrid(-2.0, -2.0, -2.0,  
-                                    4.0,  2.0,  2.0, dx);
-
-
-        gridBuilder->setNumberOfLayers(10,8);
-        gridBuilder->addGrid(triangularMesh, 1);
-
-        gridBuilder->addGeometry(triangularMesh);
-
-        gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-
-        gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
-
-        //////////////////////////////////////////////////////////////////////////
-
-        gridBuilder->setVelocityBoundaryCondition(SideType::PY, vx , 0.0, 0.0);
-        gridBuilder->setVelocityBoundaryCondition(SideType::MY, vx , 0.0, 0.0);
-        gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vx , 0.0, 0.0);
-        gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vx , 0.0, 0.0);
-
-        gridBuilder->setVelocityBoundaryCondition(SideType::MX, vx, 0.0, 0.0);
-        gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
-
-        gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
-
-        bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityCompressible);
-        bcFactory.setGeometryBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipCompressible);
-        bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::PressureNonEquilibriumCompressible);
-
-        //////////////////////////////////////////////////////////////////////////
-        gridBuilder->writeGridsToVtk("F:/Work/Computations/gridGenerator/grid/Test_");
-        //gridBuilder->writeArrows    ("F:/Work/Computations/gridGenerator/grid/Test_Arrow");
-
-        //SimulationFileWriter::write("F:/Work/Computations/gridGenerator/grid/", gridBuilder, FILEFORMAT::ASCII);
-
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-            
-        if(false)
-        {
-
-            auto getParentIndex = [&] (uint index, uint level) -> uint
-            {
-                SPtr<Grid> grid = gridBuilder->getGrid( level );
-
-                if( level != 0 )
-                {
-                    real x, y, z;
-                    grid->transIndexToCoords(index, x, y, z);
-
-                    SPtr<Grid> coarseGrid = gridBuilder->getGrid(level - 1);
-
-                    for (const auto dir : DistributionHelper::getDistribution27())
-                    {
-                        if (std::abs(dir[0]) < 0.5 || std::abs(dir[1]) < 0.5 || std::abs(dir[2]) < 0.5) continue;
-
-                        real coarseX = x + dir[0] * 0.5 * grid->getDelta();
-                        real coarseY = y + dir[1] * 0.5 * grid->getDelta();
-                        real coarseZ = z + dir[2] * 0.5 * grid->getDelta();
-
-                        // check if close enough to coarse grid coordinates
-                        if( 0.01 * grid->getDelta() < std::abs(         (coarseGrid->getStartX() - coarseX) / grid->getDelta() 
-                                                                - lround( (coarseGrid->getStartX() - coarseX) / grid->getDelta() ) ) ) continue;
-                        if( 0.01 * grid->getDelta() < std::abs(         (coarseGrid->getStartY() - coarseY) / grid->getDelta() 
-                                                                - lround( (coarseGrid->getStartY() - coarseY) / grid->getDelta() ) ) ) continue;
-                        if( 0.01 * grid->getDelta() < std::abs(         (coarseGrid->getStartZ() - coarseZ) / grid->getDelta() 
-                                                                - lround( (coarseGrid->getStartZ() - coarseZ) / grid->getDelta() ) ) ) continue;
-
-                        uint parentIndex = coarseGrid->transCoordToIndex( coarseX, coarseY, coarseZ);
-
-                        return parentIndex;
-                    }
-                }
-
-                return INVALID_INDEX;
-            };
-
-
-            std::vector<idx_t> xadj;
-            std::vector<idx_t> adjncy;
-
-            std::vector<idx_t> vwgt;
-            std::vector<idx_t> adjwgt;
-
-            idx_t vertexCounter = 0;
-            uint edgeCounter = 0;
-
-            std::cout << "Checkpoint 1:" << std::endl;
-
-            std::vector< std::vector<idx_t> > vertexIndex( gridBuilder->getNumberOfLevels() );
-
-            std::vector< uint > startVerticesPerLevel;;
-
-            for( uint level = 0; level < gridBuilder->getNumberOfLevels(); level++ )
-            {
-                SPtr<Grid> grid = gridBuilder->getGrid( level );
-
-                vertexIndex[level].resize( grid->getSize() );
-
-                startVerticesPerLevel.push_back(vertexCounter);
-
-                for (uint index = 0; index < grid->getSize(); index++)
-                {
-                    if (grid->getSparseIndex(index) == INVALID_INDEX)
-                    {
-                        vertexIndex[level][index] = INVALID_INDEX;
-                        continue;
-                    }
-
-                    uint parentIndex = getParentIndex(index, level);
-
-                    if( parentIndex != INVALID_INDEX )
-                    {
-                        SPtr<Grid> coarseGrid = gridBuilder->getGrid(level - 1);
-
-                        if( coarseGrid->getFieldEntry(parentIndex) == FLUID_CFC ||
-                            coarseGrid->getFieldEntry(parentIndex) == FLUID_FCC ||
-                            coarseGrid->getFieldEntry(parentIndex) == STOPPER_COARSE_UNDER_FINE )
-                        {
-                            //vertexIndex[level][index] = INVALID_INDEX;
-                            vertexIndex[level][index] = vertexIndex[level - 1][parentIndex];
-                            continue;
-                        }
-                    }
-
-                    vertexIndex[level][index] = vertexCounter;
-
-                    vwgt.push_back( std::pow(2, level) );
-                    //vwgt.push_back( std::pow(2, 2*level) );
-                    vertexCounter++;
-                }
-
-            }
-
-            //////////////////////////////////////////////////////////////////////////
-            //for( uint level = 0; level < gridBuilder->getNumberOfLevels(); level++ )
-            //{
-            //    SPtr<Grid> grid = gridBuilder->getGrid( level );
-
-            //    for (uint index = 0; index < grid->getSize(); index++)
-            //    {
-            //        grid->setFieldEntry(index, vertexIndex[level][index] >= startVerticesPerLevel[level] && vertexIndex[level][index] != INVALID_INDEX);
-            //    }
-            //}
-
-            //gridBuilder->writeGridsToVtk("F:/Work/Computations/gridGenerator/grid/VertexIndex_");
-
-            //return;
-            //////////////////////////////////////////////////////////////////////////
-
-
-            std::cout << "Checkpoint 2:" << std::endl;
-                
-            for( uint level = 0; level < gridBuilder->getNumberOfLevels(); level++ )
-            {
-                SPtr<Grid> grid = gridBuilder->getGrid( level );
-
-                for (uint index = 0; index < grid->getSize(); index++)
-                {
-                    //if (grid->getSparseIndex(index) == INVALID_INDEX) continue;
-
-                    if( vertexIndex[level][index] == INVALID_INDEX ) continue;
-
-                    if( vertexIndex[level][index] < startVerticesPerLevel[level] ) continue;
-
-                    xadj.push_back(edgeCounter);
-
-                    real x, y, z;
-                    grid->transIndexToCoords(index, x, y, z);
-
-                    for (const auto dir : DistributionHelper::getDistribution27())
-                    {
-                        const uint neighborIndex = grid->transCoordToIndex(x + dir[0] * grid->getDelta(), 
-                                                                            y + dir[1] * grid->getDelta(), 
-                                                                            z + dir[2] * grid->getDelta());
-
-                        if (neighborIndex == INVALID_INDEX) continue;
-
-                        if (neighborIndex == index) continue;
-
-                        if( vertexIndex[level][neighborIndex] == INVALID_INDEX ) continue;
-
-                        adjncy.push_back( vertexIndex[level][neighborIndex] );
-                        adjwgt.push_back( std::pow(2, level) );
-
-                        edgeCounter++;
-                    }
-
-                    if( grid->getFieldEntry(index) == FLUID_CFC ||
-                        grid->getFieldEntry(index) == FLUID_FCC ||
-                        grid->getFieldEntry(index) == STOPPER_COARSE_UNDER_FINE )
-
-                    {
-                        SPtr<Grid> fineGrid = gridBuilder->getGrid(level + 1);
-
-                        for (const auto dir : DistributionHelper::getDistribution27())
-                        {
-                            if (std::abs(dir[0]) < 0.5 || std::abs(dir[1]) < 0.5 || std::abs(dir[2]) < 0.5) continue;
-
-                            real fineX = x + dir[0] * 0.25 * grid->getDelta();
-                            real fineY = y + dir[1] * 0.25 * grid->getDelta();
-                            real fineZ = z + dir[2] * 0.25 * grid->getDelta();
-
-                            uint childIndex = fineGrid->transCoordToIndex(fineX, fineY, fineZ);
-
-                            if( fineGrid->getFieldEntry(childIndex) == INVALID_INDEX ) continue;
-                            if( vertexIndex[level + 1][childIndex]  == INVALID_INDEX ) continue;
-
-                            for (const auto dir : DistributionHelper::getDistribution27())
-                            {
-                                const uint neighborIndex = fineGrid->transCoordToIndex( fineX + dir[0] * fineGrid->getDelta(), 
-                                                                                        fineY + dir[1] * fineGrid->getDelta(), 
-                                                                                        fineZ + dir[2] * fineGrid->getDelta() );
-
-                                if(neighborIndex == INVALID_INDEX) continue;
-
-                                if (neighborIndex == childIndex) continue;
-
-                                if( vertexIndex[level + 1][neighborIndex] == INVALID_INDEX ) continue;
-
-                                adjncy.push_back( vertexIndex[level + 1][neighborIndex] );
-                                adjwgt.push_back( std::pow(2, level) );
-
-                                edgeCounter++;
-                            }
-                        }
-                    }
-                }
-            }
-
-            xadj.push_back( edgeCounter );
-
-            std::cout << "Checkpoint 3:" << std::endl;
-                
-            idx_t nWeights  = 1;
-            idx_t nParts    = 4;
-            idx_t objval    = 0;
-
-            std::vector<idx_t> part( vertexCounter );
-                
-            std::cout << vertexCounter << std::endl;
-            std::cout << edgeCounter << std::endl;
-            std::cout << xadj.size()  << std::endl;
-            std::cout << adjncy.size() << std::endl;
-
-            //int ret = METIS_PartGraphRecursive(&vertexCounter, &nWeights, xadj.data(), adjncy.data(),
-            // 				                   vwgt.data(), NULL, adjwgt.data(), &nParts, 
-            //                                   NULL, NULL, NULL, &objval, part.data());
-
-            int ret = METIS_PartGraphKway(&vertexCounter, &nWeights, xadj.data(), adjncy.data(),
-                 				            vwgt.data(), NULL, NULL/*adjwgt.data()*/, &nParts, 
-                                            NULL, NULL, NULL, &objval, part.data());
-
-            std::cout << "objval:" << objval << std::endl;
-
-            std::cout << "Checkpoint 4:" << std::endl;
-
-            //uint partCounter = 0;
-                
-            for( uint level = 0; level < gridBuilder->getNumberOfLevels(); level++ )
-            {
-                SPtr<Grid> grid = gridBuilder->getGrid( level );
-
-                for (uint index = 0; index < grid->getSize(); index++)
-                {
-                    if (grid->getSparseIndex(index) == INVALID_INDEX) continue;
-
-                    grid->setFieldEntry(index, part[vertexIndex[level][index]]);
-
-                    //partCounter++;
-                }
-            }
-
-            std::cout << "Checkpoint 5:" << std::endl;
-
-            gridBuilder->writeGridsToVtk("F:/Work/Computations/gridGenerator/grid/Partition_");
-
-        }
-
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-            
-        {
-
-            for( int level = gridBuilder->getNumberOfLevels()-1; level >= 0 ; level-- )
-            {
-                std::vector< std::vector<idx_t> > vertexIndex( gridBuilder->getNumberOfLevels() );
-
-                std::vector<idx_t> xadj;
-                std::vector<idx_t> adjncy;
-
-                std::vector<idx_t> vwgt;
-                std::vector<idx_t> adjwgt;
-
-                idx_t vertexCounter = 0;
-                uint edgeCounter = 0;
-
-                SPtr<Grid> grid = gridBuilder->getGrid( level );
-
-                vertexIndex[level].resize( grid->getSize() );
-
-                for (uint index = 0; index < grid->getSize(); index++)
-                {
-                    if (grid->getSparseIndex(index) == INVALID_INDEX)
-                    {
-                        vertexIndex[level][index] = INVALID_INDEX;
-                        continue;
-                    }
-
-                    vertexIndex[level][index] = vertexCounter;
-
-                    vwgt.push_back( std::pow(2, level) );
-                    //vwgt.push_back( std::pow(2, 2*level) );
-                    vertexCounter++;
-                }
-
-                for (uint index = 0; index < grid->getSize(); index++)
-                {
-                    //if (grid->getSparseIndex(index) == INVALID_INDEX) continue;
-
-                    if( vertexIndex[level][index] == INVALID_INDEX ) continue;
-
-                    xadj.push_back(edgeCounter);
-
-                    real x, y, z;
-                    grid->transIndexToCoords(index, x, y, z);
-
-                    for (const auto dir : DistributionHelper::getDistribution27())
-                    {
-                        const uint neighborIndex = grid->transCoordToIndex(x + dir[0] * grid->getDelta(), 
-                                                                            y + dir[1] * grid->getDelta(), 
-                                                                            z + dir[2] * grid->getDelta());
-
-                        if (neighborIndex == INVALID_INDEX) continue;
-
-                        if (neighborIndex == index) continue;
-
-                        if( vertexIndex[level][neighborIndex] == INVALID_INDEX ) continue;
-
-                        adjncy.push_back( vertexIndex[level][neighborIndex] );
-                        adjwgt.push_back( std::pow(2, level) );
-
-                        edgeCounter++;
-                    }
-                }
-
-                xadj.push_back( edgeCounter );
-
-                std::cout << "Checkpoint 3:" << std::endl;
-                
-                idx_t nWeights  = 1;
-                idx_t nParts    = 4;
-                idx_t objval    = 0;
-
-                std::vector<idx_t> part( vertexCounter );
-                
-                std::cout << vertexCounter << std::endl;
-                std::cout << edgeCounter << std::endl;
-                std::cout << xadj.size()  << std::endl;
-                std::cout << adjncy.size() << std::endl;
-
-                int ret = METIS_PartGraphRecursive(&vertexCounter, &nWeights, xadj.data(), adjncy.data(),
-                     				                NULL/*vwgt.data()*/, NULL, NULL/*adjwgt.data()*/, &nParts, 
-                                                    NULL, NULL, NULL, &objval, part.data());
-
-                //int ret = METIS_PartGraphKway(&vertexCounter, &nWeights, xadj.data(), adjncy.data(),
-                 		//	                  NULL/*vwgt.data()*/, NULL, NULL/*adjwgt.data()*/, &nParts, 
-                //                              NULL, NULL, NULL, &objval, part.data());
-
-                std::cout << "objval:" << objval << std::endl;
-
-                std::cout << "Checkpoint 4:" << std::endl;
-
-                for (uint index = 0; index < grid->getSize(); index++)
-                {
-                    if (vertexIndex[level][index] == INVALID_INDEX) continue;
-
-                    if( grid->getFieldEntry(index) == FLUID_CFC ||
-                        grid->getFieldEntry(index) == FLUID_FCC ||
-                        grid->getFieldEntry(index) == STOPPER_COARSE_UNDER_FINE )
-                    {
-                        SPtr<Grid> fineGrid = gridBuilder->getGrid(level+1);
-                            
-                        real x, y, z;
-                        grid->transIndexToCoords(index, x, y, z);
-
-                        for (const auto dir : DistributionHelper::getDistribution27())
-                        {
-                            if (std::abs(dir[0]) < 0.5 || std::abs(dir[1]) < 0.5 || std::abs(dir[2]) < 0.5) continue;
-
-                            real fineX = x + dir[0] * 0.25 * grid->getDelta();
-                            real fineY = y + dir[1] * 0.25 * grid->getDelta();
-                            real fineZ = z + dir[2] * 0.25 * grid->getDelta();
-
-                            uint childIndex = fineGrid->transCoordToIndex(fineX, fineY, fineZ);
-
-                            if( childIndex == INVALID_INDEX ) continue;
-
-                            fineGrid->setFieldEntry(childIndex, part[vertexIndex[level][index]]);
-                            //fineGrid->setFieldEntry(childIndex, grid->getFieldEntry(index));
-                        }
-                    }
-
-                    grid->setFieldEntry(index, part[vertexIndex[level][index]]);
-                }
-            }
-
-            gridBuilder->writeGridsToVtk("F:/Work/Computations/gridGenerator/grid/Partition_");
-
-        }
-
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-        return;
-
-        gridGenerator = GridGenerator::make(gridBuilder, para);
-    }
-    else
-    {
-        gridGenerator = GridReader::make(FileFormat::BINARY, para);
-        //gridGenerator = GridReader::make(FileFormat::ASCII, para);
-    }
-
-    logFile.close();
-
-    //return;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-
-    std::ifstream stream;
-    stream.open(configPath.c_str(), std::ios::in);
-    if (stream.fail())
-        throw std::runtime_error("can not open config file!");
-
-    UPtr<input::Input> input = input::Input::makeInput(stream, "config");
-
-    setParameters(para, input);
-
-    Simulation sim;
-    SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
-    sim.init(para, gridGenerator, fileWriter);
-    sim.run();
-}
-
-
-int main( int argc, char* argv[])
-{
-     MPI_Init(&argc, &argv);
-    std::string str, str2; 
-    if ( argv != NULL )
-    {
-        str = static_cast<std::string>(argv[0]);
-        if (argc > 1)
-        {
-            str2 = static_cast<std::string>(argv[1]);
-            try
-            {
-                multipleLevel(str2);
-            }
-            catch (const std::exception& e)
-            {
-                //MPI_Abort(MPI_COMM_WORLD, -1);
-            }
-            catch (...)
-            {
-                std::cout << "unknown exeption" << std::endl;
-            }
-        }
-        else
-        {
-            try
-            {
-                multipleLevel("F:/Work/Computations/gridGenerator/inp/configTest.txt");
-            }
-            catch (const std::exception& e)
-            {
-                std::cout << e.what() << std::flush;
-                //MPI_Abort(MPI_COMM_WORLD, -1);
-            }
-            catch (const std::bad_alloc e)
-            {
-                std::cout << e.what() << std::flush;
-                //MPI_Abort(MPI_COMM_WORLD, -1);
-            }
-            catch (...)
-            {
-                std::cout << "unknown exeption" << std::endl;
-            }
-
-            std::cout << "\nConfiguration file must be set!: lbmgm <config file>" << std::endl << std::flush;
-            //MPI_Abort(MPI_COMM_WORLD, -1);
-        }
-    }
-
-
-   /*
-   MPE_Init_log() & MPE_Finish_log() are NOT needed when
-   liblmpe.a is linked with this program.  In that case,
-   MPI_Init() would have called MPE_Init_log() already.
-   */
-#if defined( MPI_LOGGING )
-   MPE_Init_log();
-#endif
-
-#if defined( MPI_LOGGING )
-   if ( argv != NULL )
-      MPE_Finish_log( argv[0] );
-   if ( str != "" )
-      MPE_Finish_log( str.c_str() );
-   else
-      MPE_Finish_log( "TestLog" );
-#endif
-
-   MPI_Finalize();
-   return 0;
-}
diff --git a/apps/gpu/MusselOyster/CMakeLists.txt b/apps/gpu/MusselOyster/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..966c802b233fd333c7e7b44a57c7f4177b419ca3
--- /dev/null
+++ b/apps/gpu/MusselOyster/CMakeLists.txt
@@ -0,0 +1,3 @@
+PROJECT(MusselOyster LANGUAGES CXX)
+
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator FILES MusselOyster.cpp)
diff --git a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp b/apps/gpu/MusselOyster/MusselOyster.cpp
similarity index 98%
rename from apps/gpu/LBM/MusselOyster/MusselOyster.cpp
rename to apps/gpu/MusselOyster/MusselOyster.cpp
index fd7ba15c6bd12ba6161701a1cc6f70b1b2235246..6bb03ae737b6ecda69f3a9ba8f1f5faa69877781 100644
--- a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
+++ b/apps/gpu/MusselOyster/MusselOyster.cpp
@@ -36,7 +36,6 @@
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
@@ -50,6 +49,7 @@
 //////////////////////////////////////////////////////////////////////////
 
 #include "utilities/communication.h"
+#include <parallel/MPICommunicator.h>
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -85,12 +85,12 @@ const std::string simulationName("MusselOyster");
 
 void runVirtualFluids(const vf::basics::ConfigurationFile& config)
 {
-    vf::gpu::Communicator &communicator = vf::gpu::MpiCommunicator::getInstance();
+    vf::parallel::Communicator &communicator = *vf::parallel::MPICommunicator::getInstance();
 
     auto gridBuilder = std::make_shared<MultipleGridBuilder>();
 
     SPtr<Parameter> para =
-        std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
+        std::make_shared<Parameter>(communicator.getNumberOfProcesses(), communicator.getProcessID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -188,12 +188,12 @@ void runVirtualFluids(const vf::basics::ConfigurationFile& config)
             bivalveRef_1_STL = std::make_shared<TriangularMesh>(stlPath + bivalveType + "_Level1.stl");
 
         if (para->getNumprocs() > 1) {
-            const uint generatePart = vf::gpu::MpiCommunicator::getInstance().getPID();
+            const uint generatePart = communicator.getProcessID();
 
             real overlap = (real)8.0 * dxGrid;
             gridBuilder->setNumberOfLayers(10, 8);
 
-            if (communicator.getNumberOfProcess() == 2) {
+            if (communicator.getNumberOfProcesses() == 2) {
                 const real zSplit = 0.0; // round(((double)bbzp + bbzm) * 0.5);
 
                 if (generatePart == 0) {
@@ -244,7 +244,7 @@ void runVirtualFluids(const vf::basics::ConfigurationFile& config)
                 gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
                 gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0); // set pressure BC after velocity BCs
                 //////////////////////////////////////////////////////////////////////////
-            } else if (communicator.getNumberOfProcess() == 4) {
+            } else if (communicator.getNumberOfProcesses() == 4) {
 
                 const real xSplit = 100.0;
                 const real zSplit = 0.0;
@@ -334,7 +334,7 @@ void runVirtualFluids(const vf::basics::ConfigurationFile& config)
                     gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0); // set pressure BC after velocity BCs
                 }
                 //////////////////////////////////////////////////////////////////////////
-            } else if (communicator.getNumberOfProcess() == 8) {
+            } else if (communicator.getNumberOfProcesses() == 8) {
                 real xSplit = 140.0; // 100.0 // mit groesserem Level 1 140.0
                 real ySplit = 32.0;  // 32.0
                 real zSplit = 0.0;
@@ -574,14 +574,11 @@ void runVirtualFluids(const vf::basics::ConfigurationFile& config)
 
 int main(int argc, char *argv[])
 {
-    MPI_Init(&argc, &argv);
-    std::string str, str2, configFile;
-
-    if (argv != NULL) {
+    if (argc > 1) {
 
         try {
             VF_LOG_TRACE("For the default config path to work, execute the app from the project root.");
-            vf::basics::ConfigurationFile config = vf::basics::loadConfig(argc, argv, "./apps/gpu/LBM/MusselOyster/configMusselOyster.txt");
+            vf::basics::ConfigurationFile config = vf::basics::loadConfig(argc, argv, "./apps/gpu/MusselOyster/configMusselOyster.txt");
             runVirtualFluids(config);
 
             //////////////////////////////////////////////////////////////////////////
@@ -595,7 +592,5 @@ int main(int argc, char *argv[])
             VF_LOG_CRITICAL("Unknown exception!");
         }
     }
-
-    MPI_Finalize();
     return 0;
 }
diff --git a/apps/gpu/LBM/MusselOyster/configMusselOyster.txt b/apps/gpu/MusselOyster/configMusselOyster.txt
similarity index 82%
rename from apps/gpu/LBM/MusselOyster/configMusselOyster.txt
rename to apps/gpu/MusselOyster/configMusselOyster.txt
index 3d64ef74215db49adfdee6ba569fccb462b98d73..0fca2741b181ace80d293ebd09a4ef852a7c09c5 100644
--- a/apps/gpu/LBM/MusselOyster/configMusselOyster.txt
+++ b/apps/gpu/MusselOyster/configMusselOyster.txt
@@ -1,15 +1,15 @@
 # Tesla 03
-# mpiexec -n 2 "C:/Users/Master/Documents/MasterAnna/VirtualFluids_dev/build/bin/Release/MusselOyster.exe" "C:/Users/Master/Documents/MasterAnna/VirtualFluids_dev/apps/gpu/LBM/MusselOyster/configMusselOyster.txt"
+# mpiexec -n 2 "C:/Users/Master/Documents/MasterAnna/VirtualFluids_dev/build/bin/Release/MusselOyster.exe" "C:/Users/Master/Documents/MasterAnna/VirtualFluids_dev/apps/gpu/MusselOyster/configMusselOyster.txt"
 # Phoenix
-# mpirun -np 2 "./VirtualFluids_dev/build/bin/MusselOyster" "./VirtualFluids_dev/apps/gpu/LBM/MusselOyster/configMusselOyster.txt"
+# mpirun -np 2 "./VirtualFluids_dev/build/bin/MusselOyster" "./VirtualFluids_dev/apps/gpu/MusselOyster/configMusselOyster.txt"
 
 # Phoenix mpich
-# mpirun -np 4 nvprof -f -o MusselOyster.%q{PMI_RANK}.nvprof "./VirtualFluids_dev/build/bin/MusselOyster" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/configPhoenix4GPU.txt"
+# mpirun -np 4 nvprof -f -o MusselOyster.%q{PMI_RANK}.nvprof "./VirtualFluids_dev/build/bin/MusselOyster" "./VirtualFluids_dev/apps/gpu/SphereScaling/configPhoenix4GPU.txt"
 # Phoenix openmpi
-# mpirun -np 4 nvprof -f -o MusselOyster.%q{OMPI_COMM_WORLD_RANK}.nvprof "./VirtualFluids_dev/build/bin/MusselOyster" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/configPhoenix4GPU.txt"
+# mpirun -np 4 nvprof -f -o MusselOyster.%q{OMPI_COMM_WORLD_RANK}.nvprof "./VirtualFluids_dev/build/bin/MusselOyster" "./VirtualFluids_dev/apps/gpu/SphereScaling/configPhoenix4GPU.txt"
 
 # Aragorn
- ./bin/MusselOyster "../apps/gpu/LBM/MusselOyster/configMusselOyster.txt"
+# ./bin/MusselOyster "../apps/gpu/MusselOyster/configMusselOyster.txt"
 
 ##################################################
 #GPU Mapping
diff --git a/apps/gpu/LBM/MusselOyster/configPhoenix1GPU.txt b/apps/gpu/MusselOyster/configPhoenix1GPU.txt
similarity index 100%
rename from apps/gpu/LBM/MusselOyster/configPhoenix1GPU.txt
rename to apps/gpu/MusselOyster/configPhoenix1GPU.txt
diff --git a/apps/gpu/LBM/MusselOyster/configPhoenix8GPU.txt b/apps/gpu/MusselOyster/configPhoenix8GPU.txt
similarity index 100%
rename from apps/gpu/LBM/MusselOyster/configPhoenix8GPU.txt
rename to apps/gpu/MusselOyster/configPhoenix8GPU.txt
diff --git a/apps/gpu/SphereGPU/CMakeLists.txt b/apps/gpu/SphereGPU/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1a162d6d200e4c0e533764b8f9927314b1f28ee9
--- /dev/null
+++ b/apps/gpu/SphereGPU/CMakeLists.txt
@@ -0,0 +1,3 @@
+PROJECT(SphereGPU LANGUAGES CXX)
+
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator FILES Sphere.cpp)
diff --git a/apps/gpu/LBM/SphereGPU/Sphere.cpp b/apps/gpu/SphereGPU/Sphere.cpp
similarity index 96%
rename from apps/gpu/LBM/SphereGPU/Sphere.cpp
rename to apps/gpu/SphereGPU/Sphere.cpp
index c78ff949bbce96273a5436722ddc8c9f542300d9..40cc55df4b6c666caf2b9481d72274a9330217a8 100644
--- a/apps/gpu/LBM/SphereGPU/Sphere.cpp
+++ b/apps/gpu/SphereGPU/Sphere.cpp
@@ -41,12 +41,14 @@
 #include <string>
 
 //////////////////////////////////////////////////////////////////////////
-#include <basics/PointerDefinitions.h>
 #include <basics/DataTypes.h>
-#include <logger/Logger.h>
 #include <basics/PointerDefinitions.h>
 #include <basics/config/ConfigurationFile.h>
 
+#include <logger/Logger.h>
+
+#include <parallel/MPICommunicator.h>
+
 //////////////////////////////////////////////////////////////////////////
 
 #include "GridGenerator/grid/BoundaryConditions/Side.h"
@@ -61,7 +63,6 @@
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
 #include "VirtualFluids_GPU/LBM/Simulation.h"
 #include "VirtualFluids_GPU/Output/FileWriter.h"
 #include "VirtualFluids_GPU/Parameter/Parameter.h"
@@ -70,15 +71,12 @@
 #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h"
 #include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h"
 
-//////////////////////////////////////////////////////////////////////////
-
 int main(int argc, char *argv[])
 {
     try {
         //////////////////////////////////////////////////////////////////////////
         // Simulation parameters
         //////////////////////////////////////////////////////////////////////////
-
         const bool useConfigFile = true;
 
         const real L = 1.0;
@@ -101,7 +99,7 @@ int main(int argc, char *argv[])
         vf::basics::ConfigurationFile config;
         if (useConfigFile) {
             VF_LOG_TRACE("For the default config path to work, execute the app from the project root.");
-            config = vf::basics::loadConfig(argc, argv, "./apps/gpu/LBM/SphereGPU/config.txt");
+            config = vf::basics::loadConfig(argc, argv, "./apps/gpu/SphereGPU/config.txt");
             para = std::make_shared<Parameter>(&config);
         } else {
             para = std::make_shared<Parameter>();
@@ -121,7 +119,7 @@ int main(int argc, char *argv[])
         // auto sphere = std::make_shared<Sphere>(0.0, 0.0, 0.0, dSphere / 2.0);
 
         // use stl
-        std::string stlPath = "./apps/gpu/LBM/SphereGPU/sphere02.stl";
+        std::string stlPath = "./apps/gpu/SphereGPU/sphere02.stl";
         if (useConfigFile && config.contains("STLPath")) {
             stlPath = config.getValue<std::string>("STLPath");
         }
@@ -218,7 +216,7 @@ int main(int argc, char *argv[])
         //////////////////////////////////////////////////////////////////////////
         // setup to copy mesh to simulation
         //////////////////////////////////////////////////////////////////////////
-        vf::gpu::Communicator& communicator = vf::gpu::MpiCommunicator::getInstance();
+        vf::parallel::Communicator &communicator = *vf::parallel::MPICommunicator::getInstance();
         auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
         SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
 
diff --git a/apps/gpu/LBM/SphereGPU/config.txt b/apps/gpu/SphereGPU/config.txt
similarity index 100%
rename from apps/gpu/LBM/SphereGPU/config.txt
rename to apps/gpu/SphereGPU/config.txt
diff --git a/apps/gpu/LBM/SphereGPU/sphere02.stl b/apps/gpu/SphereGPU/sphere02.stl
similarity index 100%
rename from apps/gpu/LBM/SphereGPU/sphere02.stl
rename to apps/gpu/SphereGPU/sphere02.stl
diff --git a/apps/gpu/LBM/SphereRefined/CMakeLists.txt b/apps/gpu/SphereRefined/CMakeLists.txt
similarity index 84%
rename from apps/gpu/LBM/SphereRefined/CMakeLists.txt
rename to apps/gpu/SphereRefined/CMakeLists.txt
index 9ede990de732e23a0b914271f7156f851c190fd3..87432021ba02e7b56b5dabade18e98d7978bce46 100644
--- a/apps/gpu/LBM/SphereRefined/CMakeLists.txt
+++ b/apps/gpu/SphereRefined/CMakeLists.txt
@@ -2,7 +2,7 @@ PROJECT(SphereRefined LANGUAGES CUDA CXX)
 
 #LIST(APPEND CS_COMPILER_FLAGS_CXX "-DOMPI_SKIP_MPICXX" )
 
-vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES SphereRefined.cpp)
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator FILES SphereRefined.cpp)
 
 set_source_files_properties(SphereRefined.cpp PROPERTIES LANGUAGE CUDA)
 
diff --git a/apps/gpu/LBM/SphereRefined/SphereRefined.cpp b/apps/gpu/SphereRefined/SphereRefined.cpp
similarity index 98%
rename from apps/gpu/LBM/SphereRefined/SphereRefined.cpp
rename to apps/gpu/SphereRefined/SphereRefined.cpp
index 722028eeecc02a90bebebf4c828bdfd482bf2e85..0df8e9e5f0a7c731ffdb16638c7a6e3f4651d9d9 100644
--- a/apps/gpu/LBM/SphereRefined/SphereRefined.cpp
+++ b/apps/gpu/SphereRefined/SphereRefined.cpp
@@ -60,7 +60,6 @@
 
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
 #include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
@@ -70,12 +69,14 @@
 #include "VirtualFluids_GPU/Factories/GridScalingFactory.h"
 #include "VirtualFluids_GPU/Kernel/Utilities/KernelTypes.h"
 
+#include <parallel/MPICommunicator.h>
+
 //////////////////////////////////////////////////////////////////////////
 
 int main()
 {
     try {
-        vf::gpu::Communicator &communicator = vf::gpu::MpiCommunicator::getInstance();
+        vf::parallel::Communicator &communicator = *vf::parallel::MPICommunicator::getInstance();
         vf::logging::Logger::initializeLogger();
         //////////////////////////////////////////////////////////////////////////
         // Simulation parameters
diff --git a/apps/gpu/LBM/SphereRefined/configSphere.txt b/apps/gpu/SphereRefined/configSphere.txt
similarity index 100%
rename from apps/gpu/LBM/SphereRefined/configSphere.txt
rename to apps/gpu/SphereRefined/configSphere.txt
diff --git a/apps/gpu/LBM/SphereRefined/sphere02.stl b/apps/gpu/SphereRefined/sphere02.stl
similarity index 100%
rename from apps/gpu/LBM/SphereRefined/sphere02.stl
rename to apps/gpu/SphereRefined/sphere02.stl
diff --git a/apps/gpu/LBM/SphereScaling/CMakeLists.txt b/apps/gpu/SphereScaling/CMakeLists.txt
similarity index 81%
rename from apps/gpu/LBM/SphereScaling/CMakeLists.txt
rename to apps/gpu/SphereScaling/CMakeLists.txt
index db3747f2b620cab1efc5cf50f02aee1a8fee4a54..7d2d7722190e02876dc05e581cd9ce67a3362ebd 100644
--- a/apps/gpu/LBM/SphereScaling/CMakeLists.txt
+++ b/apps/gpu/SphereScaling/CMakeLists.txt
@@ -1,6 +1,6 @@
 PROJECT(SphereScaling LANGUAGES CUDA CXX)
 
-vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES SphereScaling.cpp)
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator FILES SphereScaling.cpp)
 
 set_source_files_properties(SphereScaling.cpp PROPERTIES LANGUAGE CUDA)
 
diff --git a/apps/gpu/LBM/SphereScaling/SphereScaling.cpp b/apps/gpu/SphereScaling/SphereScaling.cpp
similarity index 98%
rename from apps/gpu/LBM/SphereScaling/SphereScaling.cpp
rename to apps/gpu/SphereScaling/SphereScaling.cpp
index 1db77aabeb03b6856d922bab0eec53b6d9cf63ae..8ada094a14b8ee19cdfb4c214f0f0faa4879c053 100755
--- a/apps/gpu/LBM/SphereScaling/SphereScaling.cpp
+++ b/apps/gpu/SphereScaling/SphereScaling.cpp
@@ -1,15 +1,13 @@
 #define _USE_MATH_DEFINES
+#include <cmath>
 #include <exception>
+#include <filesystem>
 #include <fstream>
 #include <iostream>
-#include <math.h>
 #include <memory>
 #include <sstream>
 #include <stdexcept>
 #include <string>
-#include <filesystem>
-
-#include "mpi.h"
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -38,7 +36,6 @@
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
@@ -57,6 +54,7 @@
 //////////////////////////////////////////////////////////////////////////
 
 #include "utilities/communication.h"
+#include <parallel/MPICommunicator.h>
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -64,9 +62,9 @@
 
 void runVirtualFluids(const vf::basics::ConfigurationFile& config)
 {
-    vf::gpu::Communicator& communicator = vf::gpu::MpiCommunicator::getInstance();
+    vf::parallel::Communicator &communicator = *vf::parallel::MPICommunicator::getInstance();
 
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcesses(), communicator.getProcessID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
     GridScalingFactory scalingFactory = GridScalingFactory();
 
@@ -166,12 +164,12 @@ void runVirtualFluids(const vf::basics::ConfigurationFile& config)
         const real dCubeLev1   = 72.0; // Phoenix: 72.0
 
         if (para->getNumprocs() > 1) {
-            const uint generatePart = vf::gpu::MpiCommunicator::getInstance().getPID();
+            const uint generatePart = communicator.getProcessID();
 
             real overlap = (real)8.0 * dxGrid;
             gridBuilder->setNumberOfLayers(10, 8);
 
-            if (communicator.getNumberOfProcess() == 2) {
+            if (communicator.getNumberOfProcesses() == 2) {
                 real zSplit = 0.5 * sideLengthCube;
 
                 if (scalingType == "weak") {
@@ -245,7 +243,7 @@ void runVirtualFluids(const vf::basics::ConfigurationFile& config)
                 // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
                 //////////////////////////////////////////////////////////////////////////
 
-            } else if (communicator.getNumberOfProcess() == 4) {
+            } else if (communicator.getNumberOfProcesses() == 4) {
                 real ySplit = 0.5 * sideLengthCube;
                 real zSplit = 0.5 * sideLengthCube;
 
@@ -361,7 +359,7 @@ void runVirtualFluids(const vf::basics::ConfigurationFile& config)
                 gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0); // set pressure BC after velocity BCs
                 // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
                 //////////////////////////////////////////////////////////////////////////
-            } else if (communicator.getNumberOfProcess() == 8) {
+            } else if (communicator.getNumberOfProcesses() == 8) {
                 real xSplit = 0.5 * sideLengthCube;
                 real ySplit = 0.5 * sideLengthCube;
                 real zSplit = 0.5 * sideLengthCube;
@@ -654,7 +652,7 @@ int main(int argc, char *argv[])
 
         try {
             VF_LOG_INFO("For the default config path to work, execute the app from the project root.");
-            vf::basics::ConfigurationFile config = vf::basics::loadConfig(argc, argv, "./apps/gpu/LBM/SphereScaling/config.txt");
+            vf::basics::ConfigurationFile config = vf::basics::loadConfig(argc, argv, "./apps/gpu/SphereScaling/config.txt");
             runVirtualFluids(config);
 
             //////////////////////////////////////////////////////////////////////////
diff --git a/apps/gpu/LBM/SphereScaling/config.txt b/apps/gpu/SphereScaling/config.txt
similarity index 87%
rename from apps/gpu/LBM/SphereScaling/config.txt
rename to apps/gpu/SphereScaling/config.txt
index 5cfaeb492f97d348140ca3161b0738b4dd3ceaa7..33dd20588499b156eb187bc68c89c2d89ac4149e 100644
--- a/apps/gpu/LBM/SphereScaling/config.txt
+++ b/apps/gpu/SphereScaling/config.txt
@@ -1,12 +1,12 @@
 # Tesla 03
-# mpiexec -n 2 "C:/Users/Master/Documents/MasterAnna/VirtualFluids_dev/build/bin/Release/SphereScaling.exe" "C:/Users/Master/Documents/MasterAnna/VirtualFluids_dev/apps/gpu/LBM/SphereScaling/config.txt"
+# mpiexec -n 2 "C:/Users/Master/Documents/MasterAnna/VirtualFluids_dev/build/bin/Release/SphereScaling.exe" "C:/Users/Master/Documents/MasterAnna/VirtualFluids_dev/apps/gpu/SphereScaling/config.txt"
 # Phoenix
-# mpirun -np 2 "./VirtualFluids_dev/build/bin/SphereScaling" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/config.txt"
+# mpirun -np 2 "./VirtualFluids_dev/build/bin/SphereScaling" "./VirtualFluids_dev/apps/gpu/SphereScaling/config.txt"
 
 # Phoenix mpich
-# mpirun -np 2 nvprof -f -o SphereScaling.%q{PMI_RANK}.nvprof "./VirtualFluids_dev/build/bin/SphereScaling" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/configPhoenix4GPU.txt"
+# mpirun -np 2 nvprof -f -o SphereScaling.%q{PMI_RANK}.nvprof "./VirtualFluids_dev/build/bin/SphereScaling" "./VirtualFluids_dev/apps/gpu/SphereScaling/configPhoenix4GPU.txt"
 # Phoenix openmpi
-# mpirun -np 2 nvprof -f -o SphereScaling.%q{OMPI_COMM_WORLD_RANK}.nvprof "./VirtualFluids_dev/build/bin/SphereScaling" "./VirtualFluids_dev/apps/gpu/LBM/SphereScaling/configPhoenix4GPU.txt"
+# mpirun -np 2 nvprof -f -o SphereScaling.%q{OMPI_COMM_WORLD_RANK}.nvprof "./VirtualFluids_dev/build/bin/SphereScaling" "./VirtualFluids_dev/apps/gpu/SphereScaling/configPhoenix4GPU.txt"
 
 ##################################################
 #GPU Mapping
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix1GPU_1LevStrongOS.txt b/apps/gpu/SphereScaling/configPhoenix1GPU_1LevStrongOS.txt
similarity index 100%
rename from apps/gpu/LBM/SphereScaling/configPhoenix1GPU_1LevStrongOS.txt
rename to apps/gpu/SphereScaling/configPhoenix1GPU_1LevStrongOS.txt
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix1GPU_1LevStrongStream.txt b/apps/gpu/SphereScaling/configPhoenix1GPU_1LevStrongStream.txt
similarity index 100%
rename from apps/gpu/LBM/SphereScaling/configPhoenix1GPU_1LevStrongStream.txt
rename to apps/gpu/SphereScaling/configPhoenix1GPU_1LevStrongStream.txt
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix2GPU_1LevStrongOS.txt b/apps/gpu/SphereScaling/configPhoenix2GPU_1LevStrongOS.txt
similarity index 100%
rename from apps/gpu/LBM/SphereScaling/configPhoenix2GPU_1LevStrongOS.txt
rename to apps/gpu/SphereScaling/configPhoenix2GPU_1LevStrongOS.txt
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix2GPU_1LevStrongStream.txt b/apps/gpu/SphereScaling/configPhoenix2GPU_1LevStrongStream.txt
similarity index 100%
rename from apps/gpu/LBM/SphereScaling/configPhoenix2GPU_1LevStrongStream.txt
rename to apps/gpu/SphereScaling/configPhoenix2GPU_1LevStrongStream.txt
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_1LevStrongOS.txt b/apps/gpu/SphereScaling/configPhoenix4GPU_1LevStrongOS.txt
similarity index 100%
rename from apps/gpu/LBM/SphereScaling/configPhoenix4GPU_1LevStrongOS.txt
rename to apps/gpu/SphereScaling/configPhoenix4GPU_1LevStrongOS.txt
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_1LevStrongStream.txt b/apps/gpu/SphereScaling/configPhoenix4GPU_1LevStrongStream.txt
similarity index 100%
rename from apps/gpu/LBM/SphereScaling/configPhoenix4GPU_1LevStrongStream.txt
rename to apps/gpu/SphereScaling/configPhoenix4GPU_1LevStrongStream.txt
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_1LevWeakStream.txt b/apps/gpu/SphereScaling/configPhoenix4GPU_1LevWeakStream.txt
similarity index 100%
rename from apps/gpu/LBM/SphereScaling/configPhoenix4GPU_1LevWeakStream.txt
rename to apps/gpu/SphereScaling/configPhoenix4GPU_1LevWeakStream.txt
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_2LevStrongStream.txt b/apps/gpu/SphereScaling/configPhoenix4GPU_2LevStrongStream.txt
similarity index 100%
rename from apps/gpu/LBM/SphereScaling/configPhoenix4GPU_2LevStrongStream.txt
rename to apps/gpu/SphereScaling/configPhoenix4GPU_2LevStrongStream.txt
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix4GPU_regressionTest.txt b/apps/gpu/SphereScaling/configPhoenix4GPU_regressionTest.txt
similarity index 100%
rename from apps/gpu/LBM/SphereScaling/configPhoenix4GPU_regressionTest.txt
rename to apps/gpu/SphereScaling/configPhoenix4GPU_regressionTest.txt
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevStrongOS.txt b/apps/gpu/SphereScaling/configPhoenix8GPU_1LevStrongOS.txt
similarity index 100%
rename from apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevStrongOS.txt
rename to apps/gpu/SphereScaling/configPhoenix8GPU_1LevStrongOS.txt
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevStrongStream.txt b/apps/gpu/SphereScaling/configPhoenix8GPU_1LevStrongStream.txt
similarity index 100%
rename from apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevStrongStream.txt
rename to apps/gpu/SphereScaling/configPhoenix8GPU_1LevStrongStream.txt
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevWeakOS.txt b/apps/gpu/SphereScaling/configPhoenix8GPU_1LevWeakOS.txt
similarity index 100%
rename from apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevWeakOS.txt
rename to apps/gpu/SphereScaling/configPhoenix8GPU_1LevWeakOS.txt
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevWeakStream.txt b/apps/gpu/SphereScaling/configPhoenix8GPU_1LevWeakStream.txt
similarity index 100%
rename from apps/gpu/LBM/SphereScaling/configPhoenix8GPU_1LevWeakStream.txt
rename to apps/gpu/SphereScaling/configPhoenix8GPU_1LevWeakStream.txt
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_2LevStrongStream.txt b/apps/gpu/SphereScaling/configPhoenix8GPU_2LevStrongStream.txt
similarity index 100%
rename from apps/gpu/LBM/SphereScaling/configPhoenix8GPU_2LevStrongStream.txt
rename to apps/gpu/SphereScaling/configPhoenix8GPU_2LevStrongStream.txt
diff --git a/apps/gpu/LBM/SphereScaling/configPhoenix8GPU_regressionTest.txt b/apps/gpu/SphereScaling/configPhoenix8GPU_regressionTest.txt
similarity index 100%
rename from apps/gpu/LBM/SphereScaling/configPhoenix8GPU_regressionTest.txt
rename to apps/gpu/SphereScaling/configPhoenix8GPU_regressionTest.txt
diff --git a/apps/gpu/LBM/TGV_3D/CMakeLists.txt b/apps/gpu/TGV_3D/CMakeLists.txt
similarity index 100%
rename from apps/gpu/LBM/TGV_3D/CMakeLists.txt
rename to apps/gpu/TGV_3D/CMakeLists.txt
diff --git a/apps/gpu/LBM/TGV_3D/TGV_3D.cpp b/apps/gpu/TGV_3D/TGV_3D.cpp
similarity index 98%
rename from apps/gpu/LBM/TGV_3D/TGV_3D.cpp
rename to apps/gpu/TGV_3D/TGV_3D.cpp
index 3ec7ac651f69540e26a7def4cd630b110c64f639..0a2d47dfc5ff939917314cbb5195123e34b91bf4 100644
--- a/apps/gpu/LBM/TGV_3D/TGV_3D.cpp
+++ b/apps/gpu/TGV_3D/TGV_3D.cpp
@@ -35,14 +35,12 @@
 #include <filesystem>
 #include <fstream>
 #include <iostream>
-#include <math.h>
+#include <cmath>
 #include <memory>
 #include <sstream>
 #include <stdexcept>
 #include <string>
 
-#include "mpi.h"
-
 //////////////////////////////////////////////////////////////////////////
 
 #include "DataTypes.h"
@@ -66,7 +64,6 @@
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
@@ -76,7 +73,7 @@
 #include "VirtualFluids_GPU/Output/FileWriter.h"
 #include "VirtualFluids_GPU/Parameter/Parameter.h"
 
-
+#include <parallel/MPICommunicator.h>
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -131,13 +128,11 @@ std::string simulationName("TGV_3D");
 
 void multipleLevel(const std::string& configPath)
 {
-    vf::gpu::Communicator& communicator = vf::gpu::MpiCommunicator::getInstance();
-
-    
+    vf::parallel::Communicator &communicator = *vf::parallel::MPICommunicator::getInstance();
 
     vf::basics::ConfigurationFile config;
     config.load(configPath);
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcesses(), communicator.getProcessID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/apps/gpu/LBM/TGV_3D/config.txt b/apps/gpu/TGV_3D/config.txt
similarity index 100%
rename from apps/gpu/LBM/TGV_3D/config.txt
rename to apps/gpu/TGV_3D/config.txt
diff --git a/apps/gpu/LBM/TGV_3D_GridRef/CMakeLists.txt b/apps/gpu/TGV_3D_GridRef/CMakeLists.txt
similarity index 100%
rename from apps/gpu/LBM/TGV_3D_GridRef/CMakeLists.txt
rename to apps/gpu/TGV_3D_GridRef/CMakeLists.txt
diff --git a/apps/gpu/LBM/TGV_3D_GridRef/TGV_3D_GridRef.cpp b/apps/gpu/TGV_3D_GridRef/TGV_3D_GridRef.cpp
similarity index 100%
rename from apps/gpu/LBM/TGV_3D_GridRef/TGV_3D_GridRef.cpp
rename to apps/gpu/TGV_3D_GridRef/TGV_3D_GridRef.cpp
diff --git a/apps/gpu/LBM/TGV_3D_GridRef/config.txt b/apps/gpu/TGV_3D_GridRef/config.txt
similarity index 100%
rename from apps/gpu/LBM/TGV_3D_GridRef/config.txt
rename to apps/gpu/TGV_3D_GridRef/config.txt
diff --git a/apps/gpu/LBM/TGV_3D_MultiGPU/CMakeLists.txt b/apps/gpu/TGV_3D_MultiGPU/CMakeLists.txt
similarity index 100%
rename from apps/gpu/LBM/TGV_3D_MultiGPU/CMakeLists.txt
rename to apps/gpu/TGV_3D_MultiGPU/CMakeLists.txt
diff --git a/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp b/apps/gpu/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp
similarity index 100%
rename from apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp
rename to apps/gpu/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp
diff --git a/apps/gpu/LBM/TGV_3D_MultiGPU/config.txt b/apps/gpu/TGV_3D_MultiGPU/config.txt
similarity index 100%
rename from apps/gpu/LBM/TGV_3D_MultiGPU/config.txt
rename to apps/gpu/TGV_3D_MultiGPU/config.txt
diff --git a/apps/gpu/LBM/WTG_RUB/CMakeLists.txt b/apps/gpu/WTG_RUB/CMakeLists.txt
similarity index 82%
rename from apps/gpu/LBM/WTG_RUB/CMakeLists.txt
rename to apps/gpu/WTG_RUB/CMakeLists.txt
index 606987dfb093c9c93bbd25bf5ff68fdc81311e1b..d67ec1c079fa418bcfc7374aa7aa7673756329a8 100644
--- a/apps/gpu/LBM/WTG_RUB/CMakeLists.txt
+++ b/apps/gpu/WTG_RUB/CMakeLists.txt
@@ -1,6 +1,6 @@
 PROJECT(WTG_RUB LANGUAGES CUDA CXX)
 
-vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES WTG_RUB.cpp)
+vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator FILES WTG_RUB.cpp)
 
 set_source_files_properties(WTG_RUB.cpp PROPERTIES LANGUAGE CUDA)
 
diff --git a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp b/apps/gpu/WTG_RUB/WTG_RUB.cpp
similarity index 99%
rename from apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
rename to apps/gpu/WTG_RUB/WTG_RUB.cpp
index 1df7d5d1e586a8f003ce1693a3af479734948744..9535488f76b8f61eb75d38f7fc5266cc2467c3ba 100644
--- a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
+++ b/apps/gpu/WTG_RUB/WTG_RUB.cpp
@@ -31,7 +31,7 @@
 //! \author Martin Schoenherr
 //=======================================================================================
 #define _USE_MATH_DEFINES
-#include <math.h>
+#include <cmath>
 #include <string>
 #include <sstream>
 #include <iostream>
@@ -41,8 +41,6 @@
 #include <memory>
 #include <filesystem>
 
-#include "mpi.h"
-
 //////////////////////////////////////////////////////////////////////////
 
 #include "DataTypes.h"
@@ -68,7 +66,6 @@
 //////////////////////////////////////////////////////////////////////////
 
 #include "VirtualFluids_GPU/LBM/Simulation.h"
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
@@ -77,6 +74,7 @@
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
 
+#include <parallel/MPICommunicator.h>
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -129,7 +127,8 @@ std::string chooseVariation();
 
 void multipleLevel(const std::string& configPath)
 {
-    vf::gpu::Communicator& communicator = vf::gpu::MpiCommunicator::getInstance();
+    vf::parallel::Communicator &communicator = *vf::parallel::MPICommunicator::getInstance();
+    auto gridBuilder = std::make_shared<MultipleGridBuilder>(); // same construction as in gridGeneratorTest
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -215,7 +214,7 @@ void multipleLevel(const std::string& configPath)
     vf::basics::ConfigurationFile config;
     config.load(configPath);
 
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcesses(), communicator.getProcessID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/apps/gpu/LBM/WTG_RUB/configDrivenCavity.txt b/apps/gpu/WTG_RUB/configDrivenCavity.txt
similarity index 100%
rename from apps/gpu/LBM/WTG_RUB/configDrivenCavity.txt
rename to apps/gpu/WTG_RUB/configDrivenCavity.txt
diff --git a/apps/gpu/LBM/gridGeneratorTest/CMakeLists.txt b/apps/gpu/gridGeneratorTest/CMakeLists.txt
similarity index 51%
rename from apps/gpu/LBM/gridGeneratorTest/CMakeLists.txt
rename to apps/gpu/gridGeneratorTest/CMakeLists.txt
index 6493b72cfd996a6866fe0fb07291dd2e3438dd03..b946a710edffbdcb98894e0d49763c6ead3dabf9 100644
--- a/apps/gpu/LBM/gridGeneratorTest/CMakeLists.txt
+++ b/apps/gpu/gridGeneratorTest/CMakeLists.txt
@@ -1,5 +1,3 @@
-PROJECT(GridGeneratorTest LANGUAGES CUDA CXX)
+PROJECT(GridGeneratorTest LANGUAGES CXX)
 
 vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES gridGenerator.cpp)
-
-set_source_files_properties(DrivenCavity.cpp PROPERTIES LANGUAGE CUDA)
diff --git a/apps/gpu/LBM/gridGeneratorTest/config.txt b/apps/gpu/gridGeneratorTest/config.txt
similarity index 100%
rename from apps/gpu/LBM/gridGeneratorTest/config.txt
rename to apps/gpu/gridGeneratorTest/config.txt
diff --git a/apps/gpu/LBM/gridGeneratorTest/gridGenerator.cpp b/apps/gpu/gridGeneratorTest/gridGenerator.cpp
similarity index 91%
rename from apps/gpu/LBM/gridGeneratorTest/gridGenerator.cpp
rename to apps/gpu/gridGeneratorTest/gridGenerator.cpp
index 90cc4c0ab33745c65688db4bf170b25a9099da51..501dfd06e918a0dc74680c4abc791da9a2b77e8a 100644
--- a/apps/gpu/LBM/gridGeneratorTest/gridGenerator.cpp
+++ b/apps/gpu/gridGeneratorTest/gridGenerator.cpp
@@ -1,24 +1,14 @@
-//#define MPI_LOGGING
-
-//Martin Branch
-
-#include <mpi.h>
-#if defined( MPI_LOGGING )
-	#include <mpe.h>
-#endif
-
-#include <string>
+#include <fstream>
 #include <iostream>
 #include <stdexcept>
-#include <fstream>
+#include <string>
 #define _USE_MATH_DEFINES
-#include <math.h>
+#include <cmath>
 
 #include "StringUtilities/StringUtil.h"
 #include "basics/config/ConfigurationFile.h"
 
 #include "VirtualFluids_GPU/LBM/Simulation.h"
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
@@ -55,45 +45,26 @@
 #include "utilities/communication.h"
 #include "utilities/transformator/TransformatorImp.h"
 
+#include <parallel/MPICommunicator.h>
 
-void multipleLevel(const std::string& configPath)
+void runVirtualFluids(const vf::basics::ConfigurationFile &config)
 {
-    auto gridFactory = GridFactory::make();
-    //gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::RAYCASTING);
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-    //gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_UNDER_TRIANGLE);
-
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
+    vf::parallel::Communicator &communicator = *vf::parallel::MPICommunicator::getInstance();
+    auto gridBuilder = std::make_shared<MultipleGridBuilder>();
 
-    vf::gpu::Communicator& communicator = vf::gpu::MpiCommunicator::getInstance();
-    vf::basics::ConfigurationFile config;
-    config.load(configPath);
-    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcess(), communicator.getPID(), &config);
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNumberOfProcesses(), communicator.getProcessID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
     bool useGridGenerator = true;
 
-    if(useGridGenerator){
-
-        enum testCase{
-			TGV,
-			TGV3D,
-			SphereTest,
-			DrivAer,
-            PaperPlane,
-            DLC,
-            MultiGPU,
-            StlGroupTest
-        };
+    if (useGridGenerator) {
+        enum testCase { TGV, TGV3D, SphereTest, DrivAer, PaperPlane, DLC, MultiGPU, StlGroupTest };
 
         int testcase = SphereTest;
 
-		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		if (testcase == TGV)
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		{
@@ -540,7 +511,7 @@ void multipleLevel(const std::string& configPath)
             gridBuilder->addGrid(DLC_RefBox_Level_3, 3);
             gridBuilder->addGrid(DLC_RefBox_Level_4, 4);
 
-            Conglomerate* refinement = new Conglomerate();
+            auto refinement = std::make_shared<Conglomerate>();
             refinement->add(DLC_RefBox_Level_5);
             refinement->add(VW370_SERIE_STL);
 
@@ -555,10 +526,10 @@ void multipleLevel(const std::string& configPath)
 
             //////////////////////////////////////////////////////////////////////////
 
-            gridBuilder->setVelocityBoundaryCondition(SideType::PY, vx , 0.0, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::MY, vx , 0.0, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vx , 0.0, 0.0);
-            gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vx , 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::PY, vx, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::MY, vx, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vx, 0.0, 0.0);
+            gridBuilder->setVelocityBoundaryCondition(SideType::MZ, vx, 0.0, 0.0);
 
             gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
             gridBuilder->setVelocityBoundaryCondition(SideType::MX, vx, 0.0, 0.0);
@@ -649,7 +620,7 @@ void multipleLevel(const std::string& configPath)
             para->setMaxDev(2);
 
             //const uint generatePart = 1;
-            const uint generatePart = communicator.getPID();
+            const uint generatePart = communicator.getProcessID();
 
             std::ofstream logFile2;
 
@@ -700,12 +671,12 @@ void multipleLevel(const std::string& configPath)
             gridBuilder->buildGrids(true); // buildGrids() has to be called before setting the BCs!!!!
 
             if( generatePart == 0 ){
-                gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
+                gridBuilder->findCommunicationIndices(CommunicationDirections::PX);
                 gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 1);
             }
 
             if( generatePart == 1 ){
-                gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
+                gridBuilder->findCommunicationIndices(CommunicationDirections::MX);
                 gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 0);
             }
 
@@ -752,75 +723,46 @@ void multipleLevel(const std::string& configPath)
             //gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
         }
 
-    }
-    else
-    {
+    } else {
         //gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
         //gridGenerator = GridProvider::makeGridReader(FILEFORMAT::BINARY, para, cudaMemoryManager);
     }
 
-    logFile.close();
-
     //return;
 
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
     auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
 
     SPtr<GridProvider> gridGenerator;
-    if( useGridGenerator ) gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
-    else                   gridGenerator = GridProvider::makeGridReader(FILEFORMAT::BINARY, para, cudaMemoryManager);
+    if (useGridGenerator)
+        gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
+    else
+        gridGenerator = GridProvider::makeGridReader(FILEFORMAT::BINARY, para, cudaMemoryManager);
 
-    SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
     Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory);
     sim.run();
 }
 
-int main( int argc, char* argv[])
+int main(int argc, char *argv[])
 {
-    MPI_Init(&argc, &argv);
-    std::string str, str2;
-    if ( argv != NULL )
-    {
-        //str = static_cast<std::string>(argv[0]);
-
-        try
-        {
-            //////////////////////////////////////////////////////////////////////////
-
-			std::string targetPath;
-
-			targetPath = __FILE__;
-
-#ifdef _WIN32
-			targetPath = targetPath.substr(0, targetPath.find_last_of('\\') + 1);
-#else
-			targetPath = targetPath.substr(0, targetPath.find_last_of('/') + 1);
-#endif
+    if (argv != nullptr) { // no argc check: the default config path mentioned in the trace below must stay reachable
 
-			std::cout << targetPath << std::endl;
-
-			multipleLevel(targetPath + "config.txt");
+        try {
+            VF_LOG_TRACE("For the default config path to work, execute the app from the project root.");
+            vf::basics::ConfigurationFile config = vf::basics::loadConfig(argc, argv);
+            runVirtualFluids(config);
 
             //////////////////////////////////////////////////////////////////////////
-		}
-        catch (const std::bad_alloc& e)
-        {
-            std::cout << "Bad alloc: " << e.what() << std::flush;
-        }
-        catch (const std::exception& e)
-        {
-            std::cout << e.what() << std::flush;
+        } catch (const spdlog::spdlog_ex &ex) {
+            std::cout << "Log initialization failed: " << ex.what() << std::endl;
+        } catch (const std::bad_alloc &e) {
+            VF_LOG_CRITICAL("Bad Alloc: {}", e.what());
+        } catch (const std::exception &e) {
+            VF_LOG_CRITICAL("exception: {}", e.what());
+        } catch (...) {
+            VF_LOG_CRITICAL("Unknown exception!");
         }
-        catch (...)
-        {
-            std::cout << "unknown exeption" << std::endl;
-        }
-
     }
-
-   MPI_Finalize();
-   return 0;
+    return 0;
 }
diff --git a/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactory.cpp b/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactory.cpp
index 3564f65025a38d37c577b6ea6a882e6f48fd1e65..baa9ed03f7b5b074409c5c487265e77307067674 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactory.cpp
+++ b/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactory.cpp
@@ -9,9 +9,10 @@
 #include "VirtualFluids_GPU/Parameter/Parameter.h"
 
 #include "VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
 #include "VirtualFluids_GPU/LBM/Simulation.h"
 
+#include <parallel/MPICommunicator.h>
+
 std::shared_ptr<Parameter> vf::gpu::tests::makeParameter(std::shared_ptr<SimulationParameter> simPara)
 {
     auto para = std::make_shared<Parameter>(1, 0);
@@ -119,8 +120,9 @@ const std::function<void()> vf::gpu::tests::makeVirtualFluidSimulation(std::shar
     auto cudaManager = std::make_shared<CudaMemoryManager>(para);
     auto grid = makeGridReader(condition, para, cudaManager);
     BoundaryConditionFactory bc_factory;
+    vf::parallel::Communicator &communicator = *vf::parallel::MPICommunicator::getInstance();
     auto simulation =
-        std::make_shared<Simulation>(para, cudaManager, vf::gpu::MpiCommunicator::getInstance(), *grid.get(), &bc_factory);
+        std::make_shared<Simulation>(para, cudaManager, communicator, *grid.get(), &bc_factory);
     simulation->setDataWriter(dataWriter);
 
     return [simulation]() { simulation->run(); };
diff --git a/apps/gpu/wifi/UniformInflow/configUniformInflow.txt b/apps/gpu/wifi/UniformInflow/configUniformInflow.txt
new file mode 100755
index 0000000000000000000000000000000000000000..8509a4ef259a168b31781e63e724371f15e0b46e
--- /dev/null
+++ b/apps/gpu/wifi/UniformInflow/configUniformInflow.txt
@@ -0,0 +1,34 @@
+##################################################
+#information for writing
+##################################################
+Path = output/wifi/
+##################################################
+#information for reading
+##################################################
+GridPath = .
+##################################################
+Devices = 0 
+##################################################
+tStartOut           = 0
+tOut                = 10000
+tEnd                = 10000
+##################################################
+tStartAveraging     = 0
+tStartTmpAveraging  = 0
+tAveraging          = 10
+tStartOutProbe      = 10
+tOutProbe           = 10
+##################################################
+Ma = 0.05
+nodesPerDiameter = 32
+bodyForce = true
+SGSconstant = 0.333
+TurbulenceModel = QR
+QuadricLimiterP = 100000.0
+QuadricLimiterM = 100000.0
+QuadricLimiterD = 100000.0
+
+##################################################
+turbineDiameter = 126
+NumberOfNodesPerAL = 100
+# SmearingWidth = 15
\ No newline at end of file
diff --git a/apps/gpu/wifi/UniformInflow/uniform_inflow.py b/apps/gpu/wifi/UniformInflow/uniform_inflow.py
new file mode 100755
index 0000000000000000000000000000000000000000..1c71d7c362326d2f49166f76350ab4b8d0ccf1c0
--- /dev/null
+++ b/apps/gpu/wifi/UniformInflow/uniform_inflow.py
@@ -0,0 +1,160 @@
+#%%
+import numpy as np
+from pathlib import Path
+from mpi4py import MPI
+from pyfluids import basics, gpu, logger
+from pyfluids.communicator import Communicator
+from wiFI.wind_farm import create_wind_farm_from_json
+from wiFI.logging.logger import LoggerConfig
+from wiFI.aeroelastics.stiff_rotor import StiffRotorGPU
+from wiFI.interfaces.implementations.velocity_provider.VirtualFluids import VFFarm
+import multiprocessing as mp
+
+mp.set_start_method("spawn", force=True)
+#%%
+def main():
+    """Run the uniform-inflow wind turbine case on the GPU.
+
+    Loads configUniformInflow.txt, builds one coarse grid with a velocity boundary on MX
+    and a pressure boundary on PX, registers the wiFI farm as an actuator and runs the
+    simulation.
+    """
+    # The communicator is provided by the standalone pyfluids.communicator module.
+    communicator = Communicator.get_instance()
+    sim_name = "NREL5MW"
+    sim_dir = Path("/workspaces/VirtualFluids_dev/output/wifi/")
+    config_file = Path("apps/gpu/wifi/UniformInflow")/"configUniformInflow.txt"
+    farm_file = Path("/workspaces/VirtualFluids_dev/wifi/resources/turbine_data/NREL5MW")/"SingleTurbine.json"
+    use_tip_correction = False
+    tip_speed_ratio = 7.5
+    #%%
+    logger.Logger.initialize_logger()
+    #%%
+    grid_builder = gpu.grid_generator.MultipleGridBuilder()
+
+    config = basics.ConfigurationFile()
+    config.load(str(config_file))
+
+    para = gpu.Parameter(communicator.get_number_of_processes(), communicator.get_process_id(), config)
+    para.set_use_streams(True)
+    bc_factory = gpu.BoundaryConditionFactory()
+
+    grid_scaling_factory = gpu.GridScalingFactory()
+    grid_scaling_factory.set_scaling_factory(gpu.GridScaling.ScaleCompressible)
+
+    #%%
+    turbine_diameter = config.get_float_value("turbineDiameter", 126)
+
+
+    viscosity = config.get_float_value("viscosity", 1.56e-5)
+
+    velocity  = 8
+    mach = config.get_float_value("Ma", 0.05)
+    # The key must match the spelling used in configUniformInflow.txt ("nodesPerDiameter").
+    nodes_per_diameter = config.get_uint_value("nodesPerDiameter", 32)
+
+    density = config.get_float_value("Density", 1.225)
+    level = 0
+    n_blade_nodes  = config.get_int_value("NumberOfNodesPerAL", 32)
+
+
+    # NOTE(review): originally labelled "all in s", but tStartOut/tOut/tEnd are handed to
+    # para.set_timestep_*() below without dividing by dt -- confirm the intended units.
+    t_start_out   = config.get_float_value("tStartOut")
+    t_out         = config.get_float_value("tOut")
+    t_end         = config.get_float_value("tEnd") # total time of simulation
+
+    t_start_averaging      = config.get_float_value("tStartAveraging")
+    t_start_tmp_averaging  = config.get_float_value("tStartTmpAveraging")
+    t_averaging            = config.get_float_value("tAveraging")
+    t_start_out_probe      = config.get_float_value("tStartOutProbe")
+    t_out_probe            = config.get_float_value("tOutProbe")
+
+    #%%
+    length = np.array([4,3,3])*turbine_diameter
+    dx = turbine_diameter / nodes_per_diameter
+    dt = dx * mach / (np.sqrt(3) * velocity)
+    velocity_LB = velocity * dt / dx # LB units
+    viscosity_LB = viscosity * dt / (dx * dx) # LB units
+    pressure_gradient = 0
+    epsilon = dx*pow(2,-level)*1.5
+
+    logger.vf_log_info(f"velocity  [dx/dt] = {velocity_LB}")
+    logger.vf_log_info(f"dt   = {dt}")
+    logger.vf_log_info(f"dx   = {dx}")
+    logger.vf_log_info(f"viscosity [10^8 dx^2/dt] = {viscosity_LB*1e8}")
+    logger.vf_log_info(f"dpdx  = {pressure_gradient}")
+    logger.vf_log_info(f"mach number  = {mach}")
+
+    farm = create_wind_farm_from_json(farm_file, sim_dir, tip_speed_ratio, velocity, True, log_turbine=True, logger_config=LoggerConfig(0, 1.0, timesteps_in_buffer=1))
+
+    farm.turbine.add_blade_forces_logging(True)
+    farm.turbine.add_blade_coordinate_logging(True)
+    farm.turbine.add_blade_velocities_logging(True)
+
+    #%%
+    para.set_output_prefix(sim_name)
+    para.set_print_files(True)
+
+    para.set_forcing(0, 0, 0)
+    para.set_velocity_LB(velocity_LB)
+    para.set_viscosity_LB(viscosity_LB)
+    para.set_velocity_ratio(dx/dt)
+    para.set_viscosity_ratio(dx*dx/dt)
+
+    para.set_main_kernel("CumulantK17")
+
+    para.set_timestep_start_out(int(t_start_out))
+    # para.set_timestep_out(20)
+    para.set_timestep_out(int(t_out))
+    para.set_timestep_end(int(t_end))
+    para.set_is_body_force(True)
+    #%%
+    tm_factory = gpu.TurbulenceModelFactory(para)
+    tm_factory.read_config_file(config)
+    #%%
+    grid_builder.add_coarse_grid(-1.*turbine_diameter, -0.5 * length[1], -0.5 * length[2], length[0]-1.*turbine_diameter, 0.5 * length[1], 0.5 * length[2], dx)
+    grid_builder.set_periodic_boundary_condition(False, True, True)
+    grid_builder.build_grids(False)
+
+
+
+    grid_builder.set_velocity_boundary_condition(gpu.SideType.MX, velocity_LB, 0.0, 0.0)
+    grid_builder.set_pressure_boundary_condition(gpu.SideType.PX, 0)
+
+    bc_factory.set_velocity_boundary_condition(gpu.VelocityBC.VelocityCompressible)
+    bc_factory.set_pressure_boundary_condition(gpu.PressureBC.OutflowNonReflective)
+
+    #%%
+    para.set_initial_condition_uniform(velocity_LB, 0, 0)
+
+    coupled_farm = VFFarm(farm, density, epsilon, level, dt, dx,  n_blade_nodes, StiffRotorGPU, (density, ), use_tip_correction, 0)
+    para.add_actuator(coupled_farm)
+
+    # wall_model_probe = gpu.probes.WallModelProbe("wallModelProbe", para.get_output_path(), int(t_start_averaging/dt), int(t_start_tmp_averaging/dt), int(t_averaging/dt), int(t_start_out_probe/dt), int(t_out_probe/dt))
+    # wall_model_probe.add_all_available_statistics()
+    # wall_model_probe.set_file_name_to_n_out()
+    # wall_model_probe.set_force_output_to_stress(True)
+    # if para.get_is_body_force():
+    #     wall_model_probe.set_evaluate_pressure_gradient(True)
+    # para.add_probe(wall_model_probe)
+
+    # plane_locs = [farm.positions.x[0] + i*turbine_diameter for i in range(-1,6)]
+
+    # for n_probe, probe_pos in enumerate(plane_locs):
+    #     plane_probe = gpu.probes.PlaneProbe(f"planeProbe_{n_probe+1}", para.get_output_path(), int(t_start_averaging/dt), int(t_averaging/dt), int(t_start_out_probe/dt), int(t_out_probe/dt))
+    #     plane_probe.set_probe_plane(probe_pos, 0, 0, dx, length[1], length[2])
+    #     plane_probe.add_all_available_statistics()
+    #     para.add_probe(plane_probe)
+
+    #%%
+    cuda_memory_manager = gpu.CudaMemoryManager(para)
+    grid_generator = gpu.GridProvider.make_grid_generator(grid_builder, para, cuda_memory_manager, communicator)
+    #%%
+    sim = gpu.Simulation(para, cuda_memory_manager, communicator, grid_generator, bc_factory, tm_factory, grid_scaling_factory)
+    #%%
+    sim.run()
+    MPI.Finalize()
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/gpu.cmake b/gpu.cmake
index 6bdbf656326c4d733ec2c5b5d08751b3055a7b00..8f3fde523efa4cbf2f0884723728832f82a53231 100644
--- a/gpu.cmake
+++ b/gpu.cmake
@@ -5,20 +5,23 @@
 add_subdirectory(src/gpu/GridGenerator)
 add_subdirectory(src/gpu/VirtualFluids_GPU)
 
-if(BUILD_VF_ALL_SAMPLES)
-    list(APPEND USER_APPS 
-    "apps/gpu/LBM/ActuatorLine"
-    "apps/gpu/LBM/SphereScaling" 
-    "apps/gpu/LBM/TGV_3D")
-endif()
-
 #############################################################
 ###                  Apps                                 ###
 #############################################################
 
-add_subdirectory(apps/gpu/LBM/DrivenCavity)
-add_subdirectory(apps/gpu/LBM/SphereGPU)
-add_subdirectory(apps/gpu/LBM/BoundaryLayer)
+if(BUILD_VF_ALL_SAMPLES)
+    list(APPEND USER_APPS
+    "apps/gpu/DrivenCavityUniform"
+    "apps/gpu/DrivenCavityMultiGPU"
+    "apps/gpu/ActuatorLine"
+    "apps/gpu/SphereScaling" 
+    "apps/gpu/TGV_3D"
+    "apps/gpu/gridGeneratorTest")
+endif()
+
+add_subdirectory(apps/gpu/DrivenCavity)
+add_subdirectory(apps/gpu/SphereGPU)
+add_subdirectory(apps/gpu/BoundaryLayer)
 
 #############################################################
 ###                   Numeric Tests                       ###
diff --git a/pythonbindings/CMakeLists.txt b/pythonbindings/CMakeLists.txt
index c9bef9ef080e6e995b903837475f7c2e401152ee..8dbf9047ac964b2886a180c6be5d1b753e4fcb24 100644
--- a/pythonbindings/CMakeLists.txt
+++ b/pythonbindings/CMakeLists.txt
@@ -41,18 +41,27 @@ target_include_directories(lbm_bindings PRIVATE ${CMAKE_SOURCE_DIR}/src/)
 target_include_directories(lbm_bindings PRIVATE ${CMAKE_BINARY_DIR})
 add_dependencies(python_bindings lbm_bindings)
 
+pybind11_add_module(communicator_bindings MODULE src/communicator.cpp)
+set_target_properties(  communicator_bindings PROPERTIES
+                        LIBRARY_OUTPUT_DIRECTORY ${PYFLUIDS_DIR}
+                        OUTPUT_NAME "communicator")
+target_link_libraries(communicator_bindings PRIVATE parallel)
+target_include_directories(communicator_bindings PRIVATE ${CMAKE_SOURCE_DIR}/src/)
+target_include_directories(communicator_bindings PRIVATE ${CMAKE_BINARY_DIR})
+target_compile_definitions(communicator_bindings PRIVATE VF_MPI)
+add_dependencies(python_bindings communicator_bindings)
+
 
 IF(BUILD_VF_GPU)
     pybind11_add_module(gpu_bindings MODULE src/gpu/gpu.cpp)
     set_target_properties(  gpu_bindings PROPERTIES
                             LIBRARY_OUTPUT_DIRECTORY ${PYFLUIDS_DIR}
                             OUTPUT_NAME "gpu")
-    target_link_libraries(gpu_bindings PRIVATE basics)
     set_source_files_properties(src/gpu/gpu.cpp PROPERTIES LANGUAGE CUDA)
 
     target_include_directories(gpu_bindings PRIVATE ${VF_THIRD_DIR}/cuda_samples/)
 
-    target_link_libraries(gpu_bindings PRIVATE GridGenerator VirtualFluids_GPU)
+    target_link_libraries(gpu_bindings PRIVATE basics GridGenerator VirtualFluids_GPU parallel)
 
     target_include_directories(gpu_bindings PRIVATE ${CMAKE_SOURCE_DIR}/src/)
     target_include_directories(gpu_bindings PRIVATE ${CMAKE_BINARY_DIR})
@@ -70,15 +79,9 @@ IF(BUILD_VF_CPU)
     target_include_directories(cpu_bindings PRIVATE ${CMAKE_SOURCE_DIR}/src/)
     target_include_directories(cpu_bindings PRIVATE ${CMAKE_BINARY_DIR})
 
-    target_compile_definitions(cpu_bindings PUBLIC VF_DOUBLE_ACCURACY) # TODO: remove this and always set it dynamically
-    target_compile_definitions(basics_bindings PUBLIC VF_DOUBLE_ACCURACY)
-    target_compile_definitions(logger_bindings PUBLIC VF_DOUBLE_ACCURACY)
-    target_compile_definitions(lbm_bindings PUBLIC VF_DOUBLE_ACCURACY)
-
     target_compile_definitions(cpu_bindings PRIVATE VF_METIS VF_MPI)
     add_dependencies(python_bindings cpu_bindings)
 
-
     # include bindings for muparsers
     pybind11_add_module(pymuparser MODULE src/muParser.cpp)
 
@@ -91,3 +94,16 @@ IF(BUILD_VF_CPU)
     target_compile_definitions(pymuparser PRIVATE VF_METIS VF_MPI)
     target_link_libraries(pymuparser PRIVATE muparser)
 ENDIF()
+
+if(BUILD_VF_DOUBLE_ACCURACY)
+    # Binding targets that are referenced unconditionally, followed by the optional CPU/GPU ones.
+    foreach(binding_target basics_bindings logger_bindings lbm_bindings communicator_bindings)
+        target_compile_definitions(${binding_target} PRIVATE VF_DOUBLE_ACCURACY)
+    endforeach()
+    if(BUILD_VF_CPU)
+        target_compile_definitions(cpu_bindings PRIVATE VF_DOUBLE_ACCURACY)
+    endif()
+    if(BUILD_VF_GPU)
+        target_compile_definitions(gpu_bindings PRIVATE VF_DOUBLE_ACCURACY)
+    endif()
+endif()
\ No newline at end of file
diff --git a/pythonbindings/pyfluids-stubs/gpu/__init__.pyi b/pythonbindings/pyfluids-stubs/gpu/__init__.pyi
index cc6e3906477e5206a7d3be159cda15909aa1ba68..8f78fd3387dc59c94ddf2315635e674a7330dae9 100644
--- a/pythonbindings/pyfluids-stubs/gpu/__init__.pyi
+++ b/pythonbindings/pyfluids-stubs/gpu/__init__.pyi
@@ -178,7 +178,8 @@ class GridProvider:
     @staticmethod
     def make_grid_generator(builder: grid_generator.GridBuilder, para: Parameter, cuda_memory_manager: CudaMemoryManager, communicator: MpiCommunicator) -> GridProvider: ...
 
-
+class MultipleGridBuilder:
+    def __init__(self) -> None: ...
 
 class GridScaling:
     __members__: ClassVar[dict] = ...  # read-only
diff --git a/pythonbindings/pyfluids/__init__.py b/pythonbindings/pyfluids/__init__.py
index f0537b758267e22a72e5030340de7b87d52f35c3..5b4197972c0a8738c551e57e635415e3858f53e6 100644
--- a/pythonbindings/pyfluids/__init__.py
+++ b/pythonbindings/pyfluids/__init__.py
@@ -33,22 +33,26 @@ r"""
 =======================================================================================
 """
 try:
-    from .bindings import basics
+    from . import basics
 except ImportError:
     print("Basics bindings not included")
 try:
-    from .bindings import logger
+    from . import logger
 except ImportError:
     print("Logger bindings not included")
 try:
-    from .bindings import lbm
+    from . import lbm
 except ImportError:
     print("LBM bindings not included")
 try:
-    from .bindings import gpu
+    from . import communicator
+except ImportError:
+    print("communicator bindings not included")
+try:
+    from . import gpu
 except ImportError:
     print("GPU bindings not included")
 try:
-    from .bindings import cpu
+    from . import cpu
 except ImportError:
     print("CPU bindings not included")
\ No newline at end of file
diff --git a/pythonbindings/src/VirtualFluids.cpp b/pythonbindings/src/communicator.cpp
similarity index 72%
rename from pythonbindings/src/VirtualFluids.cpp
rename to pythonbindings/src/communicator.cpp
index 91682b79e8125a7513565b28e2e22e74e0b2dac1..fe706bb9eba4083f2a1a705a3ee615b727de77c8 100644
--- a/pythonbindings/src/VirtualFluids.cpp
+++ b/pythonbindings/src/communicator.cpp
@@ -26,39 +26,24 @@
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file VirtualFluids.cpp
-//! \ingroup src
+//! \file communicator.cpp
+//! \ingroup src
 //! \author Henry Korb
 //=======================================================================================
+#include <pybind11/cast.h>
 #include <pybind11/pybind11.h>
-#include "basics/basics.cpp"
-#include "lbm/lbm.cpp"
-#include "logger/logger.cpp"
 
-#ifdef VF_GPU_PYTHONBINDINGS
-#include "gpu/gpu.cpp"
-#endif
-#ifdef VF_CPU_PYTHONBINDINGS
-#include "cpu/cpu.cpp"
-#endif
+#include <parallel/MPICommunicator.h>
 
-
-namespace py_bindings
+namespace communicator_bindings
 {
-    namespace py = pybind11;
+namespace py = pybind11;
 
-    PYBIND11_MODULE(bindings, m)
-    {
-        // because we do not use the old logger (src/basics/logger) anymore and cout is not passed anymore to the old logger, we probably do not need this anymore
-        // pybind11::add_ostream_redirect(m, "ostream_redirect");
-        basics::makeModule(m);
-        lbm::makeModule(m);
-        logging::makeModule(m);
-#ifdef VF_GPU_PYTHONBINDINGS
-        gpu::makeModule(m);
-#endif
-#ifdef VF_CPU_PYTHONBINDINGS
-        cpu::makeModule(m);
-#endif
-    }
+PYBIND11_MODULE(communicator, m) // binds MPICommunicator (shared_ptr holder) as Python class "Communicator"
+{
+    py::class_<vf::parallel::MPICommunicator, std::shared_ptr<vf::parallel::MPICommunicator>>(m, "Communicator")
+        .def_static("get_instance", &vf::parallel::MPICommunicator::getInstance)
+        .def("get_number_of_processes", &vf::parallel::MPICommunicator::getNumberOfProcesses)
+        .def("get_process_id", py::overload_cast<>(&vf::parallel::MPICommunicator::getProcessID, py::const_));
 }
+} // namespace communicator_bindings
diff --git a/pythonbindings/src/gpu/gpu.cpp b/pythonbindings/src/gpu/gpu.cpp
index 8946b1d8af7655682a19e793119b27ab77f6f542..dcb4ded4b1f0f92748323081b7de9504c2995542 100644
--- a/pythonbindings/src/gpu/gpu.cpp
+++ b/pythonbindings/src/gpu/gpu.cpp
@@ -35,7 +35,6 @@
 #include "submodules/simulation.cpp"
 #include "submodules/parameter.cpp"
 #include "submodules/boundary_conditions.cpp"
-#include "submodules/communicator.cpp"
 #include "submodules/cuda_memory_manager.cpp"
 #include "submodules/probes.cpp"
 #include "submodules/precursor_writer.cpp"
@@ -48,23 +47,20 @@
 
 namespace gpu_bindings
 {
-    namespace py = pybind11;
-
-    PYBIND11_MODULE(gpu, m)
-    {
-        simulation::makeModule(m);
-        parameter::makeModule(m);
-        pre_collision_interactor::makeModule(m);
-        actuator_farm::makeModule(m);
-        boundary_conditions::makeModule(m);
-        transient_bc_setter::makeModule(m);
-        communicator::makeModule(m); 
-        cuda_memory_manager::makeModule(m);
-        probes::makeModule(m);
-        precursor_writer::makeModule(m);
-        grid_generator::makeModule(m);
-        grid_provider::makeModule(m);
-        turbulence_model::makeModule(m);
-        grid_scaling_factory::makeModule(m);
-    }
-}
\ No newline at end of file
+PYBIND11_MODULE(gpu, m)
+{
+    simulation::makeModule(m);
+    parameter::makeModule(m);
+    pre_collision_interactor::makeModule(m);
+    actuator_farm::makeModule(m);
+    boundary_conditions::makeModule(m);
+    transient_bc_setter::makeModule(m);
+    cuda_memory_manager::makeModule(m);
+    probes::makeModule(m);
+    precursor_writer::makeModule(m);
+    grid_generator::makeModule(m);
+    grid_provider::makeModule(m);
+    turbulence_model::makeModule(m);
+    grid_scaling_factory::makeModule(m);
+}
+} // namespace gpu_bindings
diff --git a/pythonbindings/src/gpu/submodules/communicator.cpp b/pythonbindings/src/gpu/submodules/communicator.cpp
deleted file mode 100644
index 1cf40090f59313536e5bbe650995a09aed5ebb55..0000000000000000000000000000000000000000
--- a/pythonbindings/src/gpu/submodules/communicator.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file communicator.cpp
-//! \ingroup submodules
-//! \author Henry Korb
-//=======================================================================================
-#include <pybind11/pybind11.h>
-#include <gpu/VirtualFluids_GPU/Communication/Communicator.h>
-#include <gpu/VirtualFluids_GPU/Communication/MpiCommunicator.h>
-
-namespace communicator
-{
-    namespace py = pybind11;
-
-    void makeModule(py::module_ &parentModule)
-    {
-        py::class_<vf::gpu::CommunicationRoutine, std::unique_ptr<vf::gpu::CommunicationRoutine, py::nodelete>>(parentModule, "CommunicationRoutine");
-
-        py::class_<vf::gpu::Communicator, vf::gpu::CommunicationRoutine, std::unique_ptr<vf::gpu::Communicator, py::nodelete>>(parentModule, "Communicator")
-            .def("get_number_of_process", &vf::gpu::Communicator::getNumberOfProcess)
-            .def("get_pid", &vf::gpu::Communicator::getPID);
-
-        py::class_<vf::gpu::MpiCommunicator, vf::gpu::Communicator, std::unique_ptr<vf::gpu::MpiCommunicator, py::nodelete>>(parentModule, "MpiCommunicator")
-            .def_static("get_instance", &vf::gpu::MpiCommunicator::getInstance, py::return_value_policy::reference);
-    }
-} // namespace communicator
\ No newline at end of file
diff --git a/pythonbindings/src/gpu/submodules/grid_generator.cpp b/pythonbindings/src/gpu/submodules/grid_generator.cpp
index c523e3932aa6e462a3a08db11e147bd1a19567e9..6c57900e4298094b59355e82a22433e1b647e059 100644
--- a/pythonbindings/src/gpu/submodules/grid_generator.cpp
+++ b/pythonbindings/src/gpu/submodules/grid_generator.cpp
@@ -95,6 +95,7 @@ namespace grid_generator
         .def("set_stress_boundary_condition", &LevelGridBuilder::setStressBoundaryCondition, py::arg("side_type"), py::arg("normal_x"), py::arg("normal_y"), py::arg("normal_z"), py::arg("sampling_offset"), py::arg("z0"), py::arg("dx"));
 
         py::class_<MultipleGridBuilder, LevelGridBuilder, std::shared_ptr<MultipleGridBuilder>>(gridGeneratorModule, "MultipleGridBuilder")
+        .def(py::init())
         .def("add_coarse_grid", &MultipleGridBuilder::addCoarseGrid, py::arg("start_x"), py::arg("start_y"), py::arg("start_z"), py::arg("end_x"), py::arg("end_y"), py::arg("end_z"), py::arg("delta"))
         .def("add_grid", py::overload_cast<SPtr<Object>>(&MultipleGridBuilder::addGrid), py::arg("grid_shape"))
         .def("add_grid", py::overload_cast<SPtr<Object>, uint>(&MultipleGridBuilder::addGrid), py::arg("grid_shape"), py::arg("level_fine"))
diff --git a/pythonbindings/src/gpu/submodules/simulation.cpp b/pythonbindings/src/gpu/submodules/simulation.cpp
index d32ef272a1fd26510439dde6ab3a9438d68009a7..545fe082fe59cacabde07052ccf1b041d0af7e25 100644
--- a/pythonbindings/src/gpu/submodules/simulation.cpp
+++ b/pythonbindings/src/gpu/submodules/simulation.cpp
@@ -32,7 +32,6 @@
 //=======================================================================================
 #include <pybind11/pybind11.h>
 #include <gpu/VirtualFluids_GPU/LBM/Simulation.h>
-#include <gpu/VirtualFluids_GPU/Communication/Communicator.h>
 #include <gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactory.h>
 #include <gpu/VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactory.h>
 #include <gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h>
@@ -43,6 +42,7 @@
 #include "gpu/VirtualFluids_GPU/Factories/BoundaryConditionFactory.h"
 #include "gpu/VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h"
 #include "gpu/VirtualFluids_GPU/Factories/GridScalingFactory.h"
+#include "parallel/Communicator.h"
 
 namespace simulation
 {
@@ -54,7 +54,7 @@ namespace simulation
         py::class_<Simulation>(parentModule, "Simulation")
         .def(py::init<  std::shared_ptr<Parameter>,
                         std::shared_ptr<CudaMemoryManager>,
-                        vf::gpu::Communicator &,
+                        vf::parallel::Communicator &,
                         GridProvider &,
                         BoundaryConditionFactory*,
                         GridScalingFactory*>(), 
@@ -66,7 +66,7 @@ namespace simulation
                         py::arg("gridScalingFactory"))
         .def(py::init<  std::shared_ptr<Parameter>,
                         std::shared_ptr<CudaMemoryManager>,
-                        vf::gpu::Communicator &,
+                        vf::parallel::Communicator &,
                         GridProvider &,
                         BoundaryConditionFactory*>(), 
                         py::arg("parameter"),
@@ -76,7 +76,7 @@ namespace simulation
                         py::arg("bcFactory"))
         .def(py::init<  std::shared_ptr<Parameter>,
                         std::shared_ptr<CudaMemoryManager>,
-                        vf::gpu::Communicator &,
+                        vf::parallel::Communicator &,
                         GridProvider &,
                         BoundaryConditionFactory*,
                         std::shared_ptr<TurbulenceModelFactory>,
diff --git a/regression-tests/driven_cavity_uniform_test.sh b/regression-tests/driven_cavity_uniform_test.sh
index deb1300cad5914e69a4f2c01428bbef31d7af6d3..c7bb78d014cdd4b2103932c5a9e5134bd8f7b767 100755
--- a/regression-tests/driven_cavity_uniform_test.sh
+++ b/regression-tests/driven_cavity_uniform_test.sh
@@ -7,7 +7,7 @@ source ./regression-tests/__regression_test_executer.sh
 REFERENCE_DATA_DIR=regression_tests/gpu/DrivenCavity_uniform
 
 # 2. set cmake flags for the build of VirtualFluids
-CMAKE_FLAGS="--preset=make_gpu -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=75 -DUSER_APPS="apps/gpu/LBM/DrivenCavityUniform""
+CMAKE_FLAGS="--preset=make_gpu -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=75 -DUSER_APPS=apps/gpu/DrivenCavityUniform"
 
 # 3. define the application to be executed
 APPLICATION=./build/bin/DrivenCavityUniform
diff --git a/regression-tests/multigpu_test/rocket4GPU.yml b/regression-tests/multigpu_test/rocket4GPU.yml
index a7ece055707c26a7a27e9c12be3447dc1b77855f..e8722cf367832104a12935af5bbbe660ce8e7fcc 100755
--- a/regression-tests/multigpu_test/rocket4GPU.yml
+++ b/regression-tests/multigpu_test/rocket4GPU.yml
@@ -31,8 +31,8 @@ copy:
     to: "multigpu_test/CMakePresets.json"
     overwrite: true
 
-  - from: "apps/gpu/LBM/"
-    to: "multigpu_test/apps/gpu/LBM/"
+  - from: "apps/gpu/"
+    to: "multigpu_test/apps/gpu/"
     overwrite: true
 
 collect:
diff --git a/regression-tests/multigpu_test/rocket8GPU.yml b/regression-tests/multigpu_test/rocket8GPU.yml
index 4b434fc8a2433dab513649800dbe3f160d986edd..d179dbc84d1f80b0efb9fcb368bffaa2732c26da 100755
--- a/regression-tests/multigpu_test/rocket8GPU.yml
+++ b/regression-tests/multigpu_test/rocket8GPU.yml
@@ -31,8 +31,8 @@ copy:
     to: "multigpu_test/CMakePresets.json"
     overwrite: true
 
-  - from: "apps/gpu/LBM/"
-    to: "multigpu_test/apps/gpu/LBM/"
+  - from: "apps/gpu/"
+    to: "multigpu_test/apps/gpu/"
     overwrite: true
 
 collect:
diff --git a/regression-tests/multigpu_test/slurm4GPU.job b/regression-tests/multigpu_test/slurm4GPU.job
index 0be42c51bac9a341b56eb705f9bdb518883f507d..ce678a57945c87f881f88887e88d79dff51bef0e 100755
--- a/regression-tests/multigpu_test/slurm4GPU.job
+++ b/regression-tests/multigpu_test/slurm4GPU.job
@@ -26,13 +26,13 @@ module list
 cd multigpu_test
 rm -rf build && mkdir -p build
 cd build
-cmake .. -DBUILD_VF_GPU=ON -DCMAKE_CUDA_ARCHITECTURES=60 -DUSER_APPS=apps/gpu/LBM/DrivenCavityMultiGPU\;apps/gpu/LBM/SphereScaling
+cmake .. -DBUILD_VF_GPU=ON -DCMAKE_CUDA_ARCHITECTURES=60 -DUSER_APPS=apps/gpu/DrivenCavityMultiGPU\;apps/gpu/SphereScaling
 make -j 16
 cd ..
 mkdir -p output
 
 echo $'\n\n\n\n---First test: DrivenCavityMultiGPU on 4 GPUs\n\n'
-mpirun -np 4 "./build/bin/DrivenCavityMultiGPU" "apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix4GPU_regressionTest.txt"
+mpirun -np 4 "./build/bin/DrivenCavityMultiGPU" "apps/gpu/DrivenCavityMultiGPU/configPhoenix4GPU_regressionTest.txt"
 
 echo $'\n\n\n\n---Second test: SphereScaling on 4 GPUs\n\n'
-mpirun -np 4 "./build/bin/SphereScaling"        "apps/gpu/LBM/SphereScaling/configPhoenix4GPU_regressionTest.txt"
\ No newline at end of file
+mpirun -np 4 "./build/bin/SphereScaling"        "apps/gpu/SphereScaling/configPhoenix4GPU_regressionTest.txt"
\ No newline at end of file
diff --git a/regression-tests/multigpu_test/slurm8GPU.job b/regression-tests/multigpu_test/slurm8GPU.job
index bb7bf55c70eb6b178eff3f52e18c35d7cafd6938..0b97ce0cd17325f59d28c9fe0da92724201b1304 100755
--- a/regression-tests/multigpu_test/slurm8GPU.job
+++ b/regression-tests/multigpu_test/slurm8GPU.job
@@ -26,13 +26,13 @@ module list
 cd multigpu_test
 rm -rf build && mkdir -p build
 cd build
-cmake .. -DBUILD_VF_GPU=ON -DCMAKE_CUDA_ARCHITECTURES=60 -DUSER_APPS=apps/gpu/LBM/DrivenCavityMultiGPU\;apps/gpu/LBM/SphereScaling
+cmake .. -DBUILD_VF_GPU=ON -DCMAKE_CUDA_ARCHITECTURES=60 -DUSER_APPS=apps/gpu/DrivenCavityMultiGPU\;apps/gpu/SphereScaling
 make -j 16
 cd ..
 mkdir -p output
 
 echo $'\n\n\n\n---First test: DrivenCavityMultiGPU on 8 GPUs\n\n'
-mpirun -np 8 "./build/bin/DrivenCavityMultiGPU" "apps/gpu/LBM/DrivenCavityMultiGPU/configPhoenix8GPU_regressionTest.txt"
+mpirun -np 8 "./build/bin/DrivenCavityMultiGPU" "apps/gpu/DrivenCavityMultiGPU/configPhoenix8GPU_regressionTest.txt"
 
 echo $'\n\n\n\n---Second test: SphereScaling on 8 GPUs\n\n'
-mpirun -np 8 "./build/bin/SphereScaling"        "apps/gpu/LBM/SphereScaling/configPhoenix8GPU_regressionTest.txt"
\ No newline at end of file
+mpirun -np 8 "./build/bin/SphereScaling"        "apps/gpu/SphereScaling/configPhoenix8GPU_regressionTest.txt"
\ No newline at end of file
diff --git a/regression-tests/refined_sphere_in_channel_test.sh b/regression-tests/refined_sphere_in_channel_test.sh
index 447b0ba9ff0c1f858e0061d98cd5fabca786de9c..51b6fa9481761f9727b4e4d7dc96945acae49265 100644
--- a/regression-tests/refined_sphere_in_channel_test.sh
+++ b/regression-tests/refined_sphere_in_channel_test.sh
@@ -6,7 +6,7 @@ source ./regression-tests/__regression_test_executer.sh
 REFERENCE_DATA_DIR=regression_tests/gpu/SphereInChannel_3Levels
 
 # 2. set cmake flags for the build of VirtualFluids
-CMAKE_FLAGS="--preset=make_gpu -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=75 -DUSER_APPS=apps/gpu/LBM/SphereRefined"
+CMAKE_FLAGS="--preset=make_gpu -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=75 -DUSER_APPS=apps/gpu/SphereRefined"
 
 # 3. define the application to be executed
 APPLICATION=./build/bin/SphereRefined
diff --git a/src/cpu/LiggghtsCoupling/SimulationObserver/LiggghtsCouplingSimulationObserver.cpp b/src/cpu/LiggghtsCoupling/SimulationObserver/LiggghtsCouplingSimulationObserver.cpp
index 7b7fafd8a653f9a33559d9f569c6add4fba929f9..718301257075a5e594b1f575d7ae46dd77aacb2c 100644
--- a/src/cpu/LiggghtsCoupling/SimulationObserver/LiggghtsCouplingSimulationObserver.cpp
+++ b/src/cpu/LiggghtsCoupling/SimulationObserver/LiggghtsCouplingSimulationObserver.cpp
@@ -13,7 +13,7 @@
 #include "fix_lb_coupling_onetoone.h"
 
 LiggghtsCouplingSimulationObserver::LiggghtsCouplingSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s,
-                                                         SPtr<vf::mpi::Communicator> comm,
+                                                         SPtr<vf::parallel::Communicator> comm,
                                                          LiggghtsCouplingWrapper &wrapper, int demSteps,
                                                          SPtr<LBMUnitConverter> units)
     : SimulationObserver(grid, s), comm(comm), wrapper(wrapper), demSteps(demSteps), units(units)
diff --git a/src/cpu/LiggghtsCoupling/SimulationObserver/LiggghtsCouplingSimulationObserver.h b/src/cpu/LiggghtsCoupling/SimulationObserver/LiggghtsCouplingSimulationObserver.h
index 0ae1786dd72346456621f3f4cf584679fca64f42..fb4938328b9d18e8cef614a09cbad8894c5aa497 100644
--- a/src/cpu/LiggghtsCoupling/SimulationObserver/LiggghtsCouplingSimulationObserver.h
+++ b/src/cpu/LiggghtsCoupling/SimulationObserver/LiggghtsCouplingSimulationObserver.h
@@ -46,7 +46,7 @@
 
 
 class SimulationObserver;
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class LiggghtsCouplingWrapper;
 class Grid3D;
 class Block3D;
@@ -61,7 +61,7 @@ struct ParticleData {
 class LiggghtsCouplingSimulationObserver : public SimulationObserver
 {
 public:
-    LiggghtsCouplingSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<vf::mpi::Communicator> comm,
+    LiggghtsCouplingSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<vf::parallel::Communicator> comm,
                                 LiggghtsCouplingWrapper &wrapper, int demSteps, SPtr<LBMUnitConverter> units);
     virtual ~LiggghtsCouplingSimulationObserver();
 
@@ -88,7 +88,7 @@ protected:
     void addTorque(int const partId, int const coord, double const value, double *torque);
 
 private:
-    SPtr<vf::mpi::Communicator> comm;
+    SPtr<vf::parallel::Communicator> comm;
     LiggghtsCouplingWrapper &wrapper;
     SPtr<LBMUnitConverter> units;
     int demSteps;
diff --git a/src/cpu/MultiphaseFlow/SimulationObservers/WriteMultiphaseQuantitiesSimulationObserver.cpp b/src/cpu/MultiphaseFlow/SimulationObservers/WriteMultiphaseQuantitiesSimulationObserver.cpp
index adc6a4f81f77cc57a4030643517cc583b61b3c7d..820c84855c66c695423beec09d1ccfa3953dc4e9 100644
--- a/src/cpu/MultiphaseFlow/SimulationObservers/WriteMultiphaseQuantitiesSimulationObserver.cpp
+++ b/src/cpu/MultiphaseFlow/SimulationObservers/WriteMultiphaseQuantitiesSimulationObserver.cpp
@@ -39,7 +39,7 @@
 
 #include "BCArray3D.h"
 #include "Block3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "DataSet3D.h"
 #include "Grid3D.h"
 #include "LBMUnitConverter.h"
@@ -53,7 +53,7 @@ WriteMultiphaseQuantitiesSimulationObserver::WriteMultiphaseQuantitiesSimulation
                                                                              const std::string &path,
                                                                              WbWriter *const writer,
                                                                              SPtr<LBMUnitConverter> conv,
-                                                                             std::shared_ptr<vf::mpi::Communicator> comm)
+                                                                             std::shared_ptr<vf::parallel::Communicator> comm)
         : SimulationObserver(grid, s), path(path), writer(writer), conv(conv), comm(comm)
 {
     gridRank = comm->getProcessID();
diff --git a/src/cpu/MultiphaseFlow/SimulationObservers/WriteMultiphaseQuantitiesSimulationObserver.h b/src/cpu/MultiphaseFlow/SimulationObservers/WriteMultiphaseQuantitiesSimulationObserver.h
index 452a06d3bbb23943995f42ce84d712aa3c01b6a4..e5ba399ad57155c332cc2cd8520c51c8e86c441c 100644
--- a/src/cpu/MultiphaseFlow/SimulationObservers/WriteMultiphaseQuantitiesSimulationObserver.h
+++ b/src/cpu/MultiphaseFlow/SimulationObservers/WriteMultiphaseQuantitiesSimulationObserver.h
@@ -42,7 +42,7 @@
 #include "LBMSystem.h"
 #include "UbTuple.h"
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class LBMUnitConverter;
@@ -63,7 +63,7 @@ public:
     //! \param conv is LBMUnitConverter object
     //! \param comm is Communicator object
     WriteMultiphaseQuantitiesSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                          WbWriter *const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm);
+                                          WbWriter *const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::parallel::Communicator> comm);
     ~WriteMultiphaseQuantitiesSimulationObserver() override = default;
 
     void update(real step) override;
@@ -90,7 +90,7 @@ private:
     int minInitLevel;
     int maxInitLevel;
     int gridRank;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
 
     real gradX1_phi(const real *const &);
     real gradX2_phi(const real *const &);
diff --git a/src/cpu/MultiphaseFlow/SimulationObservers/WriteSharpInterfaceQuantitiesSimulationObserver.cpp b/src/cpu/MultiphaseFlow/SimulationObservers/WriteSharpInterfaceQuantitiesSimulationObserver.cpp
index 64ef23bcffb2d9b5478b1f57590d7d297ab08f52..7a0ae87b6cb3674557af510089548d7352913cc6 100644
--- a/src/cpu/MultiphaseFlow/SimulationObservers/WriteSharpInterfaceQuantitiesSimulationObserver.cpp
+++ b/src/cpu/MultiphaseFlow/SimulationObservers/WriteSharpInterfaceQuantitiesSimulationObserver.cpp
@@ -39,7 +39,7 @@
 
 #include "BCArray3D.h"
 #include "Block3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "DataSet3D.h"
 #include "Grid3D.h"
 #include "LBMUnitConverter.h"
@@ -52,7 +52,7 @@ WriteSharpInterfaceQuantitiesSimulationObserver::WriteSharpInterfaceQuantitiesSi
                                                                              const std::string &path,
                                                                              WbWriter *const writer,
                                                                              SPtr<LBMUnitConverter> conv,
-                                                                             std::shared_ptr<vf::mpi::Communicator> comm)
+                                                                             std::shared_ptr<vf::parallel::Communicator> comm)
         : SimulationObserver(grid, s), path(path), writer(writer), conv(conv), comm(comm)
 {
     gridRank = comm->getProcessID();
diff --git a/src/cpu/MultiphaseFlow/SimulationObservers/WriteSharpInterfaceQuantitiesSimulationObserver.h b/src/cpu/MultiphaseFlow/SimulationObservers/WriteSharpInterfaceQuantitiesSimulationObserver.h
index 8c1e6347862a80cf25ab26a99b1304c731e2e2ee..bf42cbfa46b8c6b080da16b1732bd242674e48c5 100644
--- a/src/cpu/MultiphaseFlow/SimulationObservers/WriteSharpInterfaceQuantitiesSimulationObserver.h
+++ b/src/cpu/MultiphaseFlow/SimulationObservers/WriteSharpInterfaceQuantitiesSimulationObserver.h
@@ -42,7 +42,7 @@
 #include "LBMSystem.h"
 #include "UbTuple.h"
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class LBMUnitConverter;
@@ -63,7 +63,7 @@ public:
     //! \param conv is LBMUnitConverter object
     //! \param comm is Communicator object
     WriteSharpInterfaceQuantitiesSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                          WbWriter *const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm);
+                                          WbWriter *const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::parallel::Communicator> comm);
     ~WriteSharpInterfaceQuantitiesSimulationObserver() override = default;
 
     void update(double step) override;
@@ -90,7 +90,7 @@ private:
     int minInitLevel;
     int maxInitLevel;
     int gridRank;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
 
     real gradX1_phi(const real *const &);
     real gradX2_phi(const real *const &);
diff --git a/src/cpu/NonNewtonianFluids/SimulationObservers/CalculateTorqueSimulationObserver.cpp b/src/cpu/NonNewtonianFluids/SimulationObservers/CalculateTorqueSimulationObserver.cpp
index 82adf2f0e2e59a36e4585d42d9baadb4e793f55f..7ed670deb9619565e9c11d18041cc522485ba2f6 100644
--- a/src/cpu/NonNewtonianFluids/SimulationObservers/CalculateTorqueSimulationObserver.cpp
+++ b/src/cpu/NonNewtonianFluids/SimulationObservers/CalculateTorqueSimulationObserver.cpp
@@ -1,7 +1,7 @@
 #include "CalculateTorqueSimulationObserver.h"
 #include "BCSet.h"
 
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "D3Q27Interactor.h"
 #include "UbScheduler.h"
 #include "Grid3D.h"
@@ -14,7 +14,7 @@
 #include "DistributionArray3D.h"
 #include "NonNewtonianFluids/LBM/Rheology.h"
 
-CalculateTorqueSimulationObserver::CalculateTorqueSimulationObserver( SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path_, std::shared_ptr<vf::mpi::Communicator> comm) : SimulationObserver(grid, s), path(path_), comm(comm), torqueX1global(0), torqueX2global(0), torqueX3global(0)
+CalculateTorqueSimulationObserver::CalculateTorqueSimulationObserver( SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path_, std::shared_ptr<vf::parallel::Communicator> comm) : SimulationObserver(grid, s), path(path_), comm(comm), torqueX1global(0), torqueX2global(0), torqueX3global(0)
 {
    if (comm->getProcessID() == comm->getRoot())
    {
diff --git a/src/cpu/NonNewtonianFluids/SimulationObservers/CalculateTorqueSimulationObserver.h b/src/cpu/NonNewtonianFluids/SimulationObservers/CalculateTorqueSimulationObserver.h
index d0cd9c41d5afc4e127f9be74e106960c86097f96..e1948d95f69daa795fe8ea5e1733b5b8be1f2754 100644
--- a/src/cpu/NonNewtonianFluids/SimulationObservers/CalculateTorqueSimulationObserver.h
+++ b/src/cpu/NonNewtonianFluids/SimulationObservers/CalculateTorqueSimulationObserver.h
@@ -17,7 +17,7 @@
 #include "D3Q27System.h"
 
 class ForceCalculator;
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class D3Q27Interactor;
@@ -29,7 +29,7 @@ class CalculateTorqueSimulationObserver: public SimulationObserver
 {
 public:
    //! Constructor
-   CalculateTorqueSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
+   CalculateTorqueSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::parallel::Communicator> comm);
 	virtual ~CalculateTorqueSimulationObserver();             
 	void update(real step); 
    void addInteractor(SPtr<D3Q27Interactor> interactor);
@@ -42,7 +42,7 @@ protected:
 
 private:
    std::string path;
-   std::shared_ptr<vf::mpi::Communicator> comm;
+   std::shared_ptr<vf::parallel::Communicator> comm;
    std::vector<SPtr<D3Q27Interactor> > interactors;
    real torqueX1global;
    real torqueX2global;
diff --git a/src/cpu/NonNewtonianFluids/SimulationObservers/WriteThixotropyQuantitiesSimulationObserver.cpp b/src/cpu/NonNewtonianFluids/SimulationObservers/WriteThixotropyQuantitiesSimulationObserver.cpp
index 61f13299d914bb3fd15a8d0a69712c722c91ca6f..bc3eab4fb93581a0184d96cddbd5542fa358e8ee 100644
--- a/src/cpu/NonNewtonianFluids/SimulationObservers/WriteThixotropyQuantitiesSimulationObserver.cpp
+++ b/src/cpu/NonNewtonianFluids/SimulationObservers/WriteThixotropyQuantitiesSimulationObserver.cpp
@@ -49,7 +49,7 @@ using namespace std;
 
 WriteThixotropyQuantitiesSimulationObserver::WriteThixotropyQuantitiesSimulationObserver() = default;
 //////////////////////////////////////////////////////////////////////////
-WriteThixotropyQuantitiesSimulationObserver::WriteThixotropyQuantitiesSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string& path, WbWriter* const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm) : SimulationObserver(grid, s), path(path), writer(writer),	conv(conv),	comm(comm)
+WriteThixotropyQuantitiesSimulationObserver::WriteThixotropyQuantitiesSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string& path, WbWriter* const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::parallel::Communicator> comm) : SimulationObserver(grid, s), path(path), writer(writer),	conv(conv),	comm(comm)
 {
 	gridRank = comm->getProcessID();
 	minInitLevel = this->grid->getCoarsestInitializedLevel();
diff --git a/src/cpu/NonNewtonianFluids/SimulationObservers/WriteThixotropyQuantitiesSimulationObserver.h b/src/cpu/NonNewtonianFluids/SimulationObservers/WriteThixotropyQuantitiesSimulationObserver.h
index 3ac9664e595adef9b45edaee662849a3ab63616f..5ef994a08fe36812361903fb02e83be510faa13e 100644
--- a/src/cpu/NonNewtonianFluids/SimulationObservers/WriteThixotropyQuantitiesSimulationObserver.h
+++ b/src/cpu/NonNewtonianFluids/SimulationObservers/WriteThixotropyQuantitiesSimulationObserver.h
@@ -38,14 +38,14 @@
 #include "Grid3D.h"
 #include "Block3D.h"
 #include "LBMUnitConverter.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "WbWriter.h"
 
 class WriteThixotropyQuantitiesSimulationObserver : public  SimulationObserver
 {
 public:
 	WriteThixotropyQuantitiesSimulationObserver();
-	WriteThixotropyQuantitiesSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string& path, WbWriter* const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm);
+	WriteThixotropyQuantitiesSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string& path, WbWriter* const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::parallel::Communicator> comm);
 	~WriteThixotropyQuantitiesSimulationObserver() = default;
 
    void update(real step) override;
@@ -69,7 +69,7 @@ private:
    int minInitLevel;
    int maxInitLevel;
    int gridRank;
-   std::shared_ptr<vf::mpi::Communicator> comm;
+   std::shared_ptr<vf::parallel::Communicator> comm;
 //	double ConcentrationSum;
 };
 #endif
diff --git a/src/cpu/VirtualFluids.h b/src/cpu/VirtualFluids.h
index d8f79867c20ff1df40a15a6e437012246a4e5b00..e3adfb6efa82d2bcad797f0d397d537e89aab098 100644
--- a/src/cpu/VirtualFluids.h
+++ b/src/cpu/VirtualFluids.h
@@ -40,9 +40,9 @@
 #include <omp.h>
 #endif
 
-#include <mpi/Communicator.h>
-#include <mpi/MPICommunicator.h>
-#include <mpi/NullCommunicator.h>
+#include <parallel/Communicator.h>
+#include <parallel/MPICommunicator.h>
+#include <parallel/NullCommunicator.h>
 
 #include <basics/PointerDefinitions.h>
 
diff --git a/src/cpu/VirtualFluidsCore/CMakeLists.txt b/src/cpu/VirtualFluidsCore/CMakeLists.txt
index aae663e80011c117a83d5a52d0ac0cbe0c59a5a8..3e767e49c6cc0d35c2c8706a20b77f17a625b9a4 100644
--- a/src/cpu/VirtualFluidsCore/CMakeLists.txt
+++ b/src/cpu/VirtualFluidsCore/CMakeLists.txt
@@ -20,7 +20,7 @@ if(BUILD_USE_OPENMP)
     list(APPEND VF_LIBRARIES OpenMP::OpenMP_CXX)
 endif()
 
-vf_add_library(BUILDTYPE static PUBLIC_LINK basics muparser ${VF_LIBRARIES} PRIVATE_LINK lbm mpi logger)
+vf_add_library(BUILDTYPE static PUBLIC_LINK basics muparser ${VF_LIBRARIES} parallel PRIVATE_LINK lbm logger)
 
 vf_add_tests()
 
diff --git a/src/cpu/VirtualFluidsCore/Interactors/InteractorsHelper.cpp b/src/cpu/VirtualFluidsCore/Interactors/InteractorsHelper.cpp
index 38e5be2e5d35a51f79cb1da8ff7ce9b8b5589656..159d04d9791dd5919e43752f6f8cf9ea97ca1dac 100644
--- a/src/cpu/VirtualFluidsCore/Interactors/InteractorsHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/Interactors/InteractorsHelper.cpp
@@ -34,7 +34,7 @@
 #include "InteractorsHelper.h"
 
 #include "Block3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "SetBcBlocksBlockVisitor.h"
 #include "SetSolidBlocksBlockVisitor.h"
 #include <Grid3D.h>
@@ -100,6 +100,6 @@ void InteractorsHelper::updateGrid()
         ids.push_back(block->getGlobalID());
 
     std::vector<int> rids;
-    vf::mpi::Communicator::getInstance()->allGather(ids, rids);
+    vf::parallel::Communicator::getInstance()->allGather(ids, rids);
     grid->deleteBlocks(rids);
 }
diff --git a/src/cpu/VirtualFluidsCore/Parallel/BlocksDistributor.cpp b/src/cpu/VirtualFluidsCore/Parallel/BlocksDistributor.cpp
index eef54a8625147046c2d8f38e2207e2fe2d20e325..718267be635c95e53d4cd1076e2cefee90fc492b 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/BlocksDistributor.cpp
+++ b/src/cpu/VirtualFluidsCore/Parallel/BlocksDistributor.cpp
@@ -1,3 +1,3 @@
 #include "BlocksDistributor.h"
 
-BlocksDistributor::BlocksDistributor(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm) : grid(grid), comm(comm) {}
+BlocksDistributor::BlocksDistributor(SPtr<Grid3D> grid, std::shared_ptr<vf::parallel::Communicator> comm) : grid(grid), comm(comm) {}
diff --git a/src/cpu/VirtualFluidsCore/Parallel/BlocksDistributor.h b/src/cpu/VirtualFluidsCore/Parallel/BlocksDistributor.h
index 85aa52d05e0dd215ac93ca4bb08cc057f84914d0..7db87d0885f53b9651f401f9cbf9502b606efd68 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/BlocksDistributor.h
+++ b/src/cpu/VirtualFluidsCore/Parallel/BlocksDistributor.h
@@ -1,7 +1,7 @@
 #ifndef BlocksDistributor_H
 #define BlocksDistributor_H
 
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "Grid3D.h"
 
 #include <PointerDefinitions.h>
@@ -9,13 +9,13 @@
 class BlocksDistributor
 {
 public:
-    BlocksDistributor(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm);
+    BlocksDistributor(SPtr<Grid3D> grid, std::shared_ptr<vf::parallel::Communicator> comm);
     ~BlocksDistributor();
 
 protected:
 private:
     SPtr<Grid3D> grid;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/Simulation/Grid3D.cpp b/src/cpu/VirtualFluidsCore/Simulation/Grid3D.cpp
index a214b4bd0137b2bf319925b519f1dcb77fabded4..14987f641cf0709ce8cae1797fa0699044eaed4c 100644
--- a/src/cpu/VirtualFluidsCore/Simulation/Grid3D.cpp
+++ b/src/cpu/VirtualFluidsCore/Simulation/Grid3D.cpp
@@ -51,7 +51,7 @@ using namespace std;
 
 Grid3D::Grid3D() { levelSet.resize(D3Q27System::MAXLEVEL + 1); }
 //////////////////////////////////////////////////////////////////////////
-Grid3D::Grid3D(std::shared_ptr<vf::mpi::Communicator> comm)
+Grid3D::Grid3D(std::shared_ptr<vf::parallel::Communicator> comm)
 
 {
     levelSet.resize(D3Q27System::MAXLEVEL + 1);
@@ -59,7 +59,7 @@ Grid3D::Grid3D(std::shared_ptr<vf::mpi::Communicator> comm)
     rank = comm->getProcessID();
 }
 //////////////////////////////////////////////////////////////////////////
-Grid3D::Grid3D(std::shared_ptr<vf::mpi::Communicator> comm, int blockNx1, int blockNx2, int blockNx3, int gridNx1, int gridNx2, int gridNx3)
+Grid3D::Grid3D(std::shared_ptr<vf::parallel::Communicator> comm, int blockNx1, int blockNx2, int blockNx3, int gridNx1, int gridNx2, int gridNx3)
     :
 
       blockNx1(blockNx1), blockNx2(blockNx2), blockNx3(blockNx2), nx1(gridNx1), nx2(gridNx2), nx3(gridNx3)
@@ -2314,7 +2314,7 @@ void Grid3D::renumberBlockIDs()
 
 
 //////////////////////////////////////////////////////////////////////////
-void Grid3D::updateDistributedBlocks(std::shared_ptr<vf::mpi::Communicator> comm)
+void Grid3D::updateDistributedBlocks(std::shared_ptr<vf::parallel::Communicator> comm)
 {
 
     std::vector<int> blocks;
diff --git a/src/cpu/VirtualFluidsCore/Simulation/Grid3D.h b/src/cpu/VirtualFluidsCore/Simulation/Grid3D.h
index 821adff473961fafa00be08ca2cf8e2339353b51..50f3ac53a27ca634f6a27a06f5af2ad1a5208884 100644
--- a/src/cpu/VirtualFluidsCore/Simulation/Grid3D.h
+++ b/src/cpu/VirtualFluidsCore/Simulation/Grid3D.h
@@ -48,7 +48,7 @@ class CoordinateTransformation3D;
 #include <Block3DVisitor.h>
 #include <Grid3DVisitor.h>
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Block3D;
 class Interactor3D;
 
@@ -65,8 +65,8 @@ public:
 
 public:
     Grid3D();
-    Grid3D(std::shared_ptr<vf::mpi::Communicator> comm);
-    Grid3D(std::shared_ptr<vf::mpi::Communicator> comm, int blockNx1, int blockNx2, int blockNx3, int gridNx1, int gridNx2, int gridNx3);
+    Grid3D(std::shared_ptr<vf::parallel::Communicator> comm);
+    Grid3D(std::shared_ptr<vf::parallel::Communicator> comm, int blockNx1, int blockNx2, int blockNx3, int gridNx1, int gridNx2, int gridNx3);
     virtual ~Grid3D() = default;
     //////////////////////////////////////////////////////////////////////////
     // blocks control
@@ -95,7 +95,7 @@ public:
     BlockIDMap &getBlockIDs();
     void deleteBlockIDs();
     void renumberBlockIDs();
-    void updateDistributedBlocks(std::shared_ptr<vf::mpi::Communicator> comm);
+    void updateDistributedBlocks(std::shared_ptr<vf::parallel::Communicator> comm);
     SPtr<Block3D> getSuperBlock(SPtr<Block3D> block);
     SPtr<Block3D> getSuperBlock(int ix1, int ix2, int ix3, int level);
     void getSubBlocks(SPtr<Block3D> block, int levelDepth, std::vector<SPtr<Block3D>> &blocks);
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/AdjustForcingSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/AdjustForcingSimulationObserver.cpp
index 2254b9b02ea383e18c654a7569f0e5b2e973c839..aeae2f61788d36a5f8da2c388976d5a2a05b71d2 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/AdjustForcingSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/AdjustForcingSimulationObserver.cpp
@@ -6,7 +6,7 @@
 
 #include <fstream>
 
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "Grid3D.h"
 #include "IntegrateValuesHelper.h"
 #include "UbScheduler.h"
@@ -14,7 +14,7 @@
 
 AdjustForcingSimulationObserver::AdjustForcingSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
                                                    SPtr<IntegrateValuesHelper> integrateValues, real vTarged,
-                                                   std::shared_ptr<vf::mpi::Communicator> comm)
+                                                   std::shared_ptr<vf::parallel::Communicator> comm)
 
     : SimulationObserver(grid, s), path(path), integrateValues(integrateValues), comm(comm), vx1Targed(vTarged)
 {
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/AdjustForcingSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/AdjustForcingSimulationObserver.h
index 9e570e34dc43fa025c47c3d3c29c0dad4a262b99..13f88c71162efcca53b913891955b6216b3c943f 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/AdjustForcingSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/AdjustForcingSimulationObserver.h
@@ -7,7 +7,7 @@
 #include "SimulationObserver.h"
 #include "lbm/constants/D3Q27.h"
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class UbScheduler;
 class Grid3D;
 class IntegrateValuesHelper;
@@ -22,7 +22,7 @@ class AdjustForcingSimulationObserver : public SimulationObserver
 {
 public:
     AdjustForcingSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                             SPtr<IntegrateValuesHelper> integrateValues, real vTarged, std::shared_ptr<vf::mpi::Communicator> comm);
+                             SPtr<IntegrateValuesHelper> integrateValues, real vTarged, std::shared_ptr<vf::parallel::Communicator> comm);
     //!< calls collect PostprocessData
     void update(real step) override;
 
@@ -31,7 +31,7 @@ protected:
     SPtr<IntegrateValuesHelper> integrateValues;
     //!< compares velocity in integrateValues with target velocity and adjusts forcing accordingly.
     void collectData(real step);
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
 
 private:
     real vx1Targed; //!< target velocity.
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/AverageValuesSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/AverageValuesSimulationObserver.cpp
index 1adf3ad9944a49c8065756988e95ab837e9f6d15..d87dddb97b10f66196d3922b56a7d52713ad7817 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/AverageValuesSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/AverageValuesSimulationObserver.cpp
@@ -7,7 +7,7 @@
 
 #include "BCArray3D.h"
 #include "Block3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "DataSet3D.h"
 #include "Grid3D.h"
 #include "UbScheduler.h"
@@ -185,7 +185,7 @@ void AverageValuesSimulationObserver::collectData(real step)
     piece           = subfolder + "/" + piece;
 
     vector<string> cellDataNames;
-    std::shared_ptr<vf::mpi::Communicator> comm = vf::mpi::Communicator::getInstance();
+    std::shared_ptr<vf::parallel::Communicator> comm = vf::parallel::Communicator::getInstance();
     vector<string> pieces   = comm->gather(piece);
     if (comm->getProcessID() == comm->getRoot()) {
         string pname =
@@ -448,7 +448,7 @@ void AverageValuesSimulationObserver::calculateAverageValues(real timeStep)
 ////////////////////////////////////////////////////////////////////////////
 // void AverageValuesSimulationObserver::initPlotData(double step)
 //{
-//   std::shared_ptr<vf::mpi::Communicator> comm = vf::mpi::Communicator::getInstance();
+//   std::shared_ptr<vf::parallel::Communicator> comm = vf::parallel::Communicator::getInstance();
 //	if (comm->getProcessID() == comm->getRoot())
 //	{
 //		std::ofstream ostr;
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/CalculateForcesSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/CalculateForcesSimulationObserver.cpp
index 8610c5df9e4b56496c3dc3ba1c25fabfd355f294..f1c8060ca7564b709cc344513fd01a752b2fdd61 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/CalculateForcesSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/CalculateForcesSimulationObserver.cpp
@@ -4,7 +4,7 @@
 #include "BCArray3D.h"
 #include "Block3D.h"
 #include "BoundaryConditions.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "D3Q27Interactor.h"
 #include "DataSet3D.h"
 #include "DistributionArray3D.h"
@@ -14,7 +14,7 @@
 #include "UbScheduler.h"
 
 CalculateForcesSimulationObserver::CalculateForcesSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                       std::shared_ptr<vf::mpi::Communicator> comm, real v, real a)
+                                                       std::shared_ptr<vf::parallel::Communicator> comm, real v, real a)
     : SimulationObserver(grid, s), path(path), comm(comm), v(v), a(a), forceX1global(0), forceX2global(0), forceX3global(0)
 {
     if (comm->getProcessID() == comm->getRoot()) {
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/CalculateForcesSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/CalculateForcesSimulationObserver.h
index 02b76e77bc6bfb3ee375e79465f2d548226189cb..e1d376f4c6938b3874cd2a0eebedae294bd502b1 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/CalculateForcesSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/CalculateForcesSimulationObserver.h
@@ -17,7 +17,7 @@
 #include "lbm/constants/D3Q27.h"
 
 class ForceCalculator;
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class D3Q27Interactor;
@@ -30,7 +30,7 @@ public:
     //! Constructor
     //! \param v - velocity of fluid in LB units
     //! \param a - area of object in LB units
-    CalculateForcesSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm,
+    CalculateForcesSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::parallel::Communicator> comm,
                                real v, real a);
     ~CalculateForcesSimulationObserver() override;
     void update(real step) override;
@@ -46,7 +46,7 @@ protected:
 
 private:
     std::string path;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
     std::vector<SPtr<D3Q27Interactor>> interactors;
     real forceX1global;
     real forceX2global;
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/CalculateTorqueSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/CalculateTorqueSimulationObserver.cpp
index 768fbbb26241edfe5771bf056b6b83be21b02312..6d3b22d642a6b27e3b694cf9fcb7e0a1a1eb4acb 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/CalculateTorqueSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/CalculateTorqueSimulationObserver.cpp
@@ -1,7 +1,7 @@
 #include "NonNewtonianFluids/SimulationObservers/CalculateTorqueSimulationObserver.h"
 #include "BCSet.h"
 
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "D3Q27Interactor.h"
 #include "UbScheduler.h"
 #include "Grid3D.h"
@@ -14,7 +14,7 @@
 #include "DistributionArray3D.h"
 #include "NonNewtonianFluids/LBM/Rheology.h"
 
-CalculateTorqueSimulationObserver::CalculateTorqueSimulationObserver( SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path_, std::shared_ptr<vf::mpi::Communicator> comm) : SimulationObserver(grid, s), path(path_), comm(comm), torqueX1global(0), torqueX2global(0), torqueX3global(0)
+CalculateTorqueSimulationObserver::CalculateTorqueSimulationObserver( SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path_, std::shared_ptr<vf::parallel::Communicator> comm) : SimulationObserver(grid, s), path(path_), comm(comm), torqueX1global(0), torqueX2global(0), torqueX3global(0)
 {
    if (comm->getProcessID() == comm->getRoot())
    {
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/DecreaseViscositySimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/DecreaseViscositySimulationObserver.cpp
index 0754491767b55e2a6059c8eef160a94a1f087540..ffbfde51ce2d8100cc845364e31038180892f800 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/DecreaseViscositySimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/DecreaseViscositySimulationObserver.cpp
@@ -10,13 +10,13 @@
 #include <vector>
 
 #include "Block3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "Grid3D.h"
 #include "LBMKernel.h"
 #include "UbScheduler.h"
 
 DecreaseViscositySimulationObserver::DecreaseViscositySimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, mu::Parser *nueFunc,
-                                                           std::shared_ptr<vf::mpi::Communicator> comm)
+                                                           std::shared_ptr<vf::parallel::Communicator> comm)
 
     : SimulationObserver(grid, s), nueFunc(nueFunc), comm(comm)
 {
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/DecreaseViscositySimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/DecreaseViscositySimulationObserver.h
index 2e2c655d223619169e0f3edd3bfa554e924639e0..741b65783c2634d45f6d012f67e1f42283b22d9c 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/DecreaseViscositySimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/DecreaseViscositySimulationObserver.h
@@ -11,7 +11,7 @@
 
 class UbScheduler;
 class Grid3D;
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 
 //! \brief The class sets viscosity/collision factor according to a previously defined function in time.
 //! \details initialization in test case (example):
@@ -28,7 +28,7 @@ namespace vf::mpi {class Communicator;}
 class DecreaseViscositySimulationObserver : public SimulationObserver
 {
 public:
-    DecreaseViscositySimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, mu::Parser *nueFunc, std::shared_ptr<vf::mpi::Communicator> comm);
+    DecreaseViscositySimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, mu::Parser *nueFunc, std::shared_ptr<vf::parallel::Communicator> comm);
     ~DecreaseViscositySimulationObserver() override;
     //! calls collect PostprocessData.
     void update(real step) override;
@@ -36,7 +36,7 @@ public:
 protected:
     //! resets the collision factor depending on the current timestep.
     void setViscosity(real step);
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
 
 private:
     mutable mu::value_type timeStep;
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/EmergencyExitSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/EmergencyExitSimulationObserver.cpp
index a6826a713b45f74239c603d9a23b946169ac60d5..ea6287ff358bce160791c7ab16568a9cc4a989bb 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/EmergencyExitSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/EmergencyExitSimulationObserver.cpp
@@ -1,5 +1,5 @@
 #include "EmergencyExitSimulationObserver.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "Grid3D.h"
 #include "MPIIORestartSimulationObserver.h"
 #include "UbLogger.h"
@@ -8,7 +8,7 @@
 #include <basics/utilities/UbFileOutputASCII.h>
 
 EmergencyExitSimulationObserver::EmergencyExitSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                   SPtr<MPIIORestartSimulationObserver> rp, std::shared_ptr<vf::mpi::Communicator> comm)
+                                                   SPtr<MPIIORestartSimulationObserver> rp, std::shared_ptr<vf::parallel::Communicator> comm)
     : SimulationObserver(grid, s), path(path), rp(rp), comm(comm)
 {
     this->path = path + "/exit";
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/EmergencyExitSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/EmergencyExitSimulationObserver.h
index f4a8e79f6f8b89f1b4e37714f0c42d2be4be1810..f2757d8ed842f14d77b16b7d1aa2821e5a8b8d72 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/EmergencyExitSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/EmergencyExitSimulationObserver.h
@@ -14,7 +14,7 @@
 #include "SimulationObserver.h"
 
 class MPIIORestartSimulationObserver;
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 
@@ -22,7 +22,7 @@ class EmergencyExitSimulationObserver : public SimulationObserver
 {
 public:
     EmergencyExitSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                             SPtr<MPIIORestartSimulationObserver> rp, std::shared_ptr<vf::mpi::Communicator> comm);
+                             SPtr<MPIIORestartSimulationObserver> rp, std::shared_ptr<vf::parallel::Communicator> comm);
     ~EmergencyExitSimulationObserver() override;
 
     void update(real step) override;
@@ -35,7 +35,7 @@ protected:
 
 private:
     std::string path;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
     SPtr<MPIIORestartSimulationObserver> rp;
     std::string metafile;
 };
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/ForceCalculator.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/ForceCalculator.cpp
index 9a39ce11ed15e939e9fc32eaeb15d541675387aa..7f2b30a96ca8bac612fa5982c21c9cbe76d3b7ff 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/ForceCalculator.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/ForceCalculator.cpp
@@ -4,13 +4,13 @@
 #include "BCArray3D.h"
 #include "Block3D.h"
 #include "BoundaryConditions.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "D3Q27Interactor.h"
 #include "DataSet3D.h"
 #include "DistributionArray3D.h"
 #include "LBMKernel.h"
 
-ForceCalculator::ForceCalculator(std::shared_ptr<vf::mpi::Communicator> comm)
+ForceCalculator::ForceCalculator(std::shared_ptr<vf::parallel::Communicator> comm)
     : comm(comm), forceX1global(0), forceX2global(0), forceX3global(0)
 {
 }
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/ForceCalculator.h b/src/cpu/VirtualFluidsCore/SimulationObservers/ForceCalculator.h
index 03b00f3603c3e8aac25567b7f370e81b61d3ef76..6f7266d8e49a0f1e9cce4192712b37f1306ff5e0 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/ForceCalculator.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/ForceCalculator.h
@@ -15,14 +15,14 @@
 #include "Vector3D.h"
 
 class D3Q27Interactor;
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class DistributionArray3D;
 class BoundaryConditions;
 
 class ForceCalculator
 {
 public:
-    ForceCalculator(std::shared_ptr<vf::mpi::Communicator> comm);
+    ForceCalculator(std::shared_ptr<vf::parallel::Communicator> comm);
     virtual ~ForceCalculator();
 
     void calculateForces(std::vector<std::shared_ptr<D3Q27Interactor>> interactors);
@@ -35,7 +35,7 @@ public:
 private:
     void gatherGlobalForces();
 
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
 
     real forceX1global;
     real forceX2global;
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/InSituCatalystSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/InSituCatalystSimulationObserver.cpp
index 4e8fd6d5f39fbeb581ace18bf544fa5346719850..07a27f074c7ac9cc9850db90f94fec19687fa4cb 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/InSituCatalystSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/InSituCatalystSimulationObserver.cpp
@@ -20,7 +20,7 @@ InSituCatalystSimulationObserver::InSituCatalystSimulationObserver() {}
 InSituCatalystSimulationObserver::InSituCatalystSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, std::string script)
     : SimulationObserver(grid, s)
 {
-    gridRank     = vf::mpi::Communicator::getInstance()->getProcessID();
+    gridRank     = vf::parallel::Communicator::getInstance()->getProcessID();
     minInitLevel = this->grid->getCoarsestInitializedLevel();
     maxInitLevel = this->grid->getFinestInitializedLevel();
 
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/InSituVTKSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/InSituVTKSimulationObserver.cpp
index 74c1b653bd4f8b5f2def3492f83fa38677170feb..2dbdcb6373e24a57ebeafe6243db4ccaa083e932 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/InSituVTKSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/InSituVTKSimulationObserver.cpp
@@ -30,7 +30,7 @@ InSituVTKSimulationObserver::InSituVTKSimulationObserver(SPtr<Grid3D> grid, SPtr
                                            SPtr<LBMUnitConverter> conv)
     : SimulationObserver(grid, s), conv(conv)
 {
-    gridRank     = vf::mpi::Communicator::getInstance()->getProcessID();
+    gridRank     = vf::parallel::Communicator::getInstance()->getProcessID();
     minInitLevel = this->grid->getCoarsestInitializedLevel();
     maxInitLevel = this->grid->getFinestInitializedLevel();
 
@@ -269,7 +269,7 @@ void InSituVTKSimulationObserver::readConfigFile(const std::string &configFile)
     string dummy;
     int wRank = 0;
     getline(ifs, dummy);
-    int np = vf::mpi::Communicator::getInstance()->getNumberOfProcesses();
+    int np = vf::parallel::Communicator::getInstance()->getNumberOfProcesses();
 
     while (ifs.good()) {
         getline(ifs, dummy, ';');
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/IntegrateValuesHelper.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/IntegrateValuesHelper.cpp
index 7eabcd2849f2fca11cb057357492fa1062c46dce..da55dbee109b9d6d19645f4ff78679a53eadb65e 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/IntegrateValuesHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/IntegrateValuesHelper.cpp
@@ -10,7 +10,7 @@
 #include "LBMKernel.h"
 
 //////////////////////////////////////////////////////////////////////////
-IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, real minX1, real minX2,
+IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::parallel::Communicator> comm, real minX1, real minX2,
                                              real minX3, real maxX1, real maxX2, real maxX3)
     :
 
@@ -21,7 +21,7 @@ IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<
     init(-1);
 }
 //////////////////////////////////////////////////////////////////////////
-IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, real minX1, real minX2,
+IntegrateValuesHelper::IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::parallel::Communicator> comm, real minX1, real minX2,
                                              real minX3, real maxX1, real maxX2, real maxX3, int level)
     :
 
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/IntegrateValuesHelper.h b/src/cpu/VirtualFluidsCore/SimulationObservers/IntegrateValuesHelper.h
index c804d74628570c4592c6715b7f76cd450c90ecfb..6404ca7bfab37599c6c833b53798f9048c5e265b 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/IntegrateValuesHelper.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/IntegrateValuesHelper.h
@@ -5,7 +5,7 @@
 
 #include "Block3D.h"
 #include "CbArray2D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "D3Q27System.h"
 #include "GbCuboid3D.h"
 #include "Grid3D.h"
@@ -36,9 +36,9 @@ public:
     };
 
 public:
-    IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, real minX1, real minX2, real minX3,
+    IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::parallel::Communicator> comm, real minX1, real minX2, real minX3,
                           real maxX1, real maxX2, real maxX3);
-    IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::mpi::Communicator> comm, real minX1, real minX2, real minX3,
+    IntegrateValuesHelper(SPtr<Grid3D> grid, std::shared_ptr<vf::parallel::Communicator> comm, real minX1, real minX2, real minX3,
                           real maxX1, real maxX2, real maxX3, int level);
     virtual ~IntegrateValuesHelper();
 
@@ -77,7 +77,7 @@ private:
     real sAvVx1, sAvVx2, sAvVx3, sTSx1, sTSx2, sTSx3, sTSx1x3;
     std::vector<CalcNodes> cnodes;
     GbCuboid3DPtr boundingBox;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
     CbArray2D<Node> cnodes2DMatrix;
     enum Values { AvVx = 0, AvVy = 1, AvVz = 2, AvVxx = 3, AvVyy = 4, AvVzz = 5, AvVxy = 6, AvVyz = 7, AvVxz = 8 };
 };
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/LineTimeSeriesSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/LineTimeSeriesSimulationObserver.cpp
index 75350fb6e0904c434519a241228d7662c84ecf22..e312bf2b27aa15b51146aab9559c201ed88bbd7e 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/LineTimeSeriesSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/LineTimeSeriesSimulationObserver.cpp
@@ -3,7 +3,7 @@
 #include "WbWriterVtkXmlASCII.h"
 
 #include "Block3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "CompressibleCumulantLBMKernel.h"
 #include "CoordinateTransformation3D.h"
 #include "DataSet3D.h"
@@ -13,7 +13,7 @@
 #include "UbScheduler.h"
 
 LineTimeSeriesSimulationObserver::LineTimeSeriesSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                     SPtr<GbLine3D> line, int level, std::shared_ptr<vf::mpi::Communicator> comm)
+                                                     SPtr<GbLine3D> line, int level, std::shared_ptr<vf::parallel::Communicator> comm)
     : SimulationObserver(grid, s), path(path), length(0), ix1(0), ix2(0), ix3(0), level(level), line(line)
 {
     root  = comm->isRoot();
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/LineTimeSeriesSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/LineTimeSeriesSimulationObserver.h
index 0f8a9ab44ac60e16708be7453e65809ec0505155..db4fea82f2aecfd87a4c90cdea36cc310b73aabe 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/LineTimeSeriesSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/LineTimeSeriesSimulationObserver.h
@@ -9,7 +9,7 @@
 #include "SimulationObserver.h"
 #include "LBMSystem.h"
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class GbLine3D;
@@ -27,7 +27,7 @@ public:
 
 public:
     LineTimeSeriesSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, SPtr<GbLine3D> line,
-                              int level, std::shared_ptr<vf::mpi::Communicator> comm);
+                              int level, std::shared_ptr<vf::parallel::Communicator> comm);
     ~LineTimeSeriesSimulationObserver() override = default;
 
     void update(real step) override;
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOMigrationBESimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOMigrationBESimulationObserver.cpp
index 5cdc87c6867c5ec2a8676e66a228ba5e768014fb..6fc3eb9b7b4a990897d42e8a480b9bb16affa868 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOMigrationBESimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOMigrationBESimulationObserver.cpp
@@ -3,7 +3,7 @@
 #include "BCSet.h"
 #include "Block3D.h"
 #include "BoundaryConditions.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "CoordinateTransformation3D.h"
 #include "D3Q27EsoTwist3DSplittedVector.h"
 #include "D3Q27System.h"
@@ -25,7 +25,7 @@ using namespace MPIIODataStructures;
 #define MESSAGE_TAG 80
 #define SEND_BLOCK_SIZE 100000
 
-MPIIOMigrationBESimulationObserver::MPIIOMigrationBESimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<Grid3DVisitor> mV, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm)
+MPIIOMigrationBESimulationObserver::MPIIOMigrationBESimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<Grid3DVisitor> mV, const std::string &path, std::shared_ptr<vf::parallel::Communicator> comm)
     : MPIIOSimulationObserver(grid, s, path, comm), nue(-999.999), nuL(-999.999), nuG(-999.999), densityRatio(-999.999)
 {
     memset(&boundCondParamStr, 0, sizeof(boundCondParamStr));
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOMigrationBESimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOMigrationBESimulationObserver.h
index fa55ea17a350ec9a1de215892258526142f6d19b..cec360a7300e07909c0b5b6bc14a7969f8686264 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOMigrationBESimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOMigrationBESimulationObserver.h
@@ -10,7 +10,7 @@
 
 class Grid3D;
 class UbScheduler;
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class BCSet;
 class LBMKernel;
 class Grid3DVisitor;
@@ -33,7 +33,7 @@ class MPIIOMigrationBESimulationObserver : public MPIIOSimulationObserver
 
 public:
     MPIIOMigrationBESimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<Grid3DVisitor> mV, const std::string &path,
-                                std::shared_ptr<vf::mpi::Communicator> comm);
+                                std::shared_ptr<vf::parallel::Communicator> comm);
     ~MPIIOMigrationBESimulationObserver() override;
     //! Each timestep writes the grid into the files
     void update(real step) override;
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOMigrationSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOMigrationSimulationObserver.cpp
index 860b3f02ab1db6c1554ca9f87069281520100bd6..b2bbc1bcd0fa64730eba69b7b355af8ddabab4eb 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOMigrationSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOMigrationSimulationObserver.cpp
@@ -3,7 +3,7 @@
 #include "BCSet.h"
 #include "Block3D.h"
 #include "BoundaryConditions.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "CoordinateTransformation3D.h"
 #include "D3Q27EsoTwist3DSplittedVector.h"
 #include "D3Q27System.h"
@@ -22,7 +22,7 @@
 
 using namespace MPIIODataStructures;
 
-MPIIOMigrationSimulationObserver::MPIIOMigrationSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<Grid3DVisitor> mV, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm)
+MPIIOMigrationSimulationObserver::MPIIOMigrationSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<Grid3DVisitor> mV, const std::string &path, std::shared_ptr<vf::parallel::Communicator> comm)
     : MPIIOSimulationObserver(grid, s, path, comm)
 {
     memset(&boundCondParamStr, 0, sizeof(boundCondParamStr));
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOMigrationSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOMigrationSimulationObserver.h
index 588366a6498c107600d377a65819e11100aa6702..bf70641fdabf6ffc41b49a0b5c9986812166b3ee 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOMigrationSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOMigrationSimulationObserver.h
@@ -9,7 +9,7 @@
 
 class Grid3D;
 class UbScheduler;
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class BCSet;
 class LBMKernel;
 class Grid3DVisitor;
@@ -31,7 +31,7 @@ public:
         PressureField = 9
     };
 
-    MPIIOMigrationSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<Grid3DVisitor> mV, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
+    MPIIOMigrationSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<Grid3DVisitor> mV, const std::string &path, std::shared_ptr<vf::parallel::Communicator> comm);
     ~MPIIOMigrationSimulationObserver() override;
     //! Each timestep writes the grid into the files
     void update(real step) override;
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIORestartSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIORestartSimulationObserver.cpp
index fdc3f4d4347f56d52c2b8a4952da3309ca90fe19..e4722b31f692191a5b8d1cb9bc3a62d8ac46cfe8 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIORestartSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIORestartSimulationObserver.cpp
@@ -3,7 +3,7 @@
 #include "BCSet.h"
 #include "Block3D.h"
 #include "BoundaryConditions.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "CoordinateTransformation3D.h"
 #include "D3Q27EsoTwist3DSplittedVector.h"
 #include "D3Q27System.h"
@@ -25,7 +25,7 @@
 
 using namespace MPIIODataStructures;
 
-MPIIORestartSimulationObserver::MPIIORestartSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm)
+MPIIORestartSimulationObserver::MPIIORestartSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::parallel::Communicator> comm)
     : MPIIOSimulationObserver(grid, s, path, comm)
 {
     memset(&boundCondParamStr, 0, sizeof(boundCondParamStr));
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIORestartSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIORestartSimulationObserver.h
index 5681d9886e3a6f9a4b8d1cf1b8d32e9970fb8855..d07bf6b77b8a4f56da0d4a6c88139f4d01bca104 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIORestartSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIORestartSimulationObserver.h
@@ -11,7 +11,7 @@
 
 class Grid3D;
 class UbScheduler;
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class BCSet;
 class LBMKernel;
 
@@ -32,7 +32,7 @@ public:
         PressureField = 9
     };
 
-    MPIIORestartSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
+    MPIIORestartSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::parallel::Communicator> comm);
     ~MPIIORestartSimulationObserver() override;
     //! Each timestep writes the grid into the files
     void update(real step) override;
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOSimulationObserver.cpp
index adb47a75a2fbd7fc0e8242ef9b013f6324ae066b..19ea0482a1528a2c8a0e64d4d9f7c85e1a8ed612 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOSimulationObserver.cpp
@@ -1,6 +1,6 @@
 #include "MPIIOSimulationObserver.h"
 #include "Block3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "CoordinateTransformation3D.h"
 #include "Grid3D.h"
 #include "MPIIODataStructures.h"
@@ -13,7 +13,7 @@
 using namespace MPIIODataStructures;
 
 MPIIOSimulationObserver::MPIIOSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                   std::shared_ptr<vf::mpi::Communicator> comm)
+                                   std::shared_ptr<vf::parallel::Communicator> comm)
     : SimulationObserver(grid, s), path(path), comm(comm)
 {
     UbSystem::makeDirectory(path + "/mpi_io_cp");
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOSimulationObserver.h
index c04938a71e62db4753689ad35160a7f980c0c0cb..9ecaf89a74d2f42db7d35349b27389f0f1cfc796 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/MPIIOSimulationObserver.h
@@ -8,14 +8,14 @@
 
 class Grid3D;
 class UbScheduler;
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 
 //! \class MPIWriteBlocksBESimulationObserver
 //! \brief Writes the grid each timestep into the files and reads the grip from the files before regenerating
 class MPIIOSimulationObserver : public SimulationObserver
 {
 public:
-    MPIIOSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
+    MPIIOSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, std::shared_ptr<vf::parallel::Communicator> comm);
     ~MPIIOSimulationObserver() override;
 
     //! Each timestep writes the grid into the files
@@ -37,7 +37,7 @@ public:
 
 protected:
     std::string path;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
     MPI_Datatype gridParamType, block3dType, dataSetParamType, boundCondType, arrayPresenceType;
 };
 #endif // ! _MPIIOSimulationObserver_H_
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/MicrophoneArraySimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/MicrophoneArraySimulationObserver.cpp
index 2979c841c9c07e44b5a22998897d5665c67b51dc..10749b5130e387205f01e57f4c71c90c90f46e02 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/MicrophoneArraySimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/MicrophoneArraySimulationObserver.cpp
@@ -2,7 +2,7 @@
 #include "BCArray3D.h"
 #include "BCSet.h"
 #include "Block3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "D3Q27System.h"
 #include "DataSet3D.h"
 #include "DistributionArray3D.h"
@@ -13,7 +13,7 @@
 #include <sstream>
 
 MicrophoneArraySimulationObserver::MicrophoneArraySimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                       std::shared_ptr<vf::mpi::Communicator> comm)
+                                                       std::shared_ptr<vf::parallel::Communicator> comm)
     : SimulationObserver(grid, s), path(path), comm(comm)
 {
     count = 0;
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/MicrophoneArraySimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/MicrophoneArraySimulationObserver.h
index e87954fa2902df3c27833729869a70abac348508..f95b435ba8b784c9e7d25ac80399239460144c9b 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/MicrophoneArraySimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/MicrophoneArraySimulationObserver.h
@@ -8,7 +8,7 @@
 #include <string>
 #include <vector>
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class Vector3D;
@@ -23,7 +23,7 @@ class MicrophoneArraySimulationObserver : public SimulationObserver
 {
 public:
     MicrophoneArraySimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                               std::shared_ptr<vf::mpi::Communicator> comm);
+                               std::shared_ptr<vf::parallel::Communicator> comm);
     ~MicrophoneArraySimulationObserver() override;
 
     //! calls collectData.
@@ -38,7 +38,7 @@ protected:
 
 private:
     std::string path;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
 
     struct Mic {
         unsigned int id;
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/NUPSCounterSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/NUPSCounterSimulationObserver.cpp
index 3bd0bd6f888e64da0db8f492c872c4b15a518b4a..81d7217c4b848ae3a6510fc1920c7621ee71c0aa 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/NUPSCounterSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/NUPSCounterSimulationObserver.cpp
@@ -33,12 +33,12 @@
 
 #include "NUPSCounterSimulationObserver.h"
 
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "Grid3D.h"
 #include "UbScheduler.h"
 
 NUPSCounterSimulationObserver::NUPSCounterSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, int numOfThreads,
-                                               std::shared_ptr<vf::mpi::Communicator> comm)
+                                               std::shared_ptr<vf::parallel::Communicator> comm)
     : SimulationObserver(grid, s), numOfThreads(numOfThreads), nup(0), nup_t(0), nupsStep(0.0), comm(comm)
 {
     if (comm->getProcessID() == comm->getRoot()) {
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/NUPSCounterSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/NUPSCounterSimulationObserver.h
index fdce1c4d67519b6d0a109e3cbd13e23b69d0fb19..f0585bbdb911e3dd33262d1a790a10b97da789de 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/NUPSCounterSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/NUPSCounterSimulationObserver.h
@@ -39,7 +39,7 @@
 #include "SimulationObserver.h"
 #include "basics/utilities/UbTiming.h"
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 
@@ -54,7 +54,7 @@ public:
     //! \param s is UbScheduler object for scheduling of observer
     //! \param numOfThreads is number of threads
     //! \param comm is Communicator object
-    NUPSCounterSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, int numOfThreads, std::shared_ptr<vf::mpi::Communicator> comm);
+    NUPSCounterSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, int numOfThreads, std::shared_ptr<vf::parallel::Communicator> comm);
     ~NUPSCounterSimulationObserver() override;
 
     void update(real step) override;
@@ -70,7 +70,7 @@ protected:
     real nup;
     real nup_t;
     real nupsStep;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/PressureCoefficientSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/PressureCoefficientSimulationObserver.cpp
index 7c9cd4b85e1404339b2c180bef93eafedff38c23..f36997c05dc92970b743a57d1fc9b79f92a2df51 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/PressureCoefficientSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/PressureCoefficientSimulationObserver.cpp
@@ -4,7 +4,7 @@
 #include "BCArray3D.h"
 #include "BCSet.h"
 #include "Block3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "D3Q27Interactor.h"
 #include "DataSet3D.h"
 #include "GbCuboid3D.h"
@@ -14,7 +14,7 @@
 
 PressureCoefficientSimulationObserver::PressureCoefficientSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s,
                                                                GbCuboid3DPtr plane, const std::string &path,
-                                                               std::shared_ptr<vf::mpi::Communicator> comm)
+                                                               std::shared_ptr<vf::parallel::Communicator> comm)
     : SimulationObserver(grid, s), plane(plane), path(path), comm(comm)
 {
     maxStep       = scheduler->getMaxEnd();
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/PressureCoefficientSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/PressureCoefficientSimulationObserver.h
index bfb56a65dd31fd6f434462471c6a236340b05db0..16e14af6420c5b1f634bb3f7988ba9c3f8595881 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/PressureCoefficientSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/PressureCoefficientSimulationObserver.h
@@ -11,7 +11,7 @@
 
 class GbCuboid3D;
 class D3Q27Interactor;
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 
@@ -19,7 +19,7 @@ class PressureCoefficientSimulationObserver : public SimulationObserver
 {
 public:
     PressureCoefficientSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<GbCuboid3D> plane,
-                                   const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
+                                   const std::string &path, std::shared_ptr<vf::parallel::Communicator> comm);
     ~PressureCoefficientSimulationObserver() override;
 
     void update(real step) override;
@@ -35,7 +35,7 @@ protected:
 private:
     SPtr<GbCuboid3D> plane;
     std::string path;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
     std::vector<SPtr<D3Q27Interactor>> interactors;
     int numberOfSteps;
     real maxStep;
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/PressureDifferenceSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/PressureDifferenceSimulationObserver.cpp
index 9b3c63f407b9fac00de6177a369fec2cb3e74a82..c8726bd576191bd526a954c1782c0fa7a21a9f1a 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/PressureDifferenceSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/PressureDifferenceSimulationObserver.cpp
@@ -9,7 +9,7 @@
 
 #include <fstream>
 
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "Grid3D.h"
 #include "IntegrateValuesHelper.h"
 #include "LBMUnitConverter.h"
@@ -18,7 +18,7 @@
 PressureDifferenceSimulationObserver::PressureDifferenceSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s,
                                                              const std::string &path, SPtr<IntegrateValuesHelper> h1,
                                                              SPtr<IntegrateValuesHelper> h2, real rhoReal,
-                                                             real uReal, real uLB, std::shared_ptr<vf::mpi::Communicator> comm)
+                                                             real uReal, real uLB, std::shared_ptr<vf::parallel::Communicator> comm)
 
     : SimulationObserver(grid, s), path(path), h1(h1), h2(h2), comm(comm)
 {
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/PressureDifferenceSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/PressureDifferenceSimulationObserver.h
index 35356d25f9fa941500188fc75850d1bb7c8f86fb..df9d5364bf68921ebf1c567f0cdfd4c4ed76a92a 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/PressureDifferenceSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/PressureDifferenceSimulationObserver.h
@@ -14,7 +14,7 @@
 #include "SimulationObserver.h"
 #include "LBMSystem.h"
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class LBMUnitConverter;
@@ -26,7 +26,7 @@ public:
     PressureDifferenceSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
                                   SPtr<IntegrateValuesHelper> h1, SPtr<IntegrateValuesHelper> h2, real rhoReal,
                                   real uReal, real uLB,
-                                  /*const SPtr<LBMUnitConverter> conv,*/ std::shared_ptr<vf::mpi::Communicator> comm);
+                                  /*const SPtr<LBMUnitConverter> conv,*/ std::shared_ptr<vf::parallel::Communicator> comm);
     ~PressureDifferenceSimulationObserver() override;
 
     void update(real step) override;
@@ -36,7 +36,7 @@ protected:
     std::string path;
     SPtr<LBMUnitConverter> conv;
     void collectData(real step);
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
     real factor1; //= (1/3)*rhoReal*(uReal/uLB)^2 for calculation pReal = rhoLB * (1/3)*rhoReal*(uReal/uLB)^2,
                      //rhoReal and uReal in SI
     real factor2; //= rhoReal*(uReal/uLB)^2       for calculation pReal = press * rhoReal*(uReal/uLB)^2, rhoReal and
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/QCriterionSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/QCriterionSimulationObserver.cpp
index f94b1b4472e894eb8cb975f9013b6d2aabb5214a..2060160f932fe49ba8d18e9e03396c8b62b64f50 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/QCriterionSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/QCriterionSimulationObserver.cpp
@@ -7,11 +7,11 @@
 #include "basics/writer/WbWriterVtkXmlASCII.h"
 
 #include "BCArray3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "UbScheduler.h"
 
 QCriterionSimulationObserver::QCriterionSimulationObserver(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer,
-                                             SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm)
+                                             SPtr<UbScheduler> s, std::shared_ptr<vf::parallel::Communicator> comm)
     : SimulationObserver(grid, s), path(path), comm(comm), writer(writer)
 {
     init();
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/QCriterionSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/QCriterionSimulationObserver.h
index 1d5aec23f9d4af9d9e232dd215fbde060a7c6f7c..45eddf04a2b838cfa52a64b10fd5fb4cfed88c29 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/QCriterionSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/QCriterionSimulationObserver.h
@@ -13,7 +13,7 @@
 #include "LBMSystem.h"
 #include "UbTuple.h"
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class WbWriter;
@@ -29,7 +29,7 @@ class QCriterionSimulationObserver : public SimulationObserver
 {
 public:
     QCriterionSimulationObserver(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer, SPtr<UbScheduler> s,
-                          std::shared_ptr<vf::mpi::Communicator> comm);
+                          std::shared_ptr<vf::parallel::Communicator> comm);
     //! Make update if timestep is write-timestep specified in SPtr<UbScheduler> s
     void update(real step) override;
 
@@ -58,7 +58,7 @@ private:
     int gridRank; // comm-Rank des aktuellen prozesses
     std::string path;
     WbWriter *writer;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
     enum Values { xdir = 0, ydir = 1, zdir = 2 }; // labels for the different components
 };
 
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/ShearStressSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/ShearStressSimulationObserver.cpp
index 2d7863292e8fc0eb153d78c2b4edfdf8a7cb4235..b90dd53d7726386b24d6b511a4c6c3ed0adf5640 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/ShearStressSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/ShearStressSimulationObserver.cpp
@@ -4,7 +4,7 @@
 
 #include "BCArray3D.h"
 #include "Block3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "D3Q27Interactor.h"
 #include "DataSet3D.h"
 #include "Grid3D.h"
@@ -16,7 +16,7 @@ ShearStressSimulationObserver::ShearStressSimulationObserver(SPtr<Grid3D> grid,
                                                SPtr<UbScheduler> s, SPtr<UbScheduler> rs)
     : SimulationObserver(grid, s), Resetscheduler(rs), path(path), writer(writer)
 {
-    std::shared_ptr<vf::mpi::Communicator> comm = vf::mpi::Communicator::getInstance();
+    std::shared_ptr<vf::parallel::Communicator> comm = vf::parallel::Communicator::getInstance();
     normals.push_back(0);
     normals.push_back(0);
     normals.push_back(1);
@@ -62,7 +62,7 @@ void ShearStressSimulationObserver::collectData(real step)
 
     // vector<string> cellDataNames;
 
-    // std::shared_ptr<vf::mpi::Communicator> comm = vf::mpi::Communicator::getInstance();
+    // std::shared_ptr<vf::parallel::Communicator> comm = vf::parallel::Communicator::getInstance();
     // vector<string> pieces = comm->gatherStrings(piece);
     // if (comm->getProcessID() == comm->getRoot())
     //{
@@ -94,7 +94,7 @@ void ShearStressSimulationObserver::collectData(real step)
     piece           = subfolder + "/" + piece;
 
     vector<string> cellDataNames;
-    std::shared_ptr<vf::mpi::Communicator> comm = vf::mpi::Communicator::getInstance();
+    std::shared_ptr<vf::parallel::Communicator> comm = vf::parallel::Communicator::getInstance();
     vector<string> pieces   = comm->gather(piece);
     if (comm->getProcessID() == comm->getRoot()) {
         string pname =
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/TimeAveragedValuesSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/TimeAveragedValuesSimulationObserver.cpp
index ebd65f625600a1c68f48d00c33a79976ea6d1a5a..6c1d833cc222aed76a2d348ab2be5ea6982f961d 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/TimeAveragedValuesSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/TimeAveragedValuesSimulationObserver.cpp
@@ -4,7 +4,7 @@
 #include "LBMKernel.h"
 
 #include "Block3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "DataSet3D.h"
 #include "Grid3D.h"
 #include "UbScheduler.h"
@@ -16,7 +16,7 @@ TimeAveragedValuesSimulationObserver::TimeAveragedValuesSimulationObserver() = d
 //////////////////////////////////////////////////////////////////////////
 TimeAveragedValuesSimulationObserver::TimeAveragedValuesSimulationObserver(SPtr<Grid3D> grid, const std::string &path,
                                                              WbWriter *const writer, SPtr<UbScheduler> s,
-                                                             std::shared_ptr<vf::mpi::Communicator> comm, int options)
+                                                             std::shared_ptr<vf::parallel::Communicator> comm, int options)
     : SimulationObserver(grid, s), path(path), writer(writer), comm(comm), options(options)
 {
     init();
@@ -26,7 +26,7 @@ TimeAveragedValuesSimulationObserver::TimeAveragedValuesSimulationObserver(SPtr<
 //////////////////////////////////////////////////////////////////////////
 TimeAveragedValuesSimulationObserver::TimeAveragedValuesSimulationObserver(SPtr<Grid3D> grid, const std::string &path,
                                                              WbWriter *const writer, SPtr<UbScheduler> s,
-                                                             std::shared_ptr<vf::mpi::Communicator> comm, int options,
+                                                             std::shared_ptr<vf::parallel::Communicator> comm, int options,
                                                              std::vector<int> levels, std::vector<real> &levelCoords,
                                                              std::vector<real> &bounds, bool timeAveraging)
     : SimulationObserver(grid, s), path(path), writer(writer), comm(comm), options(options), levels(levels),
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/TimeAveragedValuesSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/TimeAveragedValuesSimulationObserver.h
index 14a1f6354aa57ca588361299caf7a1d336001f9e..a9f78137b8452f476dc37f8dfac711c9a414e889 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/TimeAveragedValuesSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/TimeAveragedValuesSimulationObserver.h
@@ -9,7 +9,7 @@
 #include "IntegrateValuesHelper.h"
 #include "LBMSystem.h"
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class WbWriter;
@@ -41,9 +41,9 @@ public:
 public:
     TimeAveragedValuesSimulationObserver();
     TimeAveragedValuesSimulationObserver(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer,
-                                  SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm, int options);
+                                  SPtr<UbScheduler> s, std::shared_ptr<vf::parallel::Communicator> comm, int options);
     TimeAveragedValuesSimulationObserver(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer,
-                                  SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm, int options, std::vector<int> levels,
+                                  SPtr<UbScheduler> s, std::shared_ptr<vf::parallel::Communicator> comm, int options, std::vector<int> levels,
                                   std::vector<real> &levelCoords, std::vector<real> &bounds,
                                   bool timeAveraging = true);
     //! Make update
@@ -70,7 +70,7 @@ protected:
     void calculateAverageValuesForPlane(std::vector<IntegrateValuesHelper::CalcNodes> &cnodes);
 
 private:
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
     std::vector<UbTupleFloat3> nodes;
     std::vector<UbTupleUInt8> cells;
     std::vector<std::string> datanames;
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/TimeseriesSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/TimeseriesSimulationObserver.cpp
index e0560e2767b70dcc51db08f807a29e467efa6a2d..5245c51d9cb8b01324388d5cbfc61db5dc5ce5a1 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/TimeseriesSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/TimeseriesSimulationObserver.cpp
@@ -9,14 +9,14 @@
 
 #include <fstream>
 
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "Grid3D.h"
 #include "IntegrateValuesHelper.h"
 #include "LBMUnitConverter.h"
 #include "UbScheduler.h"
 
 TimeseriesSimulationObserver::TimeseriesSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<IntegrateValuesHelper> h1,
-                                             const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm)
+                                             const std::string &path, std::shared_ptr<vf::parallel::Communicator> comm)
     : SimulationObserver(grid, s), h1(h1), path(path), comm(comm)
 {
     if (comm->getProcessID() == comm->getRoot()) {
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/TimeseriesSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/TimeseriesSimulationObserver.h
index db41bd2ecea38ac86e97740310ba9501f94caa4a..d467b2301688d716ffb0b879fd8d0fab7353b077 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/TimeseriesSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/TimeseriesSimulationObserver.h
@@ -13,7 +13,7 @@
 
 #include "SimulationObserver.h"
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class IntegrateValuesHelper;
@@ -27,7 +27,7 @@ class TimeseriesSimulationObserver : public SimulationObserver
 {
 public:
     TimeseriesSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<IntegrateValuesHelper> h1,
-                          const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
+                          const std::string &path, std::shared_ptr<vf::parallel::Communicator> comm);
     ~TimeseriesSimulationObserver() override;
 
     //! calls collectData.
@@ -38,7 +38,7 @@ protected:
 
     //! object that can compute spacial average values in 3D-subdomain.
     SPtr<IntegrateValuesHelper> h1;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
 
 private:
     std::string path; //! output filename, e.g.  pathname + "/steps/timeseries"
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/TurbulenceIntensitySimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/TurbulenceIntensitySimulationObserver.cpp
index 47b865ed7b80fd1c420d59fa54144d5afa1471f5..d5a0ccb593488f4e992fef0fc7591c0672fc24ed 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/TurbulenceIntensitySimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/TurbulenceIntensitySimulationObserver.cpp
@@ -3,7 +3,7 @@
 #include "BCArray3D.h"
 #include "BCSet.h"
 #include "Block3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "DataSet3D.h"
 #include "Grid3D.h"
 #include "LBMKernel.h"
@@ -14,7 +14,7 @@
 
 TurbulenceIntensitySimulationObserver::TurbulenceIntensitySimulationObserver(SPtr<Grid3D> grid, const std::string &path,
                                                                WbWriter *const writer, SPtr<UbScheduler> s,
-                                                               std::shared_ptr<vf::mpi::Communicator> comm)
+                                                               std::shared_ptr<vf::parallel::Communicator> comm)
     : SimulationObserver(grid, s), path(path), comm(comm), writer(writer)
 {
     init();
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/TurbulenceIntensitySimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/TurbulenceIntensitySimulationObserver.h
index c615bbda5cb58a522e6853fcf3f8475bc3320b52..cffaf49ede459093bae2e0709da27a244f74ad5a 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/TurbulenceIntensitySimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/TurbulenceIntensitySimulationObserver.h
@@ -8,7 +8,7 @@
 #include "SimulationObserver.h"
 #include "UbTuple.h"
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class WbWriter;
@@ -18,7 +18,7 @@ class TurbulenceIntensitySimulationObserver : public SimulationObserver
 {
 public:
     TurbulenceIntensitySimulationObserver(SPtr<Grid3D> grid, const std::string &path, WbWriter *const writer,
-                                   SPtr<UbScheduler> s, std::shared_ptr<vf::mpi::Communicator> comm);
+                                   SPtr<UbScheduler> s, std::shared_ptr<vf::parallel::Communicator> comm);
     void update(real step) override;
 
 protected:
@@ -39,7 +39,7 @@ private:
     int gridRank;
     std::string path;
     WbWriter *writer;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
     enum Values { AvVx = 0, AvVy = 1, AvVz = 2, AvVxxyyzz = 3 };
 };
 #endif
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteBlocksSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteBlocksSimulationObserver.cpp
index fd983bd02bd0bf3a7ae0cd0af96b2f169180f31b..15a1c39cf268659d8cc8dec4b7633636049a019d 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteBlocksSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteBlocksSimulationObserver.cpp
@@ -36,13 +36,13 @@
 #include <logger/Logger.h>
 
 #include "Block3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "D3Q27System.h"
 #include "Grid3D.h"
 #include "UbScheduler.h"
 
 WriteBlocksSimulationObserver::WriteBlocksSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                               WbWriter *const writer, std::shared_ptr<vf::mpi::Communicator> comm)
+                                               WbWriter *const writer, std::shared_ptr<vf::parallel::Communicator> comm)
     : SimulationObserver(grid, s), path(path), writer(writer), comm(comm)
 {
 }
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteBlocksSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteBlocksSimulationObserver.h
index 805605b64564272c6a327545a4b01bc89926da38..636dc9f18d9b232f0a777232b3d69f26a545a477 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteBlocksSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteBlocksSimulationObserver.h
@@ -39,7 +39,7 @@
 
 #include "SimulationObserver.h"
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class WbWriter;
@@ -57,7 +57,7 @@ public:
     //! \param writer is WbWriter object
     //! \param comm is Communicator object
     WriteBlocksSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, WbWriter *const writer,
-                           std::shared_ptr<vf::mpi::Communicator> comm);
+                           std::shared_ptr<vf::parallel::Communicator> comm);
     ~WriteBlocksSimulationObserver() override;
 
     void update(real step) override;
@@ -69,7 +69,7 @@ protected:
 
     std::string path;
     WbWriter *writer;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteBoundaryConditionsSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteBoundaryConditionsSimulationObserver.cpp
index 9d09db9e2c839f5db6bdd4c95e348e3ade094759..d88315475e4a8f8419c26a5b186d5753fb107fa1 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteBoundaryConditionsSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteBoundaryConditionsSimulationObserver.cpp
@@ -42,7 +42,7 @@
 #include "BCArray3D.h"
 #include "Block3D.h"
 #include "CbArray3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "Grid3D.h"
 #include "LBMUnitConverter.h"
 #include "UbScheduler.h"
@@ -55,7 +55,7 @@ WriteBoundaryConditionsSimulationObserver::WriteBoundaryConditionsSimulationObse
 //////////////////////////////////////////////////////////////////////////
 WriteBoundaryConditionsSimulationObserver::WriteBoundaryConditionsSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s,
                                                                        const std::string &path, WbWriter *const writer,
-                                                                       std::shared_ptr<vf::mpi::Communicator> comm)
+                                                                       std::shared_ptr<vf::parallel::Communicator> comm)
     : SimulationObserver(grid, s), path(path), writer(writer), comm(comm)
 {
     gridRank     = comm->getProcessID();
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteBoundaryConditionsSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteBoundaryConditionsSimulationObserver.h
index ad5b20df942748e065cebe926ba346581b9bf30b..aff7893432dfbd99f9ceae5d407e26c39e16304d 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteBoundaryConditionsSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteBoundaryConditionsSimulationObserver.h
@@ -41,7 +41,7 @@
 #include "SimulationObserver.h"
 #include "UbTuple.h"
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class WbWriter;
@@ -61,7 +61,7 @@ public:
     //! \param writer is WbWriter object
     //! \param comm is Communicator object
     WriteBoundaryConditionsSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                       WbWriter *const writer, std::shared_ptr<vf::mpi::Communicator> comm);
+                                       WbWriter *const writer, std::shared_ptr<vf::parallel::Communicator> comm);
     ~WriteBoundaryConditionsSimulationObserver() override = default;
 
     void update(real step) override;
@@ -84,6 +84,6 @@ private:
     int minInitLevel;
     int maxInitLevel;
     int gridRank;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
 };
 #endif
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteGbObjectsSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteGbObjectsSimulationObserver.cpp
index 62178444f92abffebe8ce5d2ad1bd8a1f54960f0..7ad8c2dc113e2b5b8f9aba6e173efc3dd6817f05 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteGbObjectsSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteGbObjectsSimulationObserver.cpp
@@ -1,5 +1,5 @@
 #include "WriteGbObjectsSimulationObserver.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "GbObject3D.h"
 #include "UbScheduler.h"
 #include "WbWriterVtkXmlASCII.h"
@@ -7,7 +7,7 @@
 #include <vector>
 
 WriteGbObjectsSimulationObserver::WriteGbObjectsSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                     WbWriter *const writer, std::shared_ptr<vf::mpi::Communicator> comm)
+                                                     WbWriter *const writer, std::shared_ptr<vf::parallel::Communicator> comm)
     : SimulationObserver(grid, s), path(path), writer(writer), comm(comm)
 {
 }
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteGbObjectsSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteGbObjectsSimulationObserver.h
index 50f88c65ddbf87bb9960f2be61e380e9ad2d570b..44e466ee95713675dd507643c8fa0b7194b0f612 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteGbObjectsSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteGbObjectsSimulationObserver.h
@@ -7,7 +7,7 @@
 #include <vector>
 
 class GbObject3D;
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class WbWriter;
@@ -21,7 +21,7 @@ class WriteGbObjectsSimulationObserver : public SimulationObserver
 {
 public:
     WriteGbObjectsSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, WbWriter *const writer,
-                              std::shared_ptr<vf::mpi::Communicator> comm);
+                              std::shared_ptr<vf::parallel::Communicator> comm);
     ~WriteGbObjectsSimulationObserver() override;
     //! calls collectData.
     void update(real step) override;
@@ -35,7 +35,7 @@ private:
     std::vector<SPtr<GbObject3D>> objects;
     std::string path;
     WbWriter *writer;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
 };
 
 #endif // WriteGbObjectsSimulationObserver_h__
\ No newline at end of file
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMQFromSelectionSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMQFromSelectionSimulationObserver.cpp
index caf1e8c1ed2d4c43a219e1fd7a09b3a96e0e2370..945058c8e94b2d7780c4a03444c0848267b4b8f7 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMQFromSelectionSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMQFromSelectionSimulationObserver.cpp
@@ -6,7 +6,7 @@
 
 #include "BCArray3D.h"
 #include "Block3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "DataSet3D.h"
 #include "GbObject3D.h"
 #include "Grid3D.h"
@@ -19,7 +19,7 @@ WriteMQFromSelectionSimulationObserver::WriteMQFromSelectionSimulationObserver()
 WriteMQFromSelectionSimulationObserver::WriteMQFromSelectionSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s,
                                                                  SPtr<GbObject3D> gbObject, const std::string &path,
                                                                  WbWriter *const writer, SPtr<LBMUnitConverter> conv,
-                                                                 std::shared_ptr<vf::mpi::Communicator> comm)
+                                                                 std::shared_ptr<vf::parallel::Communicator> comm)
     : SimulationObserver(grid, s), gbObject(gbObject), path(path), writer(writer), conv(conv), comm(comm)
 {
     gridRank     = comm->getProcessID();
@@ -80,7 +80,7 @@ void WriteMQFromSelectionSimulationObserver::collectData(real step)
     piece                = subfolder + "/" + piece;
 
     std::vector<std::string> cellDataNames;
-    std::shared_ptr<vf::mpi::Communicator> comm         = vf::mpi::Communicator::getInstance();
+    std::shared_ptr<vf::parallel::Communicator> comm         = vf::parallel::Communicator::getInstance();
     std::vector<std::string> pieces = comm->gather(piece);
     if (comm->getProcessID() == comm->getRoot()) {
         std::string pname =
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMQFromSelectionSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMQFromSelectionSimulationObserver.h
index e91fc369e1ddb33af68629d6aab75cf5b3756290..107d49720faed3f4fb1e5478b2bb75047f48ddb5 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMQFromSelectionSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMQFromSelectionSimulationObserver.h
@@ -10,7 +10,7 @@
 #include "LBMSystem.h"
 #include "UbTuple.h"
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class LBMUnitConverter;
@@ -24,7 +24,7 @@ public:
     WriteMQFromSelectionSimulationObserver();
     WriteMQFromSelectionSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, SPtr<GbObject3D> gbObject,
                                     const std::string &path, WbWriter *const writer, SPtr<LBMUnitConverter> conv,
-                                    std::shared_ptr<vf::mpi::Communicator> comm);
+                                    std::shared_ptr<vf::parallel::Communicator> comm);
     ~WriteMQFromSelectionSimulationObserver() override = default;
 
     void update(real step) override;
@@ -47,7 +47,7 @@ private:
     int minInitLevel;
     int maxInitLevel;
     int gridRank;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
     SPtr<GbObject3D> gbObject;
 
     using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMacroscopicQuantitiesPlusMassSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMacroscopicQuantitiesPlusMassSimulationObserver.cpp
index 142bcc52b053f0be71c8a1ca41eaf0dfeaf24f1b..f098a21f536ba9ee1c8786007fe5ef272fb674af 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMacroscopicQuantitiesPlusMassSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMacroscopicQuantitiesPlusMassSimulationObserver.cpp
@@ -52,7 +52,7 @@ WriteMacroscopicQuantitiesPlusMassSimulationObserver::WriteMacroscopicQuantities
                                                                              const std::string &path,
                                                                              WbWriter *const writer,
                                                                              SPtr<LBMUnitConverter> conv,
-                                                                             std::shared_ptr<vf::mpi::Communicator> comm)
+                                                                             std::shared_ptr<vf::parallel::Communicator> comm)
         : SimulationObserver(grid, s), path(path), writer(writer), conv(conv), comm(comm)
 {
     gridRank = comm->getProcessID();
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMacroscopicQuantitiesPlusMassSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMacroscopicQuantitiesPlusMassSimulationObserver.h
index ce6946528269adec3374dff655991b4a8cb0aaf7..ee892f41a597325fc1d6cfb5515d5f333e94cf21 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMacroscopicQuantitiesPlusMassSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMacroscopicQuantitiesPlusMassSimulationObserver.h
@@ -42,7 +42,7 @@
 #include "LBMSystem.h"
 #include "UbTuple.h"
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class LBMUnitConverter;
@@ -63,7 +63,7 @@ public:
     //! \param conv is LBMUnitConverter object
     //! \param comm is Communicator object
     WriteMacroscopicQuantitiesPlusMassSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                          WbWriter *const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm);
+                                          WbWriter *const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::parallel::Communicator> comm);
     ~WriteMacroscopicQuantitiesPlusMassSimulationObserver() override = default;
 
     void update(real step) override;
@@ -90,7 +90,7 @@ private:
     int minInitLevel;
     int maxInitLevel;
     int gridRank;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
 
     using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMacroscopicQuantitiesSimulationObserver.cpp b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMacroscopicQuantitiesSimulationObserver.cpp
index b87b5cfcfc5b10b3fc97d54b135a745c811f9e0e..d5c80b4df56bc303361572e9114ab57efe995ecc 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMacroscopicQuantitiesSimulationObserver.cpp
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMacroscopicQuantitiesSimulationObserver.cpp
@@ -39,7 +39,7 @@
 
 #include "BCArray3D.h"
 #include "Block3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "DataSet3D.h"
 #include "Grid3D.h"
 #include "LBMUnitConverter.h"
@@ -52,7 +52,7 @@ WriteMacroscopicQuantitiesSimulationObserver::WriteMacroscopicQuantitiesSimulati
                                                                              const std::string &path,
                                                                              WbWriter *const writer,
                                                                              SPtr<LBMUnitConverter> conv,
-                                                                             std::shared_ptr<vf::mpi::Communicator> comm)
+                                                                             std::shared_ptr<vf::parallel::Communicator> comm)
         : SimulationObserver(grid, s), path(path), writer(writer), conv(conv), comm(comm)
 {
     gridRank = comm->getProcessID();
diff --git a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMacroscopicQuantitiesSimulationObserver.h b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMacroscopicQuantitiesSimulationObserver.h
index 85de0336487a71774d63c86f4c6ba1b65b15fe2a..279f9dfba3cc41c0a06ef4cc66b06e6b841f4259 100644
--- a/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMacroscopicQuantitiesSimulationObserver.h
+++ b/src/cpu/VirtualFluidsCore/SimulationObservers/WriteMacroscopicQuantitiesSimulationObserver.h
@@ -42,7 +42,7 @@
 #include "LBMSystem.h"
 #include "UbTuple.h"
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class UbScheduler;
 class LBMUnitConverter;
@@ -63,7 +63,7 @@ public:
     //! \param conv is LBMUnitConverter object
     //! \param comm is Communicator object
     WriteMacroscopicQuantitiesSimulationObserver(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                          WbWriter *const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::mpi::Communicator> comm);
+                                          WbWriter *const writer, SPtr<LBMUnitConverter> conv, std::shared_ptr<vf::parallel::Communicator> comm);
     ~WriteMacroscopicQuantitiesSimulationObserver() override = default;
 
     void update(real step) override;
@@ -90,7 +90,7 @@ private:
     int minInitLevel;
     int maxInitLevel;
     int gridRank;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
 
     using CalcMacrosFct = void (*)(const real *const &, real &, real &, real &, real &);
     CalcMacrosFct calcMacros;
diff --git a/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp b/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp
index 358dabf437fb69325a905d968e8dc6547127fd8f..0f41364ad614cf8ffb511461d713336e58fd0d1c 100644
--- a/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp
+++ b/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp
@@ -1,7 +1,7 @@
 #include "CheckpointConverter.h"
 #include "Block3D.h"
 #include "BoundaryConditions.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "CoordinateTransformation3D.h"
 #include "DataSet3D.h"
 #include "Grid3D.h"
@@ -12,7 +12,7 @@
 
 using namespace MPIIODataStructures;
 
-CheckpointConverter::CheckpointConverter(SPtr<Grid3D> grid, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm)
+CheckpointConverter::CheckpointConverter(SPtr<Grid3D> grid, const std::string &path, std::shared_ptr<vf::parallel::Communicator> comm)
     : grid(grid), path(path), comm(comm)
 {
     UbSystem::makeDirectory(path + "/mpi_io_cp");
diff --git a/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.h b/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.h
index bab67ae662c10e31158b47e1725788dc38794560..a2902b366cc1c4c06ccdc0513ed329757f22a558 100644
--- a/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.h
+++ b/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.h
@@ -8,14 +8,14 @@
 #include <vector>
 
 class Grid3D;
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 
 //! \class UtilConvertor
 //! \brief Converts timestep data from MPIIORestartSimulationObserver format into MPIIOMigrationSimulationObserver format
 class CheckpointConverter
 {
 public:
-    CheckpointConverter(SPtr<Grid3D> grid, const std::string &path, std::shared_ptr<vf::mpi::Communicator> comm);
+    CheckpointConverter(SPtr<Grid3D> grid, const std::string &path, std::shared_ptr<vf::parallel::Communicator> comm);
     virtual ~CheckpointConverter();
 
     void convert(int step, int procCount);
@@ -26,7 +26,7 @@ public:
 
 protected:
     std::string path;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
     SPtr<Grid3D> grid;
 
 private:
diff --git a/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.cpp b/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.cpp
index b931cbbbda004f7b2057943222d4523c5fb0916b..35816e3d5770ebfba348ad29e9a45ba0e0d72910 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.cpp
@@ -50,7 +50,7 @@ CreateTransmittersHelper::CreateTransmittersHelper() = default;
 //////////////////////////////////////////////////////////////////////////
 void CreateTransmittersHelper::createTransmitters(SPtr<Block3D> sblock, SPtr<Block3D> tblock, int dir, IBlock ib,
                                                   TransmitterPtr &sender, TransmitterPtr &receiver,
-                                                  std::shared_ptr<vf::mpi::Communicator> comm, TransmitterType tType)
+                                                  std::shared_ptr<vf::parallel::Communicator> comm, TransmitterType tType)
 {
     // SourceBlock
     int srcLevel = sblock->getLevel();
diff --git a/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.h b/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.h
index af60de0a2e2b9e06488df3011584b8448594bf85..1a52078fa081b17ffc7e30c8f3cb154e9d698657 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/CreateTransmittersHelper.h
@@ -35,7 +35,7 @@
 #define CREATETRANSMITTERSHELPER_H
 
 #include "Block3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 
 #include "LBMSystem.h"
 
@@ -61,7 +61,7 @@ public:
 public:
     CreateTransmittersHelper();
     void createTransmitters(const SPtr<Block3D> sblock, const SPtr<Block3D> tblock, int dir, IBlock ib,
-                            TransmitterPtr &sender, TransmitterPtr &receiver, std::shared_ptr<vf::mpi::Communicator> comm,
+                            TransmitterPtr &sender, TransmitterPtr &receiver, std::shared_ptr<vf::parallel::Communicator> comm,
                             TransmitterType tType);
 
 protected:
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.cpp
index 1e62e0a2c35367fb6189822bcdbf96b611d75bb9..30708d664deb405954f95a4aac58cd6c01d17153 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.cpp
@@ -2,14 +2,14 @@
 
 #include "MetisPartitioningGridVisitor.h"
 #include "Block3D.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "D3Q27System.h"
 #include "Grid3D.h"
 #include <cmath>
 
 using namespace std;
 
-MetisPartitioningGridVisitor::MetisPartitioningGridVisitor(std::shared_ptr<vf::mpi::Communicator> comm, GraphType graphType, int numOfDirs,
+MetisPartitioningGridVisitor::MetisPartitioningGridVisitor(std::shared_ptr<vf::parallel::Communicator> comm, GraphType graphType, int numOfDirs,
                                                            MetisPartitioner::PartType partType, bool threads,
                                                            int numberOfThreads)
     : Grid3DVisitor(), numberOfThreads(numberOfThreads), numOfDirs(numOfDirs), comm(comm), threads(threads),
diff --git a/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.h
index c270d3ce389cc2697c1ac54178984ffa2f4d07a9..d4e29060932b61abeaa68d03e3e0fd566d45a927 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/MetisPartitioningGridVisitor.h
@@ -9,7 +9,7 @@
 #include "Grid3DVisitor.h"
 #include "MetisPartitioner.h"
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 
 ////////////////////////////////////////////////////////////////////////
 //! \brief The class implements domain decomposition with METIS library
@@ -32,7 +32,7 @@ public:
     //! \param numOfDirs - maximum number of neighbors for each process
     //! \param threads - on/off decomposition for threads
     //! \param numberOfThreads - number of threads
-    MetisPartitioningGridVisitor(std::shared_ptr<vf::mpi::Communicator> comm, GraphType graphType, int numOfDirs,
+    MetisPartitioningGridVisitor(std::shared_ptr<vf::parallel::Communicator> comm, GraphType graphType, int numOfDirs,
                                  MetisPartitioner::PartType partType = MetisPartitioner::KWAY, bool threads = false,
                                  int numberOfThreads = 0);
     ~MetisPartitioningGridVisitor() override;
@@ -52,7 +52,7 @@ protected:
     int numOfDirs;
     std::vector<int> blockID;
     std::vector<idx_t> parts;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
     int bundleRoot;
     int processRoot;
     int bundleID;
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.cpp b/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.cpp
index a73965641237c804cd094f399e582336e6be8e04..b2eefc859d26ef92d43bb7701a1eb96e6a7a6f4a 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.cpp
@@ -1,5 +1,5 @@
 #include "RefineAroundGbObjectHelper.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "OverlapBlockVisitor.h"
 #include "RatioBlockVisitor.h"
 #include "RatioSmoothBlockVisitor.h"
@@ -11,7 +11,7 @@
 RefineAroundGbObjectHelper::RefineAroundGbObjectHelper(SPtr<Grid3D> grid, int refineLevel,
                                                        SPtr<D3Q27TriFaceMeshInteractor> objectIter,
                                                        real startDistance, real stopDistance,
-                                                       std::shared_ptr<vf::mpi::Communicator> comm)
+                                                       std::shared_ptr<vf::parallel::Communicator> comm)
     : grid(grid), refineLevel(refineLevel), objectIter(objectIter), startDistance(startDistance),
       stopDistance(stopDistance), comm(comm)
 {
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.h b/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.h
index 76874ce767294efa318bb7e8b9f8b4d2e2a348eb..c5cc4d6fc426566770bdfbfd4637f32e1ecd411d 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/RefineAroundGbObjectHelper.h
@@ -5,7 +5,7 @@
 #include "lbm/constants/D3Q27.h"
 
 class Grid3D;
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class D3Q27TriFaceMeshInteractor;
 
 //! \brief Refine blocks on base of bounding boxes.
@@ -21,7 +21,7 @@ public:
     //! \param startDistance start distance from geometry for refinement
     //! \param stopDistance stop distance from geometry for refinement
     RefineAroundGbObjectHelper(SPtr<Grid3D> grid, int maxRefineLevel, SPtr<D3Q27TriFaceMeshInteractor> objectIter,
-                               real startDistance, real stopDistance, std::shared_ptr<vf::mpi::Communicator> comm);
+                               real startDistance, real stopDistance, std::shared_ptr<vf::parallel::Communicator> comm);
     virtual ~RefineAroundGbObjectHelper();
     //! start refinement
     void refine();
@@ -31,7 +31,7 @@ private:
     SPtr<D3Q27TriFaceMeshInteractor> objectIter;
     int refineLevel;
     real startDistance, stopDistance;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.cpp b/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.cpp
index 52c7c3ac1204a96fe7db3089ef2eb3ecc93ac143..f11e8e5ce1bc04af79775be7bfdd14201febbc3f 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.cpp
@@ -1,6 +1,6 @@
 #include "RefineCrossAndInsideGbObjectHelper.h"
 #include "CheckRatioBlockVisitor.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "OverlapBlockVisitor.h"
 #include "RatioBlockVisitor.h"
 #include "RatioSmoothBlockVisitor.h"
@@ -11,7 +11,7 @@
 #include <Grid3D.h>
 
 RefineCrossAndInsideGbObjectHelper::RefineCrossAndInsideGbObjectHelper(SPtr<Grid3D> grid, int maxRefineLevel,
-                                                                       std::shared_ptr<vf::mpi::Communicator> comm)
+                                                                       std::shared_ptr<vf::parallel::Communicator> comm)
     : grid(grid), maxRefineLevel(maxRefineLevel), comm(comm)
 {
 }
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.h b/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.h
index d0a9ac44891519b3fd583f98a56e33dfd1e42122..28caf212afa50d8d07979e137c863f9554543adf 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/RefineCrossAndInsideGbObjectHelper.h
@@ -4,7 +4,7 @@
 #include <PointerDefinitions.h>
 #include <vector>
 
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Grid3D;
 class GbObject3D;
 
@@ -17,7 +17,7 @@ public:
     //! Constructor
     //! \param grid a smart pointer to the grid object
     //! \param maxRefineLevel an integer for maximal refinement level
-    RefineCrossAndInsideGbObjectHelper(SPtr<Grid3D> grid, int maxRefineLevel, std::shared_ptr<vf::mpi::Communicator> comm);
+    RefineCrossAndInsideGbObjectHelper(SPtr<Grid3D> grid, int maxRefineLevel, std::shared_ptr<vf::parallel::Communicator> comm);
     virtual ~RefineCrossAndInsideGbObjectHelper();
     //! add geometric object
     //! \param object a smart pointer to bounding box
@@ -31,7 +31,7 @@ private:
     std::vector<SPtr<GbObject3D>> objects;
     std::vector<int> levels;
     int maxRefineLevel;
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RenumberGridVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/RenumberGridVisitor.cpp
index ed9a3ee59c87ab755416eecd5468a4cc763837e4..a21b8b8a61575eedbacdc76fdd55ef89c4318173 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RenumberGridVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/RenumberGridVisitor.cpp
@@ -4,7 +4,7 @@
 #include "D3Q27System.h"
 //#include <mpi.h>
 
-RenumberGridVisitor::RenumberGridVisitor(std::shared_ptr<vf::mpi::Communicator> com) : comm(com) {}
+RenumberGridVisitor::RenumberGridVisitor(std::shared_ptr<vf::parallel::Communicator> com) : comm(com) {}
 
 //////////////////////////////////////////////////////////////////////////
 void RenumberGridVisitor::visit(SPtr<Grid3D> grid)
diff --git a/src/cpu/VirtualFluidsCore/Visitors/RenumberGridVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/RenumberGridVisitor.h
index 993bccd1034d0fb648c2e05d77da380916816967..aa56b469107dc2be3332622b747e0c0f3e3b9fe7 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/RenumberGridVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/RenumberGridVisitor.h
@@ -8,7 +8,7 @@
 #ifndef RenumberGridVisitor_h
 #define RenumberGridVisitor_h
 
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "Grid3DVisitor.h"
 
 class Grid3D;
@@ -19,14 +19,14 @@ class Grid3D;
 class RenumberGridVisitor : public Grid3DVisitor
 {
 public:
-    RenumberGridVisitor(std::shared_ptr<vf::mpi::Communicator> com);
+    RenumberGridVisitor(std::shared_ptr<vf::parallel::Communicator> com);
 
     ~RenumberGridVisitor() override = default;
 
     void visit(SPtr<Grid3D> grid) override;
 
 private:
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
     //   static int counter;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h
index ae214c77ca425dde4ecde31f7dc88d19a1616555..fcf2c93d233a5168f0ff4586244ee2088a7bf627 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetConnectorsBlockVisitor.h
@@ -41,7 +41,7 @@
 #include "D3Q27System.h"
 #include "Grid3D.h"
 #include "CreateTransmittersHelper.h"
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "OneDistributionFullDirectConnector.h"
 #include "OneDistributionFullVectorConnector.h"
 #include "TwoDistributionsFullDirectConnector.h"
@@ -62,19 +62,19 @@ public:
     using LocalConnector  = T1;
     using RemoteConnector = T2;
 public:
-    SetConnectorsBlockVisitor(std::shared_ptr<vf::mpi::Communicator> comm);
+    SetConnectorsBlockVisitor(std::shared_ptr<vf::parallel::Communicator> comm);
     ~SetConnectorsBlockVisitor() override;
     void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override;
     //////////////////////////////////////////////////////////////////////////
 protected:
     void setSameLevelConnectors(SPtr<Grid3D> grid, SPtr<Block3D> block);
     void setRemoteConnectors(SPtr<Block3D> sblock, SPtr<Block3D> tblock, int dir);
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
     int gridRank{0};
 };
 
 template <class T1, class T2>
-SetConnectorsBlockVisitor<T1, T2>::SetConnectorsBlockVisitor(std::shared_ptr<vf::mpi::Communicator> comm)
+SetConnectorsBlockVisitor<T1, T2>::SetConnectorsBlockVisitor(std::shared_ptr<vf::parallel::Communicator> comm)
     : Block3DVisitor(0, D3Q27System::MAXLEVEL), comm(comm)
 {
 }
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.cpp
index 362e8c92751e1275bfdaa7d7daf1567f117ecdf4..bdf851025d380daa93db2477217a513002591ebb 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.cpp
@@ -39,10 +39,10 @@
 #include "D3Q27System.h"
 #include <basics/transmitter/TbTransmitterLocal.h>
 
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "Interpolator.h"
 
-SetInterpolationConnectorsBlockVisitor::SetInterpolationConnectorsBlockVisitor(std::shared_ptr<vf::mpi::Communicator> comm, real nue, SPtr<Interpolator> iProcessor) :
+SetInterpolationConnectorsBlockVisitor::SetInterpolationConnectorsBlockVisitor(std::shared_ptr<vf::parallel::Communicator> comm, real nue, SPtr<Interpolator> iProcessor) :
 Block3DVisitor(0, D3Q27System::MAXLEVEL), 
 	comm(comm),
 	nue(nue),
diff --git a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.h
index b1f6f99e1c4eff543f87dfcd70e7b45bf27701d5..4e4b205397842bc1218b5a0ec5555b527f989377 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/SetInterpolationConnectorsBlockVisitor.h
@@ -43,14 +43,14 @@
 
 class Grid3D;
 class Block3D;
-namespace vf::mpi {class Communicator;}
+namespace vf::parallel {class Communicator;}
 class Interpolator;
 
 //! \brief  A class sets connectors between blocks.
 class SetInterpolationConnectorsBlockVisitor : public Block3DVisitor
 {
 public:
-    SetInterpolationConnectorsBlockVisitor(std::shared_ptr<vf::mpi::Communicator> comm, real nue, SPtr<Interpolator> iProcessor);
+    SetInterpolationConnectorsBlockVisitor(std::shared_ptr<vf::parallel::Communicator> comm, real nue, SPtr<Interpolator> iProcessor);
     ~SetInterpolationConnectorsBlockVisitor() override;
     void visit(SPtr<Grid3D> grid, SPtr<Block3D> block) override;
     //////////////////////////////////////////////////////////////////////////
@@ -63,7 +63,7 @@ protected:
                             CreateTransmittersHelper::TransmitterPtr &receiverCF,
                             CreateTransmittersHelper::TransmitterPtr &senderFC,
                             CreateTransmittersHelper::TransmitterPtr &receiverFC);
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
     int gridRank;
     real nue;
     SPtr<Interpolator> iProcessor;
diff --git a/src/cpu/VirtualFluidsCore/Visitors/ZoltanPartitioningGridVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/ZoltanPartitioningGridVisitor.cpp
index 7d9f5e8d4e233c6f18aa5e95818b71143c3d3442..c168cd664d5de94fab8039a79c79b5e38c53adb8 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/ZoltanPartitioningGridVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/ZoltanPartitioningGridVisitor.cpp
@@ -9,7 +9,7 @@
 
 using namespace std;
 
-ZoltanPartitioningGridVisitor::ZoltanPartitioningGridVisitor(std::shared_ptr<vf::mpi::Communicator> comm, int numOfDirs,
+ZoltanPartitioningGridVisitor::ZoltanPartitioningGridVisitor(std::shared_ptr<vf::parallel::Communicator> comm, int numOfDirs,
                                                              int numOfLocalParts)
     : comm(comm), numOfDirs(numOfDirs), numOfLocalParts(numOfLocalParts)
 {
diff --git a/src/cpu/VirtualFluidsCore/Visitors/ZoltanPartitioningGridVisitor.h b/src/cpu/VirtualFluidsCore/Visitors/ZoltanPartitioningGridVisitor.h
index aeaf4d705c0b91cad482f87dff36ad6347363504..1f02c5efa3e7566d1407952024d358597bdb9e30 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/ZoltanPartitioningGridVisitor.h
+++ b/src/cpu/VirtualFluidsCore/Visitors/ZoltanPartitioningGridVisitor.h
@@ -10,14 +10,14 @@
 
 #if defined VF_ZOLTAN && defined VF_MPI
 
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 #include "Grid3DVisitor.h"
 #include "ZoltanPartitioner.h"
 
 class ZoltanPartitioningGridVisitor : public Grid3DVisitor
 {
 public:
-    ZoltanPartitioningGridVisitor(std::shared_ptr<vf::mpi::Communicator> comm, int numOfDirs, int numOfLocalParts = 1);
+    ZoltanPartitioningGridVisitor(std::shared_ptr<vf::parallel::Communicator> comm, int numOfDirs, int numOfLocalParts = 1);
     ~ZoltanPartitioningGridVisitor();
     void visit(SPtr<Grid3D> grid);
 
@@ -26,7 +26,7 @@ protected:
     void repartGrid(SPtr<Grid3D> grid, ZoltanPartitioner &zp);
 
 private:
-    std::shared_ptr<vf::mpi::Communicator> comm;
+    std::shared_ptr<vf::parallel::Communicator> comm;
     int numOfDirs;
     int numOfLocalParts;
     ZoltanGraph *graph;
diff --git a/src/cpu/simulationconfig/Simulation.cpp b/src/cpu/simulationconfig/Simulation.cpp
index 67c4e469d41ea24f1029f09e1beb78a597974e4c..4b7d52286995ed9b5eadd5827d586b49e9f222a4 100644
--- a/src/cpu/simulationconfig/Simulation.cpp
+++ b/src/cpu/simulationconfig/Simulation.cpp
@@ -29,7 +29,7 @@
 #include <LBM/Interpolation/CompressibleOffsetMomentsInterpolator.h>
 #include <LBM/LBMKernel.h>
 #include <LBM/LBMUnitConverter.h>
-#include <mpi/MPICommunicator.h>
+#include <parallel/MPICommunicator.h>
 #include <Visitors/GenBlocksGridVisitor.h>
 #include <Visitors/InitDistributionsBlockVisitor.h>
 #include <Visitors/MetisPartitioningGridVisitor.h>
@@ -45,7 +45,7 @@
 
 CPUSimulation::CPUSimulation()
 {
-    this->communicator = vf::mpi::MPICommunicator::getInstance();
+    this->communicator = vf::parallel::MPICommunicator::getInstance();
     this->grid = std::make_shared<Grid3D>(communicator);
 }
 
diff --git a/src/cpu/simulationconfig/Simulation.h b/src/cpu/simulationconfig/Simulation.h
index ee8fc911c684b55d0f624ab8db960d225e24790b..be29539187338cdadb5df2d102c90d03ee342434 100644
--- a/src/cpu/simulationconfig/Simulation.h
+++ b/src/cpu/simulationconfig/Simulation.h
@@ -5,7 +5,7 @@
 #include <memory>
 #include <set>
 
-#include <mpi/Communicator.h>
+#include <parallel/Communicator.h>
 
 #include <geometry3d/GbPoint3D.h>
 #include <Interactors/Interactor3D.h>
@@ -78,7 +78,7 @@ private:
 
     std::shared_ptr<LBMKernel> lbmKernel;
     std::shared_ptr<AbstractLBMSystem> lbmSystem;
-    std::shared_ptr<vf::mpi::Communicator> communicator;
+    std::shared_ptr<vf::parallel::Communicator> communicator;
 
     std::shared_ptr<Grid3D> grid;
     std::vector<std::shared_ptr<Interactor3D>> interactors;
diff --git a/src/cuda/CMakeLists.txt b/src/cuda/CMakeLists.txt
deleted file mode 100644
index 4d49cfedc903f3578b64916966405ea48bf28901..0000000000000000000000000000000000000000
--- a/src/cuda/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-project(cuda LANGUAGES CUDA CXX)
-
-vf_add_library(NAME vf_cuda PUBLIC_LINK logger)
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
index 4924432dbf05ca2213e5fa08cf16a28ea75f8c9e..fddcc7f790142e6ae5fae2eb07b78e0c18aa5d60 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
@@ -50,7 +50,7 @@
 using namespace vf::basics::constant;
 
 struct Vertex;
-class  Grid;
+class Grid;
 class Transformator;
 class ArrowTransformator;
 class PolyDataWriterWrapper;
diff --git a/src/gpu/VirtualFluids_GPU/CMakeLists.txt b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
index d7e4a776271c5bc243a4ed663ffecd23dec5cebb..cd8abaf95b1904e835d47601bb80006ed0651fe9 100644
--- a/src/gpu/VirtualFluids_GPU/CMakeLists.txt
+++ b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
@@ -1,11 +1,7 @@
 project(VirtualFluids_GPU LANGUAGES CUDA CXX)
 
-set(additional_libraries "")
-if(MSVC)
-    set(additional_libraries ws2_32 Traffic) # ws_32 throws an error on Phoenix
-endif()
 
-vf_add_library(PUBLIC_LINK basics lbm PRIVATE_LINK ${additional_libraries} GridGenerator MPI::MPI_CXX vf_cuda)
+vf_add_library(PUBLIC_LINK basics lbm parallel PRIVATE_LINK GridGenerator cuda_helper)
 
 #SET(TPN_WIN32 "/EHsc")
 #https://stackoverflow.com/questions/6832666/lnk2019-when-including-asio-headers-solution-generated-with-cmake
@@ -13,10 +9,15 @@ vf_add_library(PUBLIC_LINK basics lbm PRIVATE_LINK ${additional_libraries} GridG
 
 set_target_properties(VirtualFluids_GPU PROPERTIES CUDA_SEPARABLE_COMPILATION ON POSITION_INDEPENDENT_CODE ON)
 
+
+if(MSVC)
+    set_target_properties(VirtualFluids_GPU PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON)
+endif()
+
 vf_add_tests()
 
 if(BUILD_VF_UNIT_TESTS)
-    set_target_properties(VirtualFluids_GPUTests PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+    # set_target_properties(VirtualFluids_GPUTests PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
     target_include_directories(VirtualFluids_GPUTests PRIVATE "${VF_THIRD_DIR}/cuda_samples/")
     target_include_directories(VirtualFluids_GPUTests PRIVATE "${VF_ROOT_DIR}/src/gpu/GridGenerator/")
     set_source_files_properties(Communication/ExchangeData27Test.cpp PROPERTIES LANGUAGE CUDA)
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
index a2b1039afca4eaa3fcd75e28cae16cb5f68f6c9b..cf1aaa3988e0809dd0995a18139ab7dcef75989f 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
@@ -13,6 +13,8 @@
 #include "CollisionStrategy.h"
 #include "RefinementStrategy.h"
 
+#include <parallel/Communicator.h>
+
 void UpdateGrid27::updateGrid(int level, unsigned int t)
 {
     //////////////////////////////////////////////////////////////////////////
@@ -381,7 +383,7 @@ void UpdateGrid27::exchangeData(int level)
     exchangeMultiGPU_noStreams_withPrepare(level, false);
 }
 
-UpdateGrid27::UpdateGrid27(SPtr<Parameter> para, vf::gpu::Communicator &comm, SPtr<CudaMemoryManager> cudaMemoryManager,
+UpdateGrid27::UpdateGrid27(SPtr<Parameter> para, vf::parallel::Communicator &comm, SPtr<CudaMemoryManager> cudaMemoryManager,
                            std::vector<std::shared_ptr<PorousMedia>> &pm, std::vector<SPtr<Kernel>> &kernels , BoundaryConditionFactory* bcFactory, SPtr<TurbulenceModelFactory>  tmFactory, GridScalingFactory* scalingFactory)
     : para(para), comm(comm), cudaMemoryManager(cudaMemoryManager), pm(pm), kernels(kernels), tmFactory(tmFactory)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
index 9c6ff48725f4e17121de0a1a8681d0bafcfb58ee..9de7e73ec03a0f00542dd5b718ed6f210399a18a 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
@@ -1,13 +1,17 @@
 #ifndef UPDATEGRID27_H
 #define UPDATEGRID27_H
 
-#include "LBM/LB.h"
+#include "Calculation/PorousMedia.h"
+#include "GPU/CudaMemoryManager.h"
 #include "GPU/GPU_Interface.h"
-#include "Parameter/Parameter.h"
+#include "LBM/LB.h"
 #include "Parameter/CudaStreamManager.h"
-#include "GPU/CudaMemoryManager.h"
-#include "Communication/Communicator.h"
-#include "Calculation/PorousMedia.h"
+#include "Parameter/Parameter.h"
+
+namespace vf::parallel
+{
+class Communicator;
+}
 
 class BCKernelManager;
 class ADKernelManager;
@@ -24,7 +28,7 @@ using RefinementStrategy = std::function<void (UpdateGrid27* updateGrid, Paramet
 class UpdateGrid27
 {
 public:
-    UpdateGrid27(SPtr<Parameter> para, vf::gpu::Communicator &comm, SPtr<CudaMemoryManager> cudaMemoryManager,
+    UpdateGrid27(SPtr<Parameter> para, vf::parallel::Communicator& comm, SPtr<CudaMemoryManager> cudaMemoryManager,
                  std::vector<std::shared_ptr<PorousMedia>> &pm, std::vector<SPtr<Kernel>> &kernels, BoundaryConditionFactory* bcFactory, SPtr<TurbulenceModelFactory> tmFactory, GridScalingFactory* scalingFactory);
     void updateGrid(int level, unsigned int t);
     void exchangeData(int level);
@@ -72,7 +76,7 @@ private:
 
 private:
     SPtr<Parameter> para;
-    vf::gpu::Communicator& comm;
+    vf::parallel::Communicator& comm;
     SPtr<CudaMemoryManager> cudaMemoryManager;
     std::vector<std::shared_ptr<PorousMedia>> pm;
     std::vector<SPtr<Kernel>> kernels;
diff --git a/src/gpu/VirtualFluids_GPU/Communication/CommunicationRoutine.h b/src/gpu/VirtualFluids_GPU/Communication/CommunicationRoutine.h
deleted file mode 100644
index 26c017f939b0795457d74008a21cb9e7a4b75bd0..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Communication/CommunicationRoutine.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef INDEX_EXCHANGE
-#define INDEX_EXCHANGE
-
-#include <basics/DataTypes.h>
-
-namespace vf::gpu
-{
-class CommunicationRoutine
-{
-public:
-    virtual ~CommunicationRoutine() = default;
-
-    virtual void receive_send(uint *buffer_receive, int size_buffer_recv, int neighbor_rank_recv, uint *buffer_send,
-                              int size_buffer_send, int neighbor_rank_send) const = 0;
-    virtual int getPID() const = 0;
-};
-} // namespace vf::gpu
-
-#endif
diff --git a/src/gpu/VirtualFluids_GPU/Communication/CommunicationRoutineMocks.h b/src/gpu/VirtualFluids_GPU/Communication/CommunicationRoutineMocks.h
deleted file mode 100644
index d05e5b6a3fdb75f9073d9f376980612f040c4038..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Communication/CommunicationRoutineMocks.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef VF_GPU_COMMUNICATIONROUTINEMOCKS_H
-#define VF_GPU_COMMUNICATIONROUTINEMOCKS_H
-
-#include "CommunicationRoutine.h"
-
-namespace vf::gpu::test 
-{
-
-class CommunicationRoutineTestDouble : public vf::gpu::CommunicationRoutine
-{
-public:
-    void receive_send(uint *buffer_receive, int size_buffer_recv, int neighbor_rank_recv, uint *buffer_send,
-                              int size_buffer_send, int neighbor_rank_send) const override { } 
-    int getPID() const override { return 0; }
-};
-
-}
-
-
-
-#endif
diff --git a/src/gpu/VirtualFluids_GPU/Communication/Communicator.h b/src/gpu/VirtualFluids_GPU/Communication/Communicator.h
deleted file mode 100644
index c52d5af9cacb4d5ae4e46090a263f67d4e63f12d..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Communication/Communicator.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef COMMUNICATOR_GPU_H
-#define COMMUNICATOR_GPU_H
-
-#include <vector>
-#include <basics/DataTypes.h>
-
-#include "VirtualFluids_GPU_export.h"
-#include "CommunicationRoutine.h"
-
-namespace vf::gpu
-{
-
-class VIRTUALFLUIDS_GPU_EXPORT Communicator : public CommunicationRoutine
-{
-public:
-    virtual void waitAll() = 0;
-    virtual int getPID() const override = 0;
-    virtual int getNumberOfProcess() const = 0;
-    virtual void exchngData(float *sbuf_t, float *rbuf_t, float *sbuf_b, float *rbuf_b, int count) = 0;
-    //////////////////////////////////////////////////////////////////////////
-    virtual void exchngDataGPU(real *sbuf, int count_s, real *rbuf, int count_r, int nb_rank) = 0;
-    virtual void nbRecvDataGPU(real *rbuf, int count_r, int nb_rank) = 0;
-    virtual void nbSendDataGPU(real *sbuf, int count_s, int nb_rank) = 0;
-    virtual void waitallGPU() = 0;
-    virtual void sendDataGPU(real *sbuf, int count_s, int nb_rank) = 0;
-    virtual void waitGPU(int id) = 0;
-    virtual void resetRequest() = 0;
-    //////////////////////////////////////////////////////////////////////////
-    virtual int mapCudaDevice(const int &rank, const int &size, const std::vector<unsigned int> &devices, const int &maxdev) = 0;
-    virtual double reduceSum(double quantityPerProcess) = 0;
-    //////////////////////////////////////////////////////////////////////////
-    virtual void receive_send(uint *buffer_receive, int size_buffer_recv, int neighbor_rank_recv, uint *buffer_send,
-                              int size_buffer_send, int neighbor_rank_send) const override = 0;
-
-};
-
-} // namespace vf::gpu
-
-#endif
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
index 48a27efa674e5fa85d47cb9439c52d0c558dac44..ff5e39c707d0e836440e40d33e3b261262d83b6d 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
@@ -1,9 +1,11 @@
-#include <helper_cuda.h>
 #include <cuda_runtime.h>
+#include <helper_cuda.h>
 
 #include "Communication/ExchangeData27.h"
 #include "Parameter/CudaStreamManager.h"
 
+#include <parallel/Communicator.h>
+
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -52,22 +54,20 @@ void scatterNodesFromRecvBufferGPU(Parameter *para, int level, CudaStreamIndex s
     }
 }
 
-void startBlockingMpiSend(unsigned int numberOfSendProcessNeighbors, vf::gpu::Communicator &comm,
+void startBlockingMpiSend(unsigned int numberOfSendProcessNeighbors, vf::parallel::Communicator &comm,
                           std::vector<ProcessNeighbor27> *sendProcessNeighborHost)
 {
     for (unsigned int i = 0; i < numberOfSendProcessNeighbors; i++) {
-            comm.sendDataGPU((*sendProcessNeighborHost)[i].f[0], 
-                            (*sendProcessNeighborHost)[i].numberOfFs,
-                            (*sendProcessNeighborHost)[i].rankNeighbor);
+        comm.send((*sendProcessNeighborHost)[i].f[0], (*sendProcessNeighborHost)[i].numberOfFs,
+                  (*sendProcessNeighborHost)[i].rankNeighbor);
     }
 }
 
-void startNonBlockingMpiReceive(unsigned int numberOfSendProcessNeighbors, vf::gpu::Communicator &comm,
+void startNonBlockingMpiReceive(unsigned int numberOfSendProcessNeighbors, vf::parallel::Communicator &comm,
                                 std::vector<ProcessNeighbor27> *recvProcessNeighborHost)
 {
     for (unsigned int i = 0; i < numberOfSendProcessNeighbors; i++) {
-            comm.nbRecvDataGPU((*recvProcessNeighborHost)[i].f[0], 
-                                (*recvProcessNeighborHost)[i].numberOfFs,
+        comm.receiveNonBlocking((*recvProcessNeighborHost)[i].f[0], (*recvProcessNeighborHost)[i].numberOfFs,
                                 (*recvProcessNeighborHost)[i].rankNeighbor);
     }
 }
@@ -117,7 +117,7 @@ void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, CudaStre
                                 (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")));
 }
 
-void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::parallel::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                     int level, CudaStreamIndex streamIndex)
 {
     exchangeCollDataXGPU27(para, comm, cudaMemoryManager, level, streamIndex,
@@ -127,7 +127,7 @@ void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm
                            &para->getParH(level)->recvProcessNeighborX);
 }
 
-void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::parallel::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                      int level, CudaStreamIndex streamIndex)
 {
     exchangeCollDataXGPU27(para, comm, cudaMemoryManager, level, streamIndex,
@@ -149,7 +149,7 @@ void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, CudaS
                                   (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")));
 }
 
-void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, 
+void exchangeCollDataXGPU27(Parameter *para, vf::parallel::Communicator& comm, CudaMemoryManager *cudaMemoryManager, 
                             int level, CudaStreamIndex streamIndex,
                             std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                             std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
@@ -158,7 +158,7 @@ void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe
 {
     cudaStream_t stream = para->getStreamManager()->getStream(streamIndex);
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    //! \details steps: 
+    //! \details steps:
     //! 1. copy data from device to host
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
         cudaMemoryManager->cudaCopyProcessNeighborXFsDH(level, i, (*sendProcessNeighborDev)[i].memsizeFs);
@@ -174,10 +174,10 @@ void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe
     startBlockingMpiSend((unsigned int)(*sendProcessNeighborHost).size(), comm, sendProcessNeighborHost);
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //! 5. wait for until data is received
-    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++) comm.waitGPU(i);
+    comm.waitAll();
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //! 6. reset the request array, which was used for the mpi communication
-    if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) comm.resetRequest();
+    if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send"))) comm.resetRequests();
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //! 7. copy received data from host to device
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
@@ -201,7 +201,7 @@ void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, CudaStre
                                 (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")));
 }
 
-void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::parallel::Communicator& comm, CudaMemoryManager *cudaMemoryManager,
                                     int level, CudaStreamIndex streamIndex)
 {
     exchangeCollDataYGPU27(para, comm, cudaMemoryManager, level, streamIndex,
@@ -211,7 +211,7 @@ void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm
                            &para->getParH(level)->recvProcessNeighborY);
 }
 
-void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::parallel::Communicator& comm, CudaMemoryManager *cudaMemoryManager,
                                      int level, CudaStreamIndex streamIndex)
 {
     exchangeCollDataYGPU27(para, comm, cudaMemoryManager, level, streamIndex,
@@ -233,7 +233,7 @@ void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, CudaS
                                   (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")));
 }
 
-void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, int level,
+void exchangeCollDataYGPU27(Parameter *para, vf::parallel::Communicator& comm, CudaMemoryManager *cudaMemoryManager, int level,
                             CudaStreamIndex streamIndex,
                             std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                             std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
@@ -270,10 +270,10 @@ void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe
     startBlockingMpiSend((unsigned int)(*sendProcessNeighborHost).size(), comm, sendProcessNeighborHost);
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // wait
-    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) comm.waitGPU(i);
+    comm.waitAll();
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // reset the request array
-    if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))) comm.resetRequest();
+    if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send"))) comm.resetRequests();
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // copy Host to Device
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++) {
@@ -298,7 +298,7 @@ void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, CudaStre
                                 (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")));
 }
 
-void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::parallel::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                     int level, CudaStreamIndex streamIndex)
 {
     exchangeCollDataZGPU27(para, comm, cudaMemoryManager, level, streamIndex,
@@ -307,7 +307,7 @@ void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm
                            &para->getParH(level)->sendProcessNeighborZ,
                            &para->getParH(level)->recvProcessNeighborZ);
 }
-void exchangeCollDataZGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataZGPU27AfterFtoC(Parameter *para, vf::parallel::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                      int level, CudaStreamIndex streamIndex)
 {
     exchangeCollDataZGPU27(para, comm, cudaMemoryManager, level, streamIndex,
@@ -330,7 +330,7 @@ void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, CudaS
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, int level, 
+void exchangeCollDataZGPU27(Parameter *para, vf::parallel::Communicator &comm, CudaMemoryManager *cudaMemoryManager, int level, 
                             CudaStreamIndex streamIndex,
                             std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                             std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
@@ -380,10 +380,10 @@ void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe
     startBlockingMpiSend((unsigned int)(*sendProcessNeighborHost).size(), comm, sendProcessNeighborHost);
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // wait
-    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++) comm.waitGPU(i);
+    comm.waitAll();
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // reset the request array
-    if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))) comm.resetRequest();
+    if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send"))) comm.resetRequests();
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // copy Host to Device
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
@@ -416,7 +416,7 @@ void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //1D domain decomposition
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
+void exchangePreCollDataGPU27(Parameter* para, vf::parallel::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
 {
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighbors(level, "send")); i++)
     {
@@ -434,7 +434,7 @@ void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cuda
         //////////////////////////////////////////////////////////////////////////
         cudaMemoryManager->cudaCopyProcessNeighborFsDH(level, i);
         //////////////////////////////////////////////////////////////////////////
-        comm.exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], 
+        comm.receiveSend(para->getParH(level)->sendProcessNeighbor[i].f[0], 
                             para->getParH(level)->sendProcessNeighbor[i].numberOfFs,
                             para->getParH(level)->recvProcessNeighbor[i].f[0],
                             para->getParH(level)->recvProcessNeighbor[i].numberOfFs,
@@ -461,7 +461,7 @@ void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cuda
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
+void exchangePostCollDataGPU27(Parameter* para, vf::parallel::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
 {
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighbors(level, "send")); i++)
     {
@@ -479,7 +479,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud
         //////////////////////////////////////////////////////////////////////////
         cudaMemoryManager->cudaCopyProcessNeighborFsDH(level, i);
         //////////////////////////////////////////////////////////////////////////
-        comm.exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], 
+        comm.receiveSend(para->getParH(level)->sendProcessNeighbor[i].f[0], 
                             para->getParH(level)->sendProcessNeighbor[i].numberOfFs,
                             para->getParH(level)->recvProcessNeighbor[i].f[0],
                             para->getParH(level)->recvProcessNeighbor[i].numberOfFs,
@@ -502,7 +502,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud
 }
 
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, int level)
+//void exchangePostCollDataXGPU27(Parameter* para, vf::parallel::Communicator& comm, int level)
 //{
 //    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 //    {
@@ -520,7 +520,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud
 //        //////////////////////////////////////////////////////////////////////////
 //        para->cudaCopyProcessNeighborXFsDH(level, i);
 //        //////////////////////////////////////////////////////////////////////////
-//        comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], 
+//        comm.receiveSend(para->getParH(level)->sendProcessNeighborX[i].f[0], 
 //                            para->getParH(level)->sendProcessNeighborX[i].numberOfFs,
 //                            para->getParH(level)->recvProcessNeighborX[i].f[0],
 //                            para->getParH(level)->recvProcessNeighborX[i].numberOfFs,
@@ -549,7 +549,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //// Y
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, int level)
+//void exchangePreCollDataYGPU27(Parameter* para, vf::parallel::Communicator& comm, int level)
 //{
 //    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 //    {
@@ -567,7 +567,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud
 //        //////////////////////////////////////////////////////////////////////////
 //        para->cudaCopyProcessNeighborYFsDH(level, i);
 //        //////////////////////////////////////////////////////////////////////////
-//        comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], 
+//        comm.receiveSend(para->getParH(level)->sendProcessNeighborY[i].f[0], 
 //                            para->getParH(level)->sendProcessNeighborY[i].numberOfFs,
 //                            para->getParH(level)->recvProcessNeighborY[i].f[0],
 //                            para->getParH(level)->recvProcessNeighborY[i].numberOfFs,
@@ -589,7 +589,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud
 //    }
 //}
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, int level)
+//void exchangePostCollDataYGPU27(Parameter* para, vf::parallel::Communicator& comm, int level)
 //{
 //    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 //    {
@@ -607,7 +607,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud
 //        //////////////////////////////////////////////////////////////////////////
 //        para->cudaCopyProcessNeighborYFsDH(level, i);
 //        //////////////////////////////////////////////////////////////////////////
-//        comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], 
+//        comm.receiveSend(para->getParH(level)->sendProcessNeighborY[i].f[0], 
 //                            para->getParH(level)->sendProcessNeighborY[i].numberOfFs,
 //                            para->getParH(level)->recvProcessNeighborY[i].f[0],
 //                            para->getParH(level)->recvProcessNeighborY[i].numberOfFs,
@@ -636,7 +636,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //// Z
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, int level)
+//void exchangePreCollDataZGPU27(Parameter* para, vf::parallel::Communicator& comm, int level)
 //{
 //    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 //    {
@@ -654,7 +654,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud
 //        //////////////////////////////////////////////////////////////////////////
 //        para->cudaCopyProcessNeighborZFsDH(level, i);
 //        //////////////////////////////////////////////////////////////////////////
-//        comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], 
+//        comm.receiveSend(para->getParH(level)->sendProcessNeighborZ[i].f[0], 
 //                            para->getParH(level)->sendProcessNeighborZ[i].numberOfFs,
 //                            para->getParH(level)->recvProcessNeighborZ[i].f[0],
 //                            para->getParH(level)->recvProcessNeighborZ[i].numberOfFs,
@@ -676,7 +676,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud
 //    }
 //}
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, int level)
+//void exchangePostCollDataZGPU27(Parameter* para, vf::parallel::Communicator& comm, int level)
 //{
 //    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 //    {
@@ -694,7 +694,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud
 //        //////////////////////////////////////////////////////////////////////////
 //        para->cudaCopyProcessNeighborZFsDH(level, i);
 //        //////////////////////////////////////////////////////////////////////////
-//        comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], 
+//        comm.receiveSend(para->getParH(level)->sendProcessNeighborZ[i].f[0], 
 //                            para->getParH(level)->sendProcessNeighborZ[i].numberOfFs,
 //                            para->getParH(level)->recvProcessNeighborZ[i].f[0],
 //                            para->getParH(level)->recvProcessNeighborZ[i].numberOfFs,
@@ -771,7 +771,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, Cud
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // X
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
+void exchangePreCollDataADXGPU27(Parameter* para, vf::parallel::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
 {
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Device to Host
@@ -794,7 +794,7 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, C
     //start non blocking MPI receive
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
     {
-        comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0],
+        comm.receiveNonBlocking(para->getParH(level)->recvProcessNeighborADX[i].f[0],
                             para->getParH(level)->recvProcessNeighborADX[i].numberOfFs,
                             para->getParH(level)->recvProcessNeighborADX[i].rankNeighbor);
     }
@@ -816,21 +816,18 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, C
     //start blocking MPI send
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
     {
-        comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
+        comm.send(para->getParH(level)->sendProcessNeighborADX[i].f[0],
                           para->getParH(level)->sendProcessNeighborADX[i].numberOfFs,
                           para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor);
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //Wait
-    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
-    {
-        comm.waitGPU(i);
-    }
+    comm.waitAll();
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //reset the request array
     if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
     {
-        comm.resetRequest();
+        comm.resetRequests();
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Host to Device
@@ -852,7 +849,7 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, C
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
+void exchangePostCollDataADXGPU27(Parameter* para, vf::parallel::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
 {
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Device to Host
@@ -875,7 +872,7 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm,
     //start non blocking MPI receive
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
     {
-        comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0],
+        comm.receiveNonBlocking(para->getParH(level)->recvProcessNeighborADX[i].f[0],
                             para->getParH(level)->recvProcessNeighborADX[i].numberOfFs,
                             para->getParH(level)->recvProcessNeighborADX[i].rankNeighbor);
     }
@@ -897,21 +894,18 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm,
     //start blocking MPI send
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
     {
-        comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
+        comm.send(para->getParH(level)->sendProcessNeighborADX[i].f[0],
                           para->getParH(level)->sendProcessNeighborADX[i].numberOfFs,
                           para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor);
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //Wait
-    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
-    {
-        comm.waitGPU(i);
-    }
+    comm.waitAll();
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //reset the request array
     if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
     {
-        comm.resetRequest();
+        comm.resetRequests();
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Host to Device
@@ -940,7 +934,7 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm,
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Y
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
+void exchangePreCollDataADYGPU27(Parameter* para, vf::parallel::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
 {
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Device to Host
@@ -963,7 +957,7 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, C
     //start non blocking MPI receive
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
     {
-        comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0],
+        comm.receiveNonBlocking(para->getParH(level)->recvProcessNeighborADY[i].f[0],
                             para->getParH(level)->recvProcessNeighborADY[i].numberOfFs,
                             para->getParH(level)->recvProcessNeighborADY[i].rankNeighbor);
     }
@@ -985,21 +979,18 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, C
     //start blocking MPI send
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
     {
-        comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
+        comm.send(para->getParH(level)->sendProcessNeighborADY[i].f[0],
                           para->getParH(level)->sendProcessNeighborADY[i].numberOfFs,
                           para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor);
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //Wait
-    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
-    {
-        comm.waitGPU(i);
-    }
+    comm.waitAll();
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //reset the request array
     if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
     {
-        comm.resetRequest();
+        comm.resetRequests();
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Host to Device
@@ -1021,7 +1012,7 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, C
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
+void exchangePostCollDataADYGPU27(Parameter* para, vf::parallel::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
 {
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Device to Host
@@ -1044,7 +1035,7 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm,
     //start non blocking MPI receive
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
     {
-        comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0],
+        comm.receiveNonBlocking(para->getParH(level)->recvProcessNeighborADY[i].f[0],
                             para->getParH(level)->recvProcessNeighborADY[i].numberOfFs,
                             para->getParH(level)->recvProcessNeighborADY[i].rankNeighbor);
     }
@@ -1066,21 +1057,18 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm,
     //start blocking MPI send
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
     {
-        comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
+        comm.send(para->getParH(level)->sendProcessNeighborADY[i].f[0],
                           para->getParH(level)->sendProcessNeighborADY[i].numberOfFs,
                           para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor);
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //Wait
-    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
-    {
-        comm.waitGPU(i);
-    }
+    comm.waitAll();
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //reset the request array
     if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
     {
-        comm.resetRequest();
+        comm.resetRequests();
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Host to Device
@@ -1109,7 +1097,7 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm,
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Z
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
+void exchangePreCollDataADZGPU27(Parameter* para, vf::parallel::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
 {
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Device to Host
@@ -1132,7 +1120,7 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, C
     //start non blocking MPI receive
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     {
-        comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0],
+        comm.receiveNonBlocking(para->getParH(level)->recvProcessNeighborADZ[i].f[0],
                             para->getParH(level)->recvProcessNeighborADZ[i].numberOfFs,
                             para->getParH(level)->recvProcessNeighborADZ[i].rankNeighbor);
     }
@@ -1154,21 +1142,18 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, C
     //start blocking MPI send
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     {
-        comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
+        comm.send(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
                           para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs,
                           para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor);
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //Wait
-    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
-    {
-        comm.waitGPU(i);
-    }
+    comm.waitAll();
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //reset the request array
     if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
     {
-        comm.resetRequest();
+        comm.resetRequests();
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Host to Device
@@ -1190,7 +1175,7 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, C
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
+void exchangePostCollDataADZGPU27(Parameter* para, vf::parallel::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
 {
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Device to Host
@@ -1213,7 +1198,7 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm,
     //start non blocking MPI receive
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     {
-        comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0],
+        comm.receiveNonBlocking(para->getParH(level)->recvProcessNeighborADZ[i].f[0],
                             para->getParH(level)->recvProcessNeighborADZ[i].numberOfFs,
                             para->getParH(level)->recvProcessNeighborADZ[i].rankNeighbor);
     }
@@ -1235,21 +1220,18 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm,
     //start blocking MPI send
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     {
-        comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
+        comm.send(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
                           para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs,
                           para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor);
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //Wait
-    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
-    {
-        comm.waitGPU(i);
-    }
+    comm.waitAll();
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //reset the request array
     if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
     {
-        comm.resetRequest();
+        comm.resetRequests();
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Host to Device
@@ -1325,7 +1307,7 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm,
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // X
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
+void exchangeCollDataF3XGPU(Parameter* para, vf::parallel::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
 {
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Device to Host
@@ -1349,7 +1331,7 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMe
     //start non blocking MPI receive
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
     {
-        comm.nbRecvDataGPU(
+        comm.receiveNonBlocking(
             para->getParH(level)->recvProcessNeighborF3X[i].g[0],
             para->getParH(level)->recvProcessNeighborF3X[i].numberOfGs,
             para->getParH(level)->recvProcessNeighborF3X[i].rankNeighbor);
@@ -1358,22 +1340,19 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMe
     //start blocking MPI send
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
     {
-        comm.sendDataGPU(
+        comm.send(
             para->getParH(level)->sendProcessNeighborF3X[i].g[0],
             para->getParH(level)->sendProcessNeighborF3X[i].numberOfGs,
             para->getParH(level)->sendProcessNeighborF3X[i].rankNeighbor);
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //Wait
-    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
-    {
-        comm.waitGPU(i);
-    }
+    comm.waitAll();
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //reset the request array
     if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
     {
-        comm.resetRequest();
+        comm.resetRequests();
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Host to Device
@@ -1403,7 +1382,7 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMe
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Y
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
+void exchangeCollDataF3YGPU(Parameter* para, vf::parallel::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
 {
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Device to Host
@@ -1427,7 +1406,7 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMe
     //start non blocking MPI receive
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
     {
-        comm.nbRecvDataGPU(
+        comm.receiveNonBlocking(
             para->getParH(level)->recvProcessNeighborF3Y[i].g[0],
             para->getParH(level)->recvProcessNeighborF3Y[i].numberOfGs,
             para->getParH(level)->recvProcessNeighborF3Y[i].rankNeighbor);
@@ -1436,22 +1415,19 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMe
     //start blocking MPI send
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
     {
-        comm.sendDataGPU(
+        comm.send(
             para->getParH(level)->sendProcessNeighborF3Y[i].g[0],
             para->getParH(level)->sendProcessNeighborF3Y[i].numberOfGs,
             para->getParH(level)->sendProcessNeighborF3Y[i].rankNeighbor);
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //Wait
-    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
-    {
-        comm.waitGPU(i);
-    }
+    comm.waitAll();
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //reset the request array
     if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
     {
-        comm.resetRequest();
+        comm.resetRequests();
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Host to Device
@@ -1481,7 +1457,7 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMe
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Z
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
+void exchangeCollDataF3ZGPU(Parameter* para, vf::parallel::Communicator& comm, CudaMemoryManager* cudaMemoryManager, int level)
 {
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Device to Host
@@ -1505,7 +1481,7 @@ void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMe
     //start non blocking MPI receive
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     {
-        comm.nbRecvDataGPU(
+        comm.receiveNonBlocking(
             para->getParH(level)->recvProcessNeighborF3Z[i].g[0],
             para->getParH(level)->recvProcessNeighborF3Z[i].numberOfGs,
             para->getParH(level)->recvProcessNeighborF3Z[i].rankNeighbor);
@@ -1514,22 +1490,19 @@ void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMe
     //start blocking MPI send
     for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
     {
-        comm.sendDataGPU(
+        comm.send(
             para->getParH(level)->sendProcessNeighborF3Z[i].g[0],
             para->getParH(level)->sendProcessNeighborF3Z[i].numberOfGs,
             para->getParH(level)->sendProcessNeighborF3Z[i].rankNeighbor);
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //Wait
-    for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
-    {
-        comm.waitGPU(i);
-    }
+    comm.waitAll();
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //reset the request array
     if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
     {
-        comm.resetRequest();
+        comm.resetRequests();
     }
     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     //copy Host to Device
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
index 8302ffdc47bfa012c47df00f90c2491039f4eaee..8b03b2b100fe8a039f9199c9141a434f481da3d0 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
@@ -1,13 +1,17 @@
 #ifndef EXCHANGEDATA27_H
 #define EXCHANGEDATA27_H
 
-#include "Communication/Communicator.h"
 #include "GPU/CudaMemoryManager.h"
 #include "GPU/GPU_Interface.h"
 #include "LBM/LB.h"
 #include "Parameter/Parameter.h"
 #include "Parameter/CudaStreamManager.h"
 
+namespace vf::parallel
+{
+class Communicator;
+}
+
 //! \file ExchangeData27.h
 //! \ingroup GPU
 //! \author Martin Schoenherr, Anna Wellmann
@@ -15,9 +19,9 @@
 
 //////////////////////////////////////////////////////////////////////////
 // 1D domain decomposition
-void exchangePreCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, 
+void exchangePreCollDataGPU27(Parameter *para, vf::parallel::Communicator& comm, CudaMemoryManager *cudaMemoryManager, 
                                          int level);
-void exchangePostCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager, 
+void exchangePostCollDataGPU27(Parameter *para, vf::parallel::Communicator& comm, CudaMemoryManager *cudaMemoryManager, 
                                           int level);
 //////////////////////////////////////////////////////////////////////////
 // 3D domain decomposition
@@ -62,7 +66,7 @@ void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, CudaStre
 //! \param CudaMemoryManager is needed for moving the data between host and device
 //! \param sendProcessNeighborDev, recvProcessNeighborDev, sendProcessNeighborHost, recvProcessNeighborHost are pointers
 //! to the send and receive arrays, both on the device and the host
-void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataXGPU27(Parameter *para, vf::parallel::Communicator& comm, CudaMemoryManager *cudaMemoryManager,
                                        int level, CudaStreamIndex streamIndex,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
@@ -70,14 +74,14 @@ void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMe
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborHost);
 //! \brief Calls exchangeCollDataXGPU27() for exchanging all nodes
 //! \details Used in the communication after collision step
-void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::parallel::Communicator& comm,
                                                CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex);
 //! \brief Calls exchangeCollDataXGPU27() for exchanging the nodes, which are part of the communication between the two
 //! interpolation processes on refined grids 
 //! \details Only exchange nodes which are part of the interpolation process on
 //! refined grids. This function is used in the exchange which takes place after the interpolation fine to coarse and
 //! before the interpolation coarse to fine. See [master thesis of Anna Wellmann]
-void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::parallel::Communicator& comm,
                                                 CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex);
 //! \brief Distribute the receive nodes (x direction) from the buffer on the gpu
 //! \details Needed to exchange all nodes, used in the communication after collision step
@@ -94,15 +98,15 @@ void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, CudaS
 void prepareExchangeCollDataYGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex);
 void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex);
 
-void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataYGPU27(Parameter *para, vf::parallel::Communicator& comm, CudaMemoryManager *cudaMemoryManager,
                                        int level,CudaStreamIndex streamIndex,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborHost,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborHos);
-void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::parallel::Communicator& comm,
                                                CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex);
-void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::parallel::Communicator& comm,
                                                 CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex);
 void scatterNodesFromRecvBufferYGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex);
 void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex);
@@ -111,15 +115,15 @@ void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, CudaS
 void prepareExchangeCollDataZGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex);
 void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex);
 
-void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataZGPU27(Parameter *para, vf::parallel::Communicator& comm, CudaMemoryManager *cudaMemoryManager,
                                        int level, CudaStreamIndex streamIndex,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborHost,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborHost);
-void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::parallel::Communicator& comm,
                                                CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex);
-void exchangeCollDataZGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataZGPU27AfterFtoC(Parameter *para, vf::parallel::Communicator& comm,
                                                 CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex);
 
 void scatterNodesFromRecvBufferZGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex);
@@ -127,28 +131,25 @@ void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, CudaS
 
 //////////////////////////////////////////////////////////////////////////
 // 3D domain decomposition convection diffusion
-void exchangePreCollDataADXGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePreCollDataADXGPU27(Parameter *para, vf::parallel::Communicator& comm,
                                             CudaMemoryManager *cudaMemoryManager, int level);
-void exchangePreCollDataADYGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePreCollDataADYGPU27(Parameter *para, vf::parallel::Communicator& comm,
                                             CudaMemoryManager *cudaMemoryManager, int level);
-void exchangePreCollDataADZGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePreCollDataADZGPU27(Parameter *para, vf::parallel::Communicator& comm,
                                             CudaMemoryManager *cudaMemoryManager, int level);
-void exchangePostCollDataADXGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePostCollDataADXGPU27(Parameter *para, vf::parallel::Communicator& comm,
                                              CudaMemoryManager *cudaMemoryManager, int level);
-void exchangePostCollDataADYGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePostCollDataADYGPU27(Parameter *para, vf::parallel::Communicator& comm,
                                              CudaMemoryManager *cudaMemoryManager, int level);
-void exchangePostCollDataADZGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePostCollDataADZGPU27(Parameter *para, vf::parallel::Communicator& comm,
                                              CudaMemoryManager *cudaMemoryManager, int level);
 //////////////////////////////////////////////////////////////////////////
 // 3D domain decomposition F3 - K18/K20
-void exchangeCollDataF3XGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataF3XGPU(Parameter *para, vf::parallel::Communicator& comm, CudaMemoryManager *cudaMemoryManager,
                                        int level);
-void exchangeCollDataF3YGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataF3YGPU(Parameter *para, vf::parallel::Communicator& comm, CudaMemoryManager *cudaMemoryManager,
                                        int level);
-void exchangeCollDataF3ZGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataF3ZGPU(Parameter *para, vf::parallel::Communicator& comm, CudaMemoryManager *cudaMemoryManager,
                                        int level);
-//////////////////////////////////////////////////////////////////////////
-void barrierGPU(vf::gpu::Communicator &comm);
-//////////////////////////////////////////////////////////////////////////
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Communication/MpiCommunicator.cpp b/src/gpu/VirtualFluids_GPU/Communication/MpiCommunicator.cpp
deleted file mode 100644
index 8af5931ce92b6fa4904ab3aea7c901773f61a6b3..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Communication/MpiCommunicator.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-#include "MpiCommunicator.h"
-
-#include <mpi.h>
-#include <vector>
-
-#include <logger/Logger.h>
-
-#if defined (_WIN32) || defined (_WIN64)
-   #include <Winsock2.h>
-#elif defined (__unix__)
-   #include <unistd.h>
-#endif
-//lib for windows Ws2_32.lib
-
-namespace vf::gpu
-{
-
-
-MpiCommunicator::MpiCommunicator()
-{
-    int mpiInitialized = 0; // false
-    MPI_Initialized(&mpiInitialized);
-    if (!mpiInitialized) {
-        MPI_Init(NULL, NULL);
-        VF_LOG_TRACE("vf::gpu::MpiCommunicator(): MPI_Init");
-    }
-
-    MPI_Comm_rank(MPI_COMM_WORLD, &PID);
-    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
-
-    commGPU = MPI_COMM_WORLD;
-    requestGPU.resize(0);
-    rcount = 0;
-
-    // Get a new communicator for a decomposition of the domain
-    int isperiodic[1] = { 0 };
-    MPI_Cart_create(MPI_COMM_WORLD, 1, &numprocs, isperiodic, 1, &comm1d);
-
-    // Get my position in this communicator, and my neighbors
-    MPI_Cart_shift(comm1d, 0, 1, &nbrbottom, &nbrtop);
-}
-
-MpiCommunicator::~MpiCommunicator()
-{
-    // proof if MPI is finalized
-    int _mpiFinalized = 0; // false
-    MPI_Finalized(&_mpiFinalized);
-    if (!_mpiFinalized) {
-        MPI_Finalize();
-        VF_LOG_TRACE("vf::gpu::~MpiCommunicator(): MPI_Finalize");
-    }
-}
-
-
-// C++11 thread safe singelton implementation:
-// https://stackoverflow.com/questions/1661529/is-meyers-implementation-of-the-singleton-pattern-thread-safe
-MpiCommunicator& MpiCommunicator::getInstance()
-{
-    static MpiCommunicator comm;
-    return comm;
-}
-
-void MpiCommunicator::exchngBottomToTop(float *sbuf, float *rbuf, int count)
-{
-    MPI_Sendrecv(sbuf, count, MPI_FLOAT, nbrtop, 0, rbuf, count, MPI_FLOAT, nbrbottom, 0, comm1d, status);
-}
-void MpiCommunicator::exchngTopToBottom(float *sbuf, float *rbuf, int count)
-{
-    MPI_Sendrecv(sbuf, count, MPI_FLOAT, nbrbottom, 0, rbuf, count, MPI_FLOAT, nbrtop, 0, comm1d, status);
-}
-void MpiCommunicator::waitAll() { MPI_Waitall(4, request, status); }
-void MpiCommunicator::exchngData(float *sbuf_t, float *rbuf_t, float *sbuf_b, float *rbuf_b, int count)
-{
-    MPI_Sendrecv(sbuf_t, count, MPI_FLOAT, nbrtop, 0, rbuf_t, count, MPI_FLOAT, nbrbottom, 0, comm1d, status);
-    MPI_Sendrecv(sbuf_b, count, MPI_FLOAT, nbrbottom, 0, rbuf_b, count, MPI_FLOAT, nbrtop, 0, comm1d, status);
-}
-void MpiCommunicator::exchngDataNB(float *sbuf_t, int count_st, float *rbuf_t, int count_rt, float *sbuf_b, int count_sb,
-                                float *rbuf_b, int count_rb)
-{
-    MPI_Irecv(rbuf_t, count_rt, MPI_FLOAT, nbrbottom, 0, comm1d, &request[0]);
-    MPI_Irecv(rbuf_b, count_rb, MPI_FLOAT, nbrtop, 0, comm1d, &request[1]);
-    MPI_Isend(sbuf_t, count_st, MPI_FLOAT, nbrtop, 0, comm1d, &request[2]);
-    MPI_Isend(sbuf_b, count_sb, MPI_FLOAT, nbrbottom, 0, comm1d, &request[3]);
-    MPI_Waitall(4, request, status);
-}
-//////////////////////////////////////////////////////////////////////////
-// Crap by Martin Sch.
-void MpiCommunicator::exchngDataGPU(real *sbuf, int count_s, real *rbuf, int count_r, int nb_rank)
-{
-    MPI_Status MSstatus;
-    MPI_Send(sbuf, count_s, MPI_Type_GPU, nb_rank, 0, commGPU);
-    MPI_Recv(rbuf, count_r, MPI_Type_GPU, nb_rank, 0, commGPU, &MSstatus);
-    ////test only - please don't use
-    // MPI_Sendrecv(sbuf, count_s, MPI_Type_GPU, nb_rank, 0, rbuf, count_r, MPI_Type_GPU, nb_rank, 0, comm1d,
-    // MPI_STATUSES_IGNORE);
-}
-void MpiCommunicator::sendRecvGPU(real *sbuf, int count_s, real *rbuf, int count_r, int nb_rank)
-{
-    // test only - please don't use
-    MPI_Sendrecv(sbuf, count_s, MPI_Type_GPU, nb_rank, 0, rbuf, count_r, MPI_Type_GPU, nb_rank, 0, commGPU,
-                 MPI_STATUSES_IGNORE);
-}
-void MpiCommunicator::nbRecvDataGPU(real *rbuf, int count_r, int nb_rank)
-{
-    // printf("\n Start Recv Rank: %d, neighbor Rank: %d, request = %d \n", PID, nb_rank, (int)requestGPU.size());
-    // fflush(stdout);
-
-    requestGPU.push_back(0);
-    MPI_Irecv(rbuf, count_r, MPI_Type_GPU, nb_rank, 0, commGPU, &requestGPU[rcount]);
-    rcount++;
-
-    // printf("\n End Recv - Rank: %d , neighbor Rank: %d \n", PID, nb_rank);
-    // fflush(stdout);
-}
-void MpiCommunicator::nbSendDataGPU(real *sbuf, int count_s, int nb_rank)
-{
-    // printf("\n Start Send Rank: %d, neighbor Rank: %d, request = %d \n", PID, nb_rank, (int)requestGPU.size());
-    // fflush(stdout);
-
-    requestGPU.push_back(0);
-    MPI_Isend(sbuf, count_s, MPI_Type_GPU, nb_rank, 0, commGPU, &requestGPU[rcount]);
-    rcount++;
-
-    // printf("\n End Send - Rank: %d , neighbor Rank: %d \n", PID, nb_rank);
-    // fflush(stdout);
-}
-void MpiCommunicator::waitallGPU()
-{
-    // printf("\n Start Waitall Rank: %d, request = %d \n", PID, (int)requestGPU.size());
-    // fflush(stdout);
-    if (requestGPU.size() > 0) {
-        MPI_Waitall(static_cast<int>(requestGPU.size()), &requestGPU[0], MPI_STATUSES_IGNORE);
-        requestGPU.resize(0);
-        rcount = 0;
-    }
-    // printf("\n End Waitall \n");
-    // fflush(stdout);
-}
-void MpiCommunicator::sendDataGPU(real *sbuf, int count_s, int nb_rank)
-{
-    MPI_Send(sbuf, count_s, MPI_Type_GPU, nb_rank, 0, commGPU);
-}
-void MpiCommunicator::waitGPU(int id) { MPI_Wait(&requestGPU[id], MPI_STATUSES_IGNORE); }
-void MpiCommunicator::resetRequest()
-{
-    if (requestGPU.size() > 0) {
-        requestGPU.resize(0);
-        rcount = 0;
-    }
-}
-void MpiCommunicator::barrierGPU()
-{
-    // printf("\n Start Waitall Rank: %d, request = %d \n", PID, (int)requestGPU.size());
-    // fflush(stdout);
-    if (requestGPU.size() > 0) {
-        MPI_Barrier(commGPU);
-    }
-    // printf("\n End Waitall \n");
-    // fflush(stdout);
-}
-void MpiCommunicator::barrier() { MPI_Barrier(commGPU); }
-
-//////////////////////////////////////////////////////////////////////////
-void MpiCommunicator::exchngDataGeo(int *sbuf_t, int *rbuf_t, int *sbuf_b, int *rbuf_b, int count)
-{
-    MPI_Irecv(rbuf_t, count, MPI_INT, nbrbottom, 0, comm1d, &request[0]);
-    MPI_Irecv(rbuf_b, count, MPI_INT, nbrtop, 0, comm1d, &request[1]);
-    MPI_Isend(sbuf_t, count, MPI_INT, nbrtop, 0, comm1d, &request[2]);
-    MPI_Isend(sbuf_b, count, MPI_INT, nbrbottom, 0, comm1d, &request[3]);
-    MPI_Waitall(4, request, status);
-}
-int MpiCommunicator::getPID() const { return PID; }
-int MpiCommunicator::getNumberOfProcess() const { return numprocs; }
-int MpiCommunicator::getNeighbourTop() { return nbrtop; }
-int MpiCommunicator::getNeighbourBottom() { return nbrbottom; }
-MPI_Comm MpiCommunicator::getMpiCommunicator() { return comm1d; }
-void MpiCommunicator::distributeGeometry(unsigned int *dataRoot, unsigned int *dataNode, int dataSizePerNode)
-{
-    MPI_Scatter(dataRoot, dataSizePerNode, MPI_UNSIGNED, dataNode, dataSizePerNode, MPI_UNSIGNED, 0, MPI_COMM_WORLD);
-}
-int MpiCommunicator::mapCudaDevice(const int &rank, const int &size, const std::vector<unsigned int> &devices,
-                                const int &maxdev)
-{
-    int device        = -1;
-    char *host        = (char *)malloc(sizeof(char) * size * 255);
-    unsigned int *map = (unsigned int *)malloc(sizeof(unsigned int) * size);
-
-    char hostname[255];
-    gethostname(hostname, 254);
-    hostname[254] = 0;
-
-    MPI_Gather(hostname, 255, MPI_BYTE, host, 255, MPI_BYTE, 0, MPI_COMM_WORLD);
-
-    int i, j;
-    if (rank == 0) {
-        for (i = 0; i < size; i++) {
-            int counter = 0;
-            for (j = 0; j < i; j++) {
-                if (strcmp(&host[i * 255], &host[j * 255]) == 0)
-                    counter++;
-            }
-            if (counter >= maxdev) {
-                VF_LOG_CRITICAL("More processes than GPUs!");
-                exit(1);
-            }
-            map[i] = devices[counter];
-        }
-    }
-
-    MPI_Scatter(map, 1, MPI_UNSIGNED, &device, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD);
-
-    VF_LOG_INFO("Rank: {} runs on host: {} with GPU: {}", rank, hostname, device);
-
-    free(map);
-    free(host);
-    return device;
-}
-
-double MpiCommunicator::reduceSum(double quantityPerProcess)
-{ 
-    double *buffer_send = &quantityPerProcess;
-    double *buffer_recv = (double *)malloc(sizeof(double));
-
-    MPI_Reduce(buffer_send, buffer_recv, 1, MPI_DOUBLE, MPI_SUM, 0, commGPU);
-
-    return *buffer_recv;
-}
-
-void MpiCommunicator::receive_send(uint *buffer_receive, int size_buffer_recv, int neighbor_rank_recv, uint *buffer_send,
-                         int size_buffer_send, int neighbor_rank_send) const
-{
-    MPI_Request recv_request;
-    MPI_Irecv(buffer_receive, size_buffer_recv, MPI_UNSIGNED, neighbor_rank_recv, 0, commGPU, &recv_request);
-    //printf("receive_send PID: %i,   nbRev: nb_rank_recv: %i", this->getPID(), nb_rank_r);
-    //fflush(stdout);
-    MPI_Send(buffer_send, size_buffer_send, MPI_UNSIGNED, neighbor_rank_send, 0, commGPU);
-    //printf("receive_send PID: %i,   sendUintGPU: nb_rank_send: %i", this->getPID(), nb_rank_s);
-    //fflush(stdout);
-    MPI_Wait(&recv_request, MPI_STATUSES_IGNORE);
-}
-
-} // namespace vf::gpu
diff --git a/src/gpu/VirtualFluids_GPU/Communication/MpiCommunicator.h b/src/gpu/VirtualFluids_GPU/Communication/MpiCommunicator.h
deleted file mode 100644
index c6a71c0bf2e292133db90f5a1e2110cb1c484c31..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Communication/MpiCommunicator.h
+++ /dev/null
@@ -1,78 +0,0 @@
-#ifndef MPIMpiCommunicator_GPU_H
-#define MPIMpiCommunicator_GPU_H
-
-#include <vector>
-
-#include <mpi.h>
-
-#include "VirtualFluids_GPU_export.h"
-
-#include "Communicator.h"
-#include <basics/DataTypes.h>
-
-//////////////////////////////////
-#ifdef VF_DOUBLE_ACCURACY
-#define MPI_Type_GPU MPI_DOUBLE
-#else
-#define MPI_Type_GPU MPI_FLOAT
-#endif
-//////////////////////////////////
-
-namespace vf::gpu
-{
-
-class VIRTUALFLUIDS_GPU_EXPORT MpiCommunicator : public Communicator
-{
-public:
-    static MpiCommunicator &getInstance();
-    MpiCommunicator(const MpiCommunicator &) = delete;
-    MpiCommunicator &operator=(const MpiCommunicator &) = delete;
-    ~MpiCommunicator() override;
-
-    void exchngBottomToTop(float *sbuf, float *rbuf, int count);
-    void exchngTopToBottom(float *sbuf, float *rbuf, int count);
-    void waitAll() override;
-    void distributeGeometry(unsigned int *dataRoot, unsigned int *dataNode, int dataSizePerNode);
-    int getPID() const override;
-    int getNumberOfProcess() const override;
-    int getNeighbourTop();
-    int getNeighbourBottom();
-    void exchngData(float *sbuf_t, float *rbuf_t, float *sbuf_b, float *rbuf_b, int count) override;
-    void exchngDataNB(float *sbuf_t, int count_st, float *rbuf_t, int count_rt, float *sbuf_b, int count_sb,
-                      float *rbuf_b, int count_rb);
-    //////////////////////////////////////////////////////////////////////////
-    void exchngDataGPU(real *sbuf, int count_s, real *rbuf, int count_r, int nb_rank) override;
-    void sendRecvGPU(real *sbuf, int count_s, real *rbuf, int count_r, int nb_rank);
-    void nbRecvDataGPU(real *rbuf, int count_r, int nb_rank) override;
-    void nbSendDataGPU(real *sbuf, int count_s, int nb_rank) override;
-    void waitallGPU() override;
-    void sendDataGPU(real *sbuf, int count_s, int nb_rank) override;
-    void waitGPU(int id) override;
-    void resetRequest() override;
-    void barrierGPU();
-    void barrier();
-    //////////////////////////////////////////////////////////////////////////
-    void exchngDataGeo(int *sbuf_t, int *rbuf_t, int *sbuf_b, int *rbuf_b, int count);
-    MPI_Comm getMpiCommunicator();
-    int mapCudaDevice(const int &rank, const int &size, const std::vector<unsigned int> &devices, const int &maxdev) override;
-    double reduceSum(double quantityPerProcess) override;
-    //////////////////////////////////////////////////////////////////////////
-    void receive_send(uint *buffer_receive, int size_buffer_recv, int neighbor_rank_recv, uint *buffer_send,
-                      int size_buffer_send, int neighbor_rank_send) const override;
-
-private:
-    int numprocs, PID;
-    int nbrbottom, nbrtop;
-    MPI_Comm comm1d, commGPU;
-    MPI_Status status[4];
-    MPI_Request request[4];
-    //////////////////////////////////////////////////////////////////////////
-    std::vector<MPI_Request> requestGPU;
-    int rcount;
-    //////////////////////////////////////////////////////////////////////////
-    MpiCommunicator();
-};
-
-} // namespace vf::gpu
-
-#endif
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
index 9b2d1c4f5fa742b46ecd9ad3a9f8e86b499909fb..e96c96ec2f38c4d27f7c51177e4a3c21f37579e8 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
@@ -9,7 +9,7 @@
 #include <GPU/CudaMemoryManager.h>
 
 
-std::shared_ptr<GridProvider> GridProvider::makeGridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::gpu::CommunicationRoutine& communicator)
+std::shared_ptr<GridProvider> GridProvider::makeGridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::parallel::Communicator& communicator)
 {
     return std::shared_ptr<GridProvider>(new GridGenerator(builder, para, cudaMemoryManager, communicator));
 }
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
index ee6c93a5f718a2e6907e178bf7b751fbaed824dd..28d2f39e8b469f83672c6cbce162e867686d14ed 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.h
@@ -9,9 +9,9 @@
 #include "PointerDefinitions.h"
 #include "VirtualFluids_GPU_export.h"
 #include "gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h"
-namespace vf::gpu
+namespace vf::parallel
 {
-class CommunicationRoutine;
+class Communicator;
 }
 
 class Parameter;
@@ -21,7 +21,7 @@ class CudaMemoryManager;
 class VIRTUALFLUIDS_GPU_EXPORT GridProvider
 {
 public:
-    static std::shared_ptr<GridProvider> makeGridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::gpu::CommunicationRoutine& communicator);
+    static std::shared_ptr<GridProvider> makeGridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::parallel::Communicator& communicator);
     static std::shared_ptr<GridProvider> makeGridReader(FILEFORMAT format, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager);
 
     virtual void allocArrays_CoordNeighborGeo() = 0;
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
index e3c86317c3bf7e4ece5720ac8117e5f418b22fa4..db07322dd4158cb9f66aa15248f38ed4c43d383c 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
@@ -15,15 +15,16 @@
 #include "GridGenerator/TransientBCSetter/TransientBCSetter.h"
 
 #include "utilities/communication.h"
-#include "Communication/CommunicationRoutine.h"
+
+#include <parallel/Communicator.h>
 
 #include <logger/Logger.h>
 
 using namespace vf::lbm::dir;
 
 GridGenerator::GridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para,
-                             std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::gpu::CommunicationRoutine &communicator)
-    : mpiProcessID(communicator.getPID()), builder(builder)
+                             std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::parallel::Communicator &communicator)
+    : mpiProcessID(communicator.getProcessID()), builder(builder)
 {
     this->para = para;
     this->cudaMemoryManager = cudaMemoryManager;
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h
index b03de24ec452e37c45280c90497e95fd782ef26c..9c0d50a06e587c2776c63d164f1d46c4ac910eab 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h
@@ -45,9 +45,9 @@ class Parameter;
 class GridBuilder;
 class IndexRearrangementForStreams;
 class InterpolationCellGrouper;
-namespace vf::gpu
+namespace vf::parallel
 {
-class CommunicationRoutine;
+class Communicator;
 }
 
 //! \class GridGenerator derived class of GridProvider
@@ -67,7 +67,7 @@ private:
     const uint mpiProcessID;
 
 public:
-    VIRTUALFLUIDS_GPU_EXPORT GridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::gpu::CommunicationRoutine& communicator);
+    VIRTUALFLUIDS_GPU_EXPORT GridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::parallel::Communicator& communicator);
     ~GridGenerator() override;
     //! \brief overwrites the default IndexRearrangementForStreams
     void setIndexRearrangementForStreams(std::unique_ptr<IndexRearrangementForStreams>&& indexRearrangement);
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGeneratorTest.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGeneratorTest.cpp
index 8685ea9db2d570b1aa6773659d954ac57333e366..8fc0f78d1f964339258dbeae1658445bc4547e4f 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGeneratorTest.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGeneratorTest.cpp
@@ -1,17 +1,16 @@
 #include "GridGenerator.h"
 #include <gmock/gmock.h>
 
-#include "Communication/CommunicationRoutine.h"
-#include "Communication/MpiCommunicator.h"
 #include "DataTypes.h"
 #include "GPU/CudaMemoryManager.h"
 #include "IndexRearrangementForStreams.h"
+#include "NullCommunicator.h"
 #include "Parameter/Parameter.h"
 #include "gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
 #include "gpu/GridGenerator/grid/GridImp.h"
 #include "gpu/GridGenerator/utilities/communication.h"
 
-#include "Communication/CommunicationRoutineMocks.h"
+#include <parallel/NullCommunicator.h>
 
 namespace GridGeneratorTest
 {
@@ -72,7 +71,7 @@ class IndexRearrangementForStreamsDouble : public IndexRearrangementForStreams
 {
 public:
     IndexRearrangementForStreamsDouble(std::shared_ptr<Parameter> para, std::shared_ptr<GridBuilder> builder,
-                                       vf::gpu::CommunicationRoutine &communicator)
+                                       vf::parallel::Communicator &communicator)
         : IndexRearrangementForStreams(para, builder, communicator){};
 
     void initCommunicationArraysForCommAfterFinetoCoarseX(uint level, int indexOfProcessNeighbor,
@@ -116,7 +115,7 @@ private:
         para->setNumprocs(2);
 
         builder = std::make_shared<LevelGridBuilderStub>(nullptr);
-        vf::gpu::test::CommunicationRoutineTestDouble communicator;
+        vf::parallel::NullCommunicator communicator;
 
         gridGenerator = std::make_shared<GridGenerator>(builder, para, std::make_shared<CudaMemoryManagerDouble>(para),
                                                         communicator);
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp
index d59fa8d017069b665bd6b91f6cf1e685782fab24..bfd112b7c03afba459b8bef7919bc839ac200f2e 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.cpp
@@ -1,17 +1,18 @@
 #include "IndexRearrangementForStreams.h"
 
-#include "Communication/Communicator.h"
 #include "Logger.h"
 #include "Parameter/Parameter.h"
 #include <GridGenerator/grid/Grid.h>
 #include <GridGenerator/grid/GridBuilder/GridBuilder.h>
 
+#include <parallel/Communicator.h>
+
 #include <algorithm>
 #include <iostream>
 
 IndexRearrangementForStreams::IndexRearrangementForStreams(std::shared_ptr<Parameter> para,
                                                            std::shared_ptr<GridBuilder> builder,
-                                                           vf::gpu::CommunicationRoutine &communicator)
+                                                           vf::parallel::Communicator &communicator)
     : para(para), builder(builder), communicator(communicator)
 {
 }
@@ -108,7 +109,7 @@ std::vector<uint> IndexRearrangementForStreams::exchangeIndicesForCommAfterFtoCX
     std::vector<uint> recvIndicesForCommAfterFtoCPositions(
         (size_t)para->getParH(level)->sendProcessNeighborsAfterFtoCX[indexOfProcessNeighbor].numberOfNodes * 2, 0);
 
-    communicator.receive_send(
+    communicator.receiveSend(
         recvIndicesForCommAfterFtoCPositions.data(), (int)recvIndicesForCommAfterFtoCPositions.size(),
         para->getParH(level)->recvProcessNeighborX[indexOfProcessNeighbor].rankNeighbor,
         sendIndicesForCommAfterFtoCPositions.data(), (int)sendIndicesForCommAfterFtoCPositions.size(),
@@ -135,7 +136,7 @@ std::vector<uint> IndexRearrangementForStreams::exchangeIndicesForCommAfterFtoCY
     std::vector<uint> recvIndicesForCommAfterFtoCPositions(
         (size_t)para->getParH(level)->sendProcessNeighborsAfterFtoCY[indexOfProcessNeighbor].numberOfNodes * 2, 0);
 
-    communicator.receive_send(
+    communicator.receiveSend(
         recvIndicesForCommAfterFtoCPositions.data(), (int)recvIndicesForCommAfterFtoCPositions.size(),
         para->getParH(level)->recvProcessNeighborY[indexOfProcessNeighbor].rankNeighbor,
         sendIndicesForCommAfterFtoCPositions.data(), (int)sendIndicesForCommAfterFtoCPositions.size(),
@@ -162,7 +163,7 @@ std::vector<uint> IndexRearrangementForStreams::exchangeIndicesForCommAfterFtoCZ
     std::vector<uint> recvIndicesForCommAfterFtoCPositions(
         (size_t)para->getParH(level)->sendProcessNeighborsAfterFtoCZ[indexOfProcessNeighbor].numberOfNodes * 2, 0);
 
-    communicator.receive_send(
+    communicator.receiveSend(
         recvIndicesForCommAfterFtoCPositions.data(), (int)recvIndicesForCommAfterFtoCPositions.size(),
         para->getParH(level)->recvProcessNeighborZ[indexOfProcessNeighbor].rankNeighbor,
         sendIndicesForCommAfterFtoCPositions.data(), (int)sendIndicesForCommAfterFtoCPositions.size(),
@@ -368,7 +369,7 @@ void IndexRearrangementForStreams::reorderSendIndicesForCommAfterFtoC(
     for (uint i = 0; i < (uint)sendIndicesOther.size(); i++)
         sendIndices[i + numberOfSendNodesAfterFtoC] = sendIndicesOther[i];
 
-    VF_LOG_INFO("Reorder send indices: process {}, numberOfSendNodesAfterFtoC {}", communicator.getPID(),
+    VF_LOG_INFO("Reorder send indices: process {}, numberOfSendNodesAfterFtoC {}", communicator.getProcessID(),
                 numberOfSendNodesAfterFtoC);
 
     if (numberOfSendNodesAfterFtoC + sendIndicesOther.size() != numberOfSendIndices) {
@@ -514,7 +515,7 @@ void IndexRearrangementForStreams::reorderRecvIndicesForCommAfterFtoC(
     for (uint i = 0; i < (uint)recvIndicesOther.size(); i++)
         recvIndices[i + numberOfRecvNodesAfterFtoC] = recvIndicesOther[i];
 
-    VF_LOG_INFO("Reorder send indices: process {}, numberOfRecvNodesAfterFtoC {}", communicator.getPID(),
+    VF_LOG_INFO("Reorder recv indices: process {}, numberOfRecvNodesAfterFtoC {}", communicator.getProcessID(),
                 numberOfRecvNodesAfterFtoC);
 
     if (numberOfRecvNodesAfterFtoC + recvIndicesOther.size() != numberOfRecvIndices) {
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h
index 0b0401d3424f7a953bf2fa92d0382fd9256a635a..421e5aa4cb69cf56df79d9d4c2f0beb4504ce2c3 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h
@@ -13,16 +13,16 @@
 
 class Parameter;
 class GridBuilder;
-namespace vf::gpu
+namespace vf::parallel
 {
-class CommunicationRoutine;
+class Communicator;
 }
 
 class IndexRearrangementForStreams
 {
 public:
     //! \brief Construct IndexRearrangementForStreams object
-    IndexRearrangementForStreams(std::shared_ptr<Parameter> para, std::shared_ptr<GridBuilder> builder, vf::gpu::CommunicationRoutine& communicator);
+    IndexRearrangementForStreams(std::shared_ptr<Parameter> para, std::shared_ptr<GridBuilder> builder, vf::parallel::Communicator& communicator);
 
     virtual ~IndexRearrangementForStreams() = default;
 
@@ -133,7 +133,7 @@ protected:
 private:
     std::shared_ptr<GridBuilder> builder;
     std::shared_ptr<Parameter> para;
-    vf::gpu::CommunicationRoutine& communicator;
+    vf::parallel::Communicator &communicator;
 
     // used for tests
     friend class IndexRearrangementForStreamsTest_reorderSendIndices;
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp
index a8bc58488e6ebad6b38ae894e1c9c194e64c43c8..bcdb22b9ce04cc2164feb6d0ed45ac23f569712e 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp
@@ -6,7 +6,6 @@
 
 #include "Utilities/testUtilitiesGPU.h"
 
-#include "Communication/Communicator.h"
 #include "DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreams.h"
 #include "Parameter/Parameter.h"
 #include "basics/config/ConfigurationFile.h"
@@ -14,7 +13,7 @@
 #include "gpu/GridGenerator/grid/GridImp.h"
 #include "gpu/GridGenerator/utilities/communication.h"
 
-#include "Communication/CommunicationRoutineMocks.h"
+#include <parallel/NullCommunicator.h>
 
 namespace indexRearrangementTests
 {
@@ -152,7 +151,7 @@ private:
             IndexRearrangementForStreams(para, builder, communicator));
     };
 
-    vf::gpu::test::CommunicationRoutineTestDouble communicator;
+    vf::parallel::NullCommunicator communicator;
 };
 
 TEST_F(IndexRearrangementForStreamsTest_reorderSendIndices, reorderSendIndicesForCommAfterFtoCX)
@@ -174,19 +173,19 @@ TEST_F(IndexRearrangementForStreamsTest_reorderSendIndices, reorderSendIndicesFo
 // Test exchangeIndicesForCommAfterFtoC
 //////////////////////////////////////////////////////////////////////////
 
-class CommunicationRoutineDouble : public vf::gpu::CommunicationRoutine
+class CommunicatorDouble : public vf::parallel::NullCommunicator
 {
 public:
-    void receive_send(uint *buffer_receive, int, int, uint *, int, int) const override
+    void receiveSend(uint *buffer_receive, int, int, uint *, int, int) const override
     {
         for (int i = 0; i < (int)receivedIndices.size(); ++i) {
             *(buffer_receive + i) = receivedIndices[i];
         }
     }
 
-    int getPID() const override
+    void receiveSend(real * /*buffer_send*/, int /*size_buffer_send*/, real * /*buffer_receive*/,
+                     int /*size_buffer_recv*/, int /*neighbor_rank*/) const override
     {
-        return 0;
     }
 
     void setReceivedIndices(const std::vector<uint>& receivedIndices)
@@ -202,9 +201,9 @@ class IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCX : public
 {
 
 public:
-    void createTestSubject(vf::gpu::CommunicationRoutine &CommunicationRoutine)
+    void createTestSubject(vf::parallel::Communicator &Communicator)
     {
-        sut = std::make_unique<IndexRearrangementForStreams>(para, builder, CommunicationRoutine);
+        sut = std::make_unique<IndexRearrangementForStreams>(para, builder, Communicator);
     }
 
 protected:
@@ -243,7 +242,7 @@ private:
 
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCX, emptyRecvInX)
 {
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     communicator.setReceivedIndices(std::vector<uint>());
     createTestSubject(communicator);
 
@@ -253,7 +252,7 @@ TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCX, emptyR
 
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCX, zeroRecvIndexX)
 {
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     communicator.setReceivedIndices({ 0 });
     createTestSubject(communicator);
 
@@ -263,7 +262,7 @@ TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCX, zeroRe
 
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCX, oneRecvIndexX)
 {
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     std::vector<uint> expected = { 10 };
     std::vector<uint> receivedIndicesByComm(4, 0);
     std::copy(expected.begin(), expected.end(), receivedIndicesByComm.begin());
@@ -277,7 +276,7 @@ TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCX, oneRec
 
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCX, threeRecvIndicesX)
 {
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     std::vector<uint> expected = { 10, 20, 30 };
     std::vector<uint> receivedIndicesByComm(5, 0);
     std::copy(expected.begin(), expected.end(), receivedIndicesByComm.begin());
@@ -292,7 +291,7 @@ TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCX, threeR
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCX, sixRecvIndicesX)
 {
     // this test shows the limits of the current approach. The last index is always deleted
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     std::vector<uint> expected = { 10, 20, 30, 40, 50 };
     std::vector<uint> receivedIndicesByComm = { 10, 20, 30, 40, 50, 60 };
     communicator.setReceivedIndices(receivedIndicesByComm);
@@ -305,7 +304,7 @@ TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCX, sixRec
 
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCX, recvIndicesXContainZero)
 {
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     std::vector<uint> expected = { 0, 20, 30, 40 };
     std::vector<uint> receivedIndicesByComm(6, 0);
     std::copy(expected.begin(), expected.end(), receivedIndicesByComm.begin());
@@ -321,9 +320,9 @@ class IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCY : public
 {
 
 public:
-    void createTestSubject(vf::gpu::CommunicationRoutine &CommunicationRoutine)
+    void createTestSubject(vf::parallel::Communicator &Communicator)
     {
-        sut = std::make_unique<IndexRearrangementForStreams>(para, builder, CommunicationRoutine);
+        sut = std::make_unique<IndexRearrangementForStreams>(para, builder, Communicator);
     }
 
 protected:
@@ -362,7 +361,7 @@ private:
 
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCY, emptyRecvInY)
 {
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     communicator.setReceivedIndices(std::vector<uint>());
     createTestSubject(communicator);
 
@@ -372,7 +371,7 @@ TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCY, emptyR
 
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCY, zeroRecvIndexY)
 {
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     communicator.setReceivedIndices({ 0 });
     createTestSubject(communicator);
 
@@ -382,7 +381,7 @@ TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCY, zeroRe
 
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCY, oneRecvIndexY)
 {
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     std::vector<uint> expected = { 10 };
     std::vector<uint> receivedIndicesByComm(4, 0);
     std::copy(expected.begin(), expected.end(), receivedIndicesByComm.begin());
@@ -396,7 +395,7 @@ TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCY, oneRec
 
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCY, threeRecvIndicesY)
 {
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     std::vector<uint> expected = { 10, 20, 30 };
     std::vector<uint> receivedIndicesByComm(5, 0);
     std::copy(expected.begin(), expected.end(), receivedIndicesByComm.begin());
@@ -411,7 +410,7 @@ TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCY, threeR
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCY, sixRecvIndicesY)
 {
     // this test shows the limits of the current approach. The last index is always deleted
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     std::vector<uint> expected = { 10, 20, 30, 40, 50 };
     std::vector<uint> receivedIndicesByComm = { 10, 20, 30, 40, 50, 60 };
     communicator.setReceivedIndices(receivedIndicesByComm);
@@ -424,7 +423,7 @@ TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCY, sixRec
 
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCY, recvIndicesYContainZero)
 {
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     std::vector<uint> expected = { 0, 20, 30, 40 };
     std::vector<uint> receivedIndicesByComm(6, 0);
     std::copy(expected.begin(), expected.end(), receivedIndicesByComm.begin());
@@ -440,9 +439,9 @@ class IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCZ : public
 {
 
 public:
-    void createTestSubject(vf::gpu::CommunicationRoutine &CommunicationRoutine)
+    void createTestSubject(vf::parallel::Communicator &Communicator)
     {
-        sut = std::make_unique<IndexRearrangementForStreams>(para, builder, CommunicationRoutine);
+        sut = std::make_unique<IndexRearrangementForStreams>(para, builder, Communicator);
     }
 
 protected:
@@ -481,7 +480,7 @@ private:
 
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCZ, emptyRecvInZ)
 {
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     communicator.setReceivedIndices(std::vector<uint>());
     createTestSubject(communicator);
 
@@ -491,7 +490,7 @@ TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCZ, emptyR
 
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCZ, zeroRecvIndexZ)
 {
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     communicator.setReceivedIndices({ 0 });
     createTestSubject(communicator);
 
@@ -501,7 +500,7 @@ TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCZ, zeroRe
 
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCZ, oneRecvIndexZ)
 {
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     std::vector<uint> expected = { 10 };
     std::vector<uint> receivedIndicesBZComm(4, 0);
     std::copy(expected.begin(), expected.end(), receivedIndicesBZComm.begin());
@@ -515,7 +514,7 @@ TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCZ, oneRec
 
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCZ, threeRecvIndicesZ)
 {
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     std::vector<uint> expected = { 10, 20, 30 };
     std::vector<uint> receivedIndicesBZComm(5, 0);
     std::copy(expected.begin(), expected.end(), receivedIndicesBZComm.begin());
@@ -530,7 +529,7 @@ TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCZ, threeR
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCZ, sixRecvIndicesYZ)
 {
     // this test shows the limits of the current approach. The last index is always deleted
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     std::vector<uint> expected = { 10, 20, 30, 40, 50 };
     std::vector<uint> receivedIndicesByComm = { 10, 20, 30, 40, 50, 60 };
     communicator.setReceivedIndices(receivedIndicesByComm);
@@ -543,7 +542,7 @@ TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCZ, sixRec
 
 TEST_F(IndexRearrangementForStreamsTest_exchangeIndicesForCommAfterFtoCZ, recvIndicesZContainZero)
 {
-    CommunicationRoutineDouble communicator;
+    CommunicatorDouble communicator;
     std::vector<uint> expected = { 0, 20, 30, 40 };
     std::vector<uint> receivedIndicesByComm(6, 0);
     std::copy(expected.begin(), expected.end(), receivedIndicesByComm.begin());
@@ -614,7 +613,7 @@ private:
             IndexRearrangementForStreams(para, builder, communicator));
     };
 
-    vf::gpu::test::CommunicationRoutineTestDouble communicator;
+    vf::parallel::NullCommunicator communicator;
 };
 
 TEST_F(IndexRearrangementForStreamsTest_reorderRecvIndicesX, noSendIndicesForCommunicationAfterScalingFineToCoarse_receiveIndicesAreUnchanged)
diff --git a/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.cu b/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.cu
index e447062d292908c02800c4559cc4444476290629..242038afcea351c0d9187a606d82d5bcd16c214c 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/EnstrophyAnalyzer.cu
@@ -13,7 +13,7 @@
 
 #include <iomanip>
 
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 #include "Parameter/Parameter.h"
 // includes, kernels
diff --git a/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.cu b/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.cu
index df88632f8fd48b3ae8d50b444a65076ab0a0c12f..59b1b6494479e256e910d750e239d707573e2f71 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/KineticEnergyAnalyzer.cu
@@ -14,7 +14,7 @@
 
 #include <iomanip>
 
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 #include "Parameter/Parameter.h"
 // includes, kernels
diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
index 0a54db35bc4598702f3c3a3b194eb054a9ca478a..e9bd3ea551e93cb82baa85bf759d93b7a7d4dde0 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
@@ -11,7 +11,7 @@
 #include <helper_cuda.h>
 
 #include "LBM/LB.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 // includes, kernels
 #include "GPU/GPU_Kernels.cuh"
diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu
index d00941aba35d2885e893eea1ffe23c89002046c4..be1159d15440c63187a8e29e7db89343ca785be4 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu
@@ -34,7 +34,7 @@
 #include "TurbulentViscosityKernels.h"
 #include "basics/constants/NumericConstants.h"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
 #include "LBM/LB.h"
diff --git a/src/gpu/VirtualFluids_GPU/Init/VfReader.cpp b/src/gpu/VirtualFluids_GPU/Init/VfReader.cpp
index 46f6254f7cf2e8ce53ee6f47d69db76405b52dd6..1406abff0ed77399333fda33353f9f231953d3df 100644
--- a/src/gpu/VirtualFluids_GPU/Init/VfReader.cpp
+++ b/src/gpu/VirtualFluids_GPU/Init/VfReader.cpp
@@ -1,7 +1,6 @@
 #include "Init/VfReader.h"
 
 #include "Parameter/Parameter.h"
-#include "Communication/Communicator.h"
 #include "Init/PositionReader.h"
 #include "GPU/CudaMemoryManager.h"
 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/B92/B92CompressibleNavierStokes.cu b/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/B92/B92CompressibleNavierStokes.cu
index b708caf38990e563ab65caec26a68294c8589361..cf5971023e5f1110dd8bfb11bff38626ecc5dc7a 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/B92/B92CompressibleNavierStokes.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/B92/B92CompressibleNavierStokes.cu
@@ -2,7 +2,7 @@
 
 #include "B92CompressibleNavierStokes_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<B92CompressibleNavierStokes> B92CompressibleNavierStokes::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/C06/C06CompressibleNavierStokes.cu b/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/C06/C06CompressibleNavierStokes.cu
index ca943474683ca9b5517ef97626ad2449b74119f2..2f906587857bfa6cf90d0441a5488efefc140f3d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/C06/C06CompressibleNavierStokes.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/C06/C06CompressibleNavierStokes.cu
@@ -2,7 +2,7 @@
 
 #include "C06CompressibleNavierStokes_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<C06CompressibleNavierStokes> C06CompressibleNavierStokes::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K08/K08CompressibleNavierStokes.cu b/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K08/K08CompressibleNavierStokes.cu
index d56b9f2ecea4704be7c5c5d6d843492e74312f3e..7e8a0f5af702da465116320c0a02638b4a5274f9 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K08/K08CompressibleNavierStokes.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K08/K08CompressibleNavierStokes.cu
@@ -2,7 +2,7 @@
 
 #include "K08CompressibleNavierStokes_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<K08CompressibleNavierStokes> K08CompressibleNavierStokes::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K15/K15CompressibleNavierStokesBulkViscosity.cu b/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K15/K15CompressibleNavierStokesBulkViscosity.cu
index cb3aa849804a46aaaa1b5af4ad42b95e367def46..b52f6cc855a5d4dba45b3106841e7951a1d7f3b3 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K15/K15CompressibleNavierStokesBulkViscosity.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K15/K15CompressibleNavierStokesBulkViscosity.cu
@@ -2,7 +2,7 @@
 
 #include "K15CompressibleNavierStokesBulkViscosity_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<K15CompressibleNavierStokesBulkViscosity> K15CompressibleNavierStokesBulkViscosity::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K15/K15CompressibleNavierStokesSponge.cu b/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K15/K15CompressibleNavierStokesSponge.cu
index 54e4b073123fe3bae15b8e36e4f8e12aa35cee7b..eadf2a147ed526943d1ada85474dfd0ba522cd38 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K15/K15CompressibleNavierStokesSponge.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K15/K15CompressibleNavierStokesSponge.cu
@@ -2,7 +2,7 @@
 
 #include "K15CompressibleNavierStokesSponge_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<K15CompressibleNavierStokesSponge> K15CompressibleNavierStokesSponge::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K18/K18CompressibleNavierStokes.cu b/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K18/K18CompressibleNavierStokes.cu
index 801c7d90576324559873afa81e2d5f543219d5ae..efc90492b18c2360c49f523e26c4bcfe2b594e47 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K18/K18CompressibleNavierStokes.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K18/K18CompressibleNavierStokes.cu
@@ -2,7 +2,7 @@
 
 #include "K18CompressibleNavierStokes_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<K18CompressibleNavierStokes> K18CompressibleNavierStokes::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K20/K20CompressibleNavierStokes.cu b/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K20/K20CompressibleNavierStokes.cu
index b47f8b633d349a5d128e4bbeed1d642261e6ea3b..1a07b5a09614e38bd589b3f02d70b831d73b587e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K20/K20CompressibleNavierStokes.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/K20/K20CompressibleNavierStokes.cu
@@ -2,7 +2,7 @@
 
 #include "K20CompressibleNavierStokes_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<K20CompressibleNavierStokes> K20CompressibleNavierStokes::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/M02/M02CompressibleNavierStokes.cu b/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/M02/M02CompressibleNavierStokes.cu
index bc8d4e11f1efd9325c005846164b24a8459242ce..ffba813e2626c61323c13a5d4ac74d331e6e6c78 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/M02/M02CompressibleNavierStokes.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Compressible/FluidFlow/M02/M02CompressibleNavierStokes.cu
@@ -2,7 +2,7 @@
 
 #include "M02CompressibleNavierStokes_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<M02CompressibleNavierStokes> M02CompressibleNavierStokes::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/B15/B15IncompressibleNavierStokesBGKplus.cu b/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/B15/B15IncompressibleNavierStokesBGKplus.cu
index 3bc4fcbecfe656cf1f1b451ab52e1a2f192908f1..846c2effc1d7df08f76e9752ed706e821254aba7 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/B15/B15IncompressibleNavierStokesBGKplus.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/B15/B15IncompressibleNavierStokesBGKplus.cu
@@ -2,7 +2,7 @@
 
 #include "B15IncompressibleNavierStokesBGKplus_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<B15IncompressibleNavierStokesBGKplus> B15IncompressibleNavierStokesBGKplus::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/B92/B92IncompressibleNavierStokes.cu b/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/B92/B92IncompressibleNavierStokes.cu
index 730eb540739ffb7461d2b5cd458d4e790ce9d369..99f5fda8377bd7ce5bd4d486db5e2a680add7e84 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/B92/B92IncompressibleNavierStokes.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/B92/B92IncompressibleNavierStokes.cu
@@ -2,7 +2,7 @@
 
 #include "B92IncompressibleNavierStokes_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<B92IncompressibleNavierStokes> B92IncompressibleNavierStokes::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/C06/C06IncompressibleNavierStokes.cu b/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/C06/C06IncompressibleNavierStokes.cu
index eebe0587bffd55749ed16cd5bec976cb100cb457..16f6ffd99e8e7b68b954976fd79f0f2aeb7370b9 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/C06/C06IncompressibleNavierStokes.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/C06/C06IncompressibleNavierStokes.cu
@@ -2,7 +2,7 @@
 
 #include "CascadeIncompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<C06IncompressibleNavierStokes> C06IncompressibleNavierStokes::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/K15/K15IncompressibleNavierStokes.cu b/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/K15/K15IncompressibleNavierStokes.cu
index bfbf84b4073a87cbbf833ea63ae9d457ac5965f4..f457eb98e856733795681f5b285d82decc6ff2b6 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/K15/K15IncompressibleNavierStokes.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/K15/K15IncompressibleNavierStokes.cu
@@ -2,7 +2,7 @@
 
 #include "K15IncompressibleNavierStokes_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<K15IncompressibleNavierStokes> K15IncompressibleNavierStokes::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/K15/K15IncompressibleNavierStokesIsoCheck.cu b/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/K15/K15IncompressibleNavierStokesIsoCheck.cu
index e975bf9df0dce6ff1ebd3d463c3061013b526354..0db31e686cfce85c19ae5b4aa37fef71ecc6e354 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/K15/K15IncompressibleNavierStokesIsoCheck.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/K15/K15IncompressibleNavierStokesIsoCheck.cu
@@ -2,7 +2,7 @@
 
 #include "K15IncompressibleNavierStokesIsoCheck_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<K15IncompressibleNavierStokesIsoCheck> K15IncompressibleNavierStokesIsoCheck::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/M02/M02IncompressibleNavierStokes.cu b/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/M02/M02IncompressibleNavierStokes.cu
index 71660b82ed7b439f912679d8bd4617e6b6e2a012..a08eef4a381b06729da8ff26a0ab305e5cf3615c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/M02/M02IncompressibleNavierStokes.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Incompressible/FluidFlow/M02/M02IncompressibleNavierStokes.cu
@@ -2,7 +2,7 @@
 
 #include "M02IncompressibleNavierStokes_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<M02IncompressibleNavierStokes> M02IncompressibleNavierStokes::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
index 84e5f3f6ac08b92ccd92fbf142cceb3245de51d5..00d392c9e8ff7b8c55b75f50327946b0140a3d68 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
@@ -7,7 +7,7 @@
 
 #include <memory>
 
-#include <cuda/CudaGrid.h>
+#include <cuda_helper/CudaGrid.h>
 
 class CheckParameterStrategy;
 class Parameter;
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu
index 4d4467acb20232ecb364451f61265b71f1692517..21ad7202a62a685bc18b8a0c44e093e487e8eb84 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27.cu
@@ -2,7 +2,7 @@
 
 #include "ADComp27_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<ADComp27> ADComp27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu
index d218489c754edc89f99277670f09536962ce62b2..fa6d378353aa5058f1b58979a517e8490d7d3916 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7.cu
@@ -2,7 +2,7 @@
 
 #include "ADComp7_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<ADComp7> ADComp7::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu
index 3c10e7a6996a9a26668a18390ddee4e2cbbec853..22782ddf6aebb6b860c3dc1e2b1a384223931085 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27.cu
@@ -2,7 +2,7 @@
 
 #include "ADIncomp27_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<ADIncomp27> ADIncomp27::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu
index 71adc96eef733084e01fa963f6d0fad66a2e1062..613491dcfc4e124fed2826f5439ae7a2b99544b7 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7.cu
@@ -2,7 +2,7 @@
 
 #include "ADIncomp7_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<ADIncomp7> ADIncomp7::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index dddc795ccda96b49a03373c05a701f32662e9565..eb4e4f7bec0a23d4658825f26c26bdb637cb0c58 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -4,10 +4,8 @@
 
 #include <helper_timer.h>
 
-
 #include "Factories/GridScalingFactory.h"
 #include "LBM/LB.h"
-#include "Communication/Communicator.h"
 #include "Communication/ExchangeData27.h"
 #include "Parameter/Parameter.h"
 #include "Parameter/CudaStreamManager.h"
@@ -58,11 +56,12 @@
 #include "Kernel/Utilities/KernelFactory/KernelFactoryImp.h"
 #include "Kernel/Kernel.h"
 #include "TurbulenceModels/TurbulenceModelFactory.h"
-#include <cuda/DeviceInfo.h>
 
-#include <logger/Logger.h>
+#include <cuda_helper/DeviceInfo.h>
 
+#include <logger/Logger.h>
 
+#include <parallel/Communicator.h>
 
 std::string getFileName(const std::string& fname, int step, int myID)
 {
@@ -70,7 +69,7 @@ std::string getFileName(const std::string& fname, int step, int myID)
 }
 
 Simulation::Simulation(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> memoryManager,
-                       vf::gpu::Communicator &communicator, GridProvider &gridProvider, BoundaryConditionFactory* bcFactory, GridScalingFactory* scalingFactory)
+                       vf::parallel::Communicator &communicator, GridProvider &gridProvider, BoundaryConditionFactory* bcFactory, GridScalingFactory* scalingFactory)
     : para(para), cudaMemoryManager(memoryManager), communicator(communicator), kernelFactory(std::make_unique<KernelFactoryImp>()),
       preProcessorFactory(std::make_shared<PreProcessorFactoryImp>()), dataWriter(std::make_unique<FileWriter>())
 {
@@ -79,7 +78,7 @@ Simulation::Simulation(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemo
 }
 
 Simulation::Simulation(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> memoryManager,
-                       vf::gpu::Communicator &communicator, GridProvider &gridProvider, BoundaryConditionFactory* bcFactory, SPtr<TurbulenceModelFactory> tmFactory, GridScalingFactory* scalingFactory)
+                       vf::parallel::Communicator &communicator, GridProvider &gridProvider, BoundaryConditionFactory* bcFactory, SPtr<TurbulenceModelFactory> tmFactory, GridScalingFactory* scalingFactory)
     : para(para), cudaMemoryManager(memoryManager), communicator(communicator), kernelFactory(std::make_unique<KernelFactoryImp>()),
       preProcessorFactory(std::make_shared<PreProcessorFactoryImp>()), dataWriter(std::make_unique<FileWriter>())
 {
@@ -90,8 +89,7 @@ void Simulation::init(GridProvider &gridProvider, BoundaryConditionFactory *bcFa
 {
     gridProvider.initalGridInformations();
 
-    vf::cuda::verifyAndSetDevice(
-        communicator.mapCudaDevice(para->getMyProcessID(), para->getNumprocs(), para->getDevices(), para->getMaxDev()));
+    vf::cuda::verifyAndSetDevice(communicator.mapCudaDevicesOnHosts(para->getDevices(), para->getMaxDev()));
 
     para->initLBMSimulationParameter();
 
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.h b/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
index ba2a321707db4138aee9e1c30bae4dede017a5b8..146ab4cf6aebe395bff695f3e99aa7b6b1e776d3 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
@@ -6,11 +6,13 @@
 
 #include <PointerDefinitions.h>
 
-#include "Utilities/Buffer2D.hpp"
 #include "LBM/LB.h"
+#include "Utilities/Buffer2D.hpp"
 
-
-namespace vf::gpu { class Communicator; }
+namespace vf::parallel
+{
+class Communicator;
+}
 
 class CudaMemoryManager;
 class Parameter;
@@ -37,9 +39,9 @@ class Simulation
 {
 public:
     Simulation(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> memoryManager,
-               vf::gpu::Communicator &communicator, GridProvider &gridProvider, BoundaryConditionFactory* bcFactory, GridScalingFactory* scalingFactory = nullptr);	
+               vf::parallel::Communicator &communicator, GridProvider &gridProvider, BoundaryConditionFactory* bcFactory, GridScalingFactory* scalingFactory = nullptr);	
 	Simulation(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> memoryManager,
-               vf::gpu::Communicator &communicator, GridProvider &gridProvider, BoundaryConditionFactory* bcFactory, SPtr<TurbulenceModelFactory> tmFactory, GridScalingFactory* scalingFactory = nullptr);
+               vf::parallel::Communicator &communicator, GridProvider &gridProvider, BoundaryConditionFactory* bcFactory, SPtr<TurbulenceModelFactory> tmFactory, GridScalingFactory* scalingFactory = nullptr);
 
     ~Simulation();
     void run();
@@ -76,7 +78,7 @@ private:
 	Buffer2D <int> geo_rbuf_b;
 
 
-	vf::gpu::Communicator& communicator;
+	vf::parallel::Communicator& communicator;
     SPtr<Parameter> para;
     std::shared_ptr<DataWriter> dataWriter;
 	std::shared_ptr<CudaMemoryManager> cudaMemoryManager;
diff --git a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu
index e5062a8ec63940ab6e23e567c0674681d4af6509..df6b2e6623977331bf4b64f2ea798deb7700020f 100644
--- a/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu
+++ b/src/gpu/VirtualFluids_GPU/Output/DistributionDebugInspector.cu
@@ -37,7 +37,7 @@
 #include "basics/constants/NumericConstants.h"
 #include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
-#include <cuda/CudaGrid.h>
+#include <cuda_helper/CudaGrid.h>
 #include <cuda.h>
 
 #include <iostream>
diff --git a/src/gpu/VirtualFluids_GPU/Output/EdgeNodeDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/EdgeNodeDebugWriter.hpp
index ee5333dfc130ac7dfdf7ab8c4de812a2916777fa..eb43a5f1fb85e538d4a930ba43fdf546b5c878a8 100644
--- a/src/gpu/VirtualFluids_GPU/Output/EdgeNodeDebugWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/EdgeNodeDebugWriter.hpp
@@ -3,7 +3,7 @@
 
 #include <fstream>
 #include <sstream>
-#include <stdio.h>
+#include <cstdio>
 // #include <math.h>
 #include "StringUtilities/StringUtil.h"
 #include "lbm/constants/D3Q27.h"
@@ -13,7 +13,6 @@
 #include <basics/writer/WbWriterVtkXmlBinary.h>
 #include <cmath>
 
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
 
 namespace EdgeNodeDebugWriter
 {
@@ -25,7 +24,7 @@ void addCoordinatesToNodeVector(SPtr<LBMSimulationParameter> parH, std::vector<U
             nodesVec[indexInNodesVector] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
 }
 
-void writeEdgeNodesXZ_Send(SPtr<Parameter> para)
+void writeEdgeNodesXZ_Send(SPtr<Parameter> para, int processID = 0)
 {
     std::vector<UbTupleFloat3> nodesVec;
     std::vector<std::string> datanames = { "SparseIndex", "ProcessNeighbor", "IndexInSendVector", "AfterFtoC" };
@@ -54,14 +53,14 @@ void writeEdgeNodesXZ_Send(SPtr<Parameter> para)
             nodeCount++;
         }
         std::string filenameVec = para->getFName() + "_writeEdgeNodesXZ_Send_PID_" +
-                                  std::to_string(vf::gpu::MpiCommunicator::getInstance().getPID()) + "_" +
+                                  std::to_string(processID) + "_" +
                                   StringUtil::toString<int>(level);
 
         WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(filenameVec, nodesVec, datanames, nodedata);
     }
 }
 
-void writeEdgeNodesXZ_Recv(SPtr<Parameter> para)
+void writeEdgeNodesXZ_Recv(SPtr<Parameter> para, int processID = 0)
 {
     std::vector<UbTupleFloat3> nodesVec;
     std::vector<std::string> datanames = { "SparseIndex", "ProcessNeighbor", "IndexInRecvVector", "AfterFtoC" };
@@ -90,7 +89,7 @@ void writeEdgeNodesXZ_Recv(SPtr<Parameter> para)
             nodeCount++;
         }
         std::string filenameVec = para->getFName() + "_writeEdgeNodesXZ_Recv_PID_" +
-                                  std::to_string(vf::gpu::MpiCommunicator::getInstance().getPID()) + "_" +
+                                  std::to_string(processID) + "_" +
                                   StringUtil::toString<int>(level);
 
         WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(filenameVec, nodesVec, datanames, nodedata);
diff --git a/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp
index 4af9a50a123588e25f1ca9faa5c18581601f69d2..da5307c3d8080d7d10ad91dc970639fdd83dd2dc 100644
--- a/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp
@@ -11,8 +11,6 @@
 #include <basics/writer/WbWriterVtkXmlBinary.h>
 #include <cmath>
 
-#include "VirtualFluids_GPU/Communication/MpiCommunicator.h"
-
 namespace InterfaceDebugWriter
 {
 
@@ -603,7 +601,7 @@ void checkForSendNodeZ(int pos, int &sendDir, int &sendDirectionInCommAfterFtoC,
                            para->getParH(level)->sendProcessNeighborsAfterFtoCZ, 8.0);
 }
 
-void writeInterfaceFCC_Send(Parameter *para)
+void writeInterfaceFCC_Send(Parameter *para, int processID = 0)
 {
     std::vector<UbTupleFloat3> nodesVec;
     int nodeNumberVec = 0;
@@ -650,14 +648,14 @@ void writeInterfaceFCC_Send(Parameter *para)
             nodeCount++;
         }
         std::string filenameVec = para->getFName() + "_writeInterfaceFCC_Send_PID_" +
-                                  std::to_string(vf::gpu::MpiCommunicator::getInstance().getPID()) + "_" +
+                                  std::to_string(processID) + "_" +
                                   StringUtil::toString<int>(level);
 
         WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(filenameVec, nodesVec, datanames, nodedata);
     }
 }
 
-void writeInterfaceCFC_Recv(Parameter *para)
+void writeInterfaceCFC_Recv(Parameter *para, int processID = 0)
 {
     std::vector<UbTupleFloat3> nodesVec;
     int nodeNumberVec = 0;
@@ -703,7 +701,7 @@ void writeInterfaceCFC_Recv(Parameter *para)
             nodeCount++;
         }
         std::string filenameVec = para->getFName() + "_writeInterfaceCFC_Recv_PID_" +
-                                  std::to_string(vf::gpu::MpiCommunicator::getInstance().getPID()) + "_" +
+                                  std::to_string(processID) + "_" +
                                   StringUtil::toString<int>(level);
 
         WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(filenameVec, nodesVec, datanames, nodedata);
@@ -718,7 +716,7 @@ void addToNodesVector(const int level, const int pos, std::vector<UbTupleFloat3>
     nodesVec.push_back(makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
 }
 
-void writeSendNodesStream(Parameter *para)
+void writeSendNodesStream(Parameter *para, int processID = 0)
 {
     std::vector<UbTupleFloat3> nodesVec;
 
@@ -808,14 +806,14 @@ void writeSendNodesStream(Parameter *para)
             }
         }
         std::string filenameVec = para->getFName() + "_writeSendNodesStreams_PID_" +
-                                  std::to_string(vf::gpu::MpiCommunicator::getInstance().getPID()) + "_" +
+                                  std::to_string(processID) + "_" +
                                   StringUtil::toString<int>(level);
 
         WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(filenameVec, nodesVec, datanames, nodedata);
     }
 }
 
-void writeRecvNodesStream(Parameter *para)
+void writeRecvNodesStream(Parameter *para, int processID = 0)
 {
     std::vector<UbTupleFloat3> nodesVec;
 
@@ -894,7 +892,7 @@ void writeRecvNodesStream(Parameter *para)
         // Recv are nodes ghost nodes and therefore they can't be coarse cells for the interpolation from coarse to fine
 
         std::string filenameVec = para->getFName() + "_writeRecvNodesStreams_PID_" +
-                                  std::to_string(vf::gpu::MpiCommunicator::getInstance().getPID()) + "_" +
+                                  std::to_string(processID) + "_" +
                                   StringUtil::toString<int>(level);
 
         WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(filenameVec, nodesVec, datanames, nodedata);
diff --git a/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp
index e506a56bb76a263ac8982a7e53f39e67c268e49b..a05aad82137fc378b7f899d31c1b06d53199d694 100644
--- a/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp
@@ -11,7 +11,6 @@
 
 #include "StringUtilities/StringUtil.h"
 #include "Utilities/FindNeighbors.h"
-#include "gpu/VirtualFluids_GPU/Communication/Communicator.h"
 
 namespace NeighborDebugWriter
 {
diff --git a/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriter.hpp
index d075c78e53a45e96adea43c8846159f4ba128c6d..5448db1329885ca542c951b3d068f3ab48fe502c 100644
--- a/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/QDebugVtkWriter.hpp
@@ -11,7 +11,6 @@
 #include <logger/Logger.h>
 
 #include "gpu/GridGenerator/grid/NodeValues.h"
-#include "gpu/VirtualFluids_GPU/Communication/Communicator.h"
 #include "gpu/VirtualFluids_GPU/LBM/LB.h"
 #include "gpu/VirtualFluids_GPU/Parameter/Parameter.h"
 #include "gpu/VirtualFluids_GPU/Utilities/FindNeighbors.h"
diff --git a/src/gpu/VirtualFluids_GPU/Output/Timer.cpp b/src/gpu/VirtualFluids_GPU/Output/Timer.cpp
index f6efff58440bd786d57a3ccb44d2271c29761323..a3048e62d674aff4e21a93292f694541dc86e36f 100644
--- a/src/gpu/VirtualFluids_GPU/Output/Timer.cpp
+++ b/src/gpu/VirtualFluids_GPU/Output/Timer.cpp
@@ -4,7 +4,8 @@
 
 #include "UbScheduler.h"
 #include "Parameter/Parameter.h"
-#include "VirtualFluids_GPU/Communication/Communicator.h"
+
+#include <parallel/Communicator.h>
 
 void Timer::initTimer()
 {
@@ -31,7 +32,7 @@ void Timer::resetTimer()
         this->totalElapsedTime = 0.0;
 }
 
-void Timer::outputPerformance(uint t, Parameter* para, vf::gpu::Communicator& communicator)
+void Timer::outputPerformance(uint t, Parameter* para, vf::parallel::Communicator& communicator)
 {
     real fnups      = 0.0;
     real bandwidth  = 0.0;
@@ -42,18 +43,18 @@ void Timer::outputPerformance(uint t, Parameter* para, vf::gpu::Communicator& co
         bandwidth   += (27.0+1.0) * 4.0 * 1000.0 * (t-para->getTimestepStart()) * para->getParH(lev)->numberOfNodes  / (this->totalElapsedTime*1.0E9);
     }
 
-    if(this->firstOutput && communicator.getPID() == 0) //only display the legend once
+    if(this->firstOutput && communicator.getProcessID() == 0) //only display the legend once
     {
         VF_LOG_INFO("PID \t --- {} ---  Processing time (ms) \t Nups in Mio \t Bandwidth in GB/sec", this->name );
         this->firstOutput = false;
     }
 
-    VF_LOG_INFO(" {} \t --- {} --- {:>8.1f}/ {:<8.1f} \t   {:5.1f} \t       {:4.1f}",  communicator.getPID(), this->name, this->elapsedTime, this->totalElapsedTime, fnups, bandwidth);
+    VF_LOG_INFO(" {} \t --- {} --- {:>8.1f}/ {:<8.1f} \t   {:5.1f} \t       {:4.1f}",  communicator.getProcessID(), this->name, this->elapsedTime, this->totalElapsedTime, fnups, bandwidth);
 
     // When using multiple GPUs, sum the nups of all processes
-    if (communicator.getNumberOfProcess() > 1) {
-        double nupsSum =  communicator.reduceSum(fnups);
-        if (communicator.getPID() == 0)
-            VF_LOG_INFO("Sum of all {} processes: Nups in Mio: {:.1f}", communicator.getNumberOfProcess(), nupsSum);
+    if (communicator.getNumberOfProcesses() > 1) {
+        double nupsSum = communicator.reduceSum(fnups);
+        if (communicator.getProcessID() == 0)
+            VF_LOG_INFO("Sum of all {} processes: Nups in Mio: {:.1f}", communicator.getNumberOfProcesses(), nupsSum);
     }
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Output/Timer.h b/src/gpu/VirtualFluids_GPU/Output/Timer.h
index 55ada64ad245ee41aa99a1185eba134a652067c9..fd76c66703c2a60d9aed26365f415fe3c93e7e1f 100644
--- a/src/gpu/VirtualFluids_GPU/Output/Timer.h
+++ b/src/gpu/VirtualFluids_GPU/Output/Timer.h
@@ -6,14 +6,15 @@
 #include "Parameter/Parameter.h"
 #include <logger/Logger.h>
 
-namespace vf::gpu{
-    class Communicator;
+namespace vf::parallel
+{
+class Communicator;
 }
 class Parameter;
 
 class Timer
 {
-    public:
+public:
     Timer(std::string _name): name(_name)
     {
         this->initTimer();
@@ -29,13 +30,12 @@ class Timer
     void startTimer();
     void stopTimer();
     void resetTimer();
-    void outputPerformance(uint t, Parameter* para, vf::gpu::Communicator& communicator);
+    void outputPerformance(uint t, Parameter* para, vf::parallel::Communicator& communicator);
 
     float getElapsedTime(){ return this->elapsedTime; }
     float getTotalElapsedTime(){ return this->totalElapsedTime; }
 
-    private:
-    
+private:
     cudaEvent_t start_t, stop_t;
     float elapsedTime = 0.0;
     float totalElapsedTime = 0.0;
@@ -44,6 +44,4 @@ class Timer
     bool firstOutput = true;
 };
 
-
-
 #endif 
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp b/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp
index e17c9f1bd1a167198423dbe7e31f7d55b3af2a65..65ee97243d9f1df69d1592419a707a6667d91fcf 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp
@@ -9,13 +9,14 @@
 #include "PointerDefinitions.h"
 #include "basics/config/ConfigurationFile.h"
 
+#include "DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
 #include "Factories/BoundaryConditionFactory.h"
 #include "Factories/GridScalingFactory.h"
-#include "Communication/Communicator.h"
-#include "DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
 #include "GPU/CudaMemoryManager.h"
 #include "gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
 
+#include <parallel/Communicator.h>
+
 TEST(ParameterTest, passingEmptyFileWithoutPath_ShouldNotThrow)
 {
     // assuming that the config files is stored parallel to this file.
@@ -212,7 +213,7 @@ class MockGridGenerator : public GridGenerator
 
 public:
     MockGridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para,
-                      std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::gpu::Communicator &communicator)
+                      std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::parallel::Communicator &communicator)
         : GridGenerator(builder, para, cudaMemoryManager, communicator)
     {
     }
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu
index f2d4b27a159c3f3687bd58933b55558ca08cd16d..3f3cfe384f513141d4b8bda36337ac0d96dc8aea 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/ActuatorFarm.cu
@@ -36,7 +36,7 @@
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
 
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 #include "VirtualFluids_GPU/GPU/GeometryUtils.h"
 #include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu
index b1ebaf28edc7966074f7cc96e31bf8489ca8e4a9..ca1ed594829682ad94b98a59349e082bf3b315a4 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.cu
@@ -36,7 +36,7 @@
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 #include "LBM/GPUHelperFunctions/KernelUtilities.h"
 
 #include "StringUtilities/StringUtil.h"
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu
index 705b7173606d1956c50c59d5cc1f7635a4b7883b..43be448998b5fe73ccc1444450f55acdceab9232 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu
@@ -1,7 +1,7 @@
 #include "Probe.h"
 #include "PlanarAverageProbe.h"
 
-#include <cuda/CudaGrid.h>
+#include <cuda_helper/CudaGrid.h>
 
 #include <cuda.h>
 #include <cuda_runtime.h>
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
index 19f7f6c62ae7ac83c90fc2a7aff0e286a70063d1..a0f15cc340b99bcb5a837b3afbc4840511644a3f 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
@@ -1,7 +1,7 @@
 #include "Probe.h"
 #include "PlaneProbe.h"
 
-#include <cuda/CudaGrid.h>
+#include <cuda_helper/CudaGrid.h>
 
 #include <cuda.h>
 #include <cuda_runtime.h>
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
index 19c170608a606227d21c25791776bd3195b16e04..bedcc513a1e441b8d5927f5afafa79f13af2cdbc 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
@@ -5,7 +5,7 @@
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
 
-#include <cuda/CudaGrid.h>
+#include <cuda_helper/CudaGrid.h>
 
 #include "Parameter/Parameter.h"
 #include "DataStructureInitializer/GridProvider.h"
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
index f52c666c920a049012888e8e1b71578e68d3da31..6b263342e899c53917d71aa190eef08f1451fa49 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
@@ -1,7 +1,7 @@
 #include "Probe.h"
 #include "WallModelProbe.h"
 
-#include <cuda/CudaGrid.h>
+#include <cuda_helper/CudaGrid.h>
 
 #include <cuda.h>
 #include <cuda_runtime.h>
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27.cu
index 1e70fc642a3fd7f6fca4ed90b9ff4ebc1bb437db..32c02ce8f2ab9a67ba3cbc09ae909d27cf4c9223 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27.cu
@@ -2,7 +2,7 @@
 
 #include "InitCompAD27_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<PreProcessorStrategy> InitCompAD27::getNewInstance(std::shared_ptr<Parameter> para)
 {
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7.cu
index f8fc6af00d93cc5a51da4a69d67b69b616f97140..3b70cc80827b2e5f5081a7bcfca6b83789284a05 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7.cu
@@ -2,7 +2,7 @@
 
 #include "InitCompAD7_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<InitCompAD7> InitCompAD7::getNewInstance(std::shared_ptr<Parameter> para)
 {
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27.cu
index c4676f28f969e2db8ff7f1910ac784a1c0dab351..3f2176eace98b825a2dcb8557506787563286434 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27.cu
@@ -2,7 +2,7 @@
 
 #include "InitCompSP27_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<PreProcessorStrategy> InitCompSP27::getNewInstance(std::shared_ptr<Parameter> para)
 {
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3.cu
index 14d6b725337aa8b9af279bf794ff1c0912516b64..cac748bcea4cf3a9067552f87142e6a019c84d90 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3.cu
@@ -2,7 +2,7 @@
 
 #include "InitF3_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<PreProcessorStrategy> InitF3::getNewInstance(std::shared_ptr<Parameter> para)
 {
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27.cu
index 6a9b4cb31b1032f6921bddbe60d3cd570ef46b6d..246a5884f51af97b4e32977d30f114d6e90a6959 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27.cu
@@ -2,7 +2,7 @@
 
 #include "InitIncompAD27_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<PreProcessorStrategy> InitIncompAD27::getNewInstance(std::shared_ptr<Parameter> para)
 {
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7.cu
index b7c7d46a9ea8e3133b8240e27959b6b4d2ed0cf5..d246d58a279e6edd79de98a911c293d627bba4c3 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7.cu
@@ -2,7 +2,7 @@
 
 #include "InitIncompAD7_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<PreProcessorStrategy> InitIncompAD7::getNewInstance(std::shared_ptr<Parameter> para)
 {
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27.cu
index 078ad24f24659bf10a3dc9ed90bfd62b5e021187..8e0702bc9bd6486edff43212098e5df223842a2a 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27.cu
@@ -2,7 +2,7 @@
 
 #include "InitSP27_Device.cuh"
 #include "Parameter/Parameter.h"
-#include "cuda/CudaGrid.h"
+#include <cuda_helper/CudaGrid.h>
 
 std::shared_ptr<PreProcessorStrategy> InitSP27::getNewInstance(std::shared_ptr<Parameter> para)
 {
diff --git a/src/gpu/cuda_helper/CMakeLists.txt b/src/gpu/cuda_helper/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..314ba43431a31643ff9df63018b9194d92a99d6d
--- /dev/null
+++ b/src/gpu/cuda_helper/CMakeLists.txt
@@ -0,0 +1,3 @@
+project(cuda LANGUAGES CUDA CXX)
+
+vf_add_library(PUBLIC_LINK logger)
diff --git a/src/cuda/CudaGrid.cpp b/src/gpu/cuda_helper/CudaGrid.cpp
similarity index 100%
rename from src/cuda/CudaGrid.cpp
rename to src/gpu/cuda_helper/CudaGrid.cpp
diff --git a/src/cuda/CudaGrid.h b/src/gpu/cuda_helper/CudaGrid.h
similarity index 100%
rename from src/cuda/CudaGrid.h
rename to src/gpu/cuda_helper/CudaGrid.h
diff --git a/src/cuda/CudaTimer.cpp b/src/gpu/cuda_helper/CudaTimer.cpp
similarity index 100%
rename from src/cuda/CudaTimer.cpp
rename to src/gpu/cuda_helper/CudaTimer.cpp
diff --git a/src/cuda/CudaTimer.h b/src/gpu/cuda_helper/CudaTimer.h
similarity index 100%
rename from src/cuda/CudaTimer.h
rename to src/gpu/cuda_helper/CudaTimer.h
diff --git a/src/cuda/DeviceInfo.cpp b/src/gpu/cuda_helper/DeviceInfo.cpp
similarity index 100%
rename from src/cuda/DeviceInfo.cpp
rename to src/gpu/cuda_helper/DeviceInfo.cpp
diff --git a/src/cuda/DeviceInfo.h b/src/gpu/cuda_helper/DeviceInfo.h
similarity index 100%
rename from src/cuda/DeviceInfo.h
rename to src/gpu/cuda_helper/DeviceInfo.h
diff --git a/src/mpi/CMakeLists.txt b/src/mpi/CMakeLists.txt
deleted file mode 100644
index de1d58f5bd39e14742180b9cc6a77fb640d117f6..0000000000000000000000000000000000000000
--- a/src/mpi/CMakeLists.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-
-vf_add_library(NAME mpi PUBLIC_LINK logger PRIVATE_LINK MPI::MPI_CXX basics)
diff --git a/src/mpi/NullCommunicator.cpp b/src/mpi/NullCommunicator.cpp
deleted file mode 100644
index 267942895df0afbfae7e2d528feb67ff7a2c6519..0000000000000000000000000000000000000000
--- a/src/mpi/NullCommunicator.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-//=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
-//      \    \  |    |   ________________________________________________________________
-//       \    \ |    |  |  ______________________________________________________________|
-//        \    \|    |  |  |         __          __     __     __     ______      _______
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
-//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
-//
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file NullCommunicator.cpp
-//! \ingroup Parallel
-//! \author Konstantin Kutscher
-//=======================================================================================
-
-#include "NullCommunicator.h"
-
-namespace vf::mpi
-{
-
-    std::shared_ptr<Communicator> NullCommunicator::getInstance()
-    {
-        std::lock_guard<std::mutex> myLock(instantiation_mutex);
-        if (!instance){
-            instance = std::shared_ptr<NullCommunicator>(new NullCommunicator);
-        }
-        return instance;
-    }
-    //////////////////////////////////////////////////////////////////////////
-    int NullCommunicator::getBundleID() { return 0; }
-    //////////////////////////////////////////////////////////////////////////
-    int NullCommunicator::getNumberOfBundles() { return 0; }
-    //////////////////////////////////////////////////////////////////////////
-    int NullCommunicator::getProcessID() { return 0; }
-    //////////////////////////////////////////////////////////////////////////
-    int NullCommunicator::getNumberOfProcesses() { return 0; }
-    //////////////////////////////////////////////////////////////////////////
-    void *NullCommunicator::getNativeCommunicator() { return NULL; }
-    //////////////////////////////////////////////////////////////////////////
-    int NullCommunicator::getRoot() { return 0; }
-    //////////////////////////////////////////////////////////////////////////
-    int NullCommunicator::getBundleRoot() { return 0; }
-    //////////////////////////////////////////////////////////////////////////
-    int NullCommunicator::getProcessRoot() { return 0; }
-    //////////////////////////////////////////////////////////////////////////
-    std::vector<std::string> NullCommunicator::gather(const std::string & /*str*/) { return std::vector<std::string>(); }
-    //////////////////////////////////////////////////////////////////////////
-
-    void NullCommunicator::sendSerializedObject(std::stringstream &ss, int target) {}
-    //////////////////////////////////////////////////////////////////////////
-    void NullCommunicator::receiveSerializedObject(std::stringstream &ss, int source) {}
-
-    int NullCommunicator::getProcessID(int bundle, int rank) { return 0; }
-    bool NullCommunicator::isRoot() {return true; }
-
-    int NullCommunicator::getNumberOfProcessesInBundle(int bundle) {return 0;}
-    void NullCommunicator::barrier() {}
-    void NullCommunicator::abort(int errorcode) {}
-
-
-    std::vector<int> NullCommunicator::gather(std::vector<int> &values){ return std::vector<int>(); }
-    std::vector<float> NullCommunicator::gather(std::vector<float> &values){ return std::vector<float>(); }
-    std::vector<double> NullCommunicator::gather(std::vector<double> &values){ return std::vector<double>(); }
-    std::vector<unsigned long long> NullCommunicator::gather(std::vector<unsigned long long> &values){ return std::vector<unsigned long long>(); }
-
-    void NullCommunicator::allGather(std::vector<int> &svalues, std::vector<int> &rvalues){ }
-    void NullCommunicator::allGather(std::vector<float> &svalues, std::vector<float> &rvalues){ }
-    void NullCommunicator::allGather(std::vector<double> &svalues, std::vector<double> &rvalues){ }
-    void NullCommunicator::allGather(std::vector<unsigned long long> &svalues, std::vector<unsigned long long> &rvalues){ }
-
-    void NullCommunicator::broadcast(int &value){ }
-    void NullCommunicator::broadcast(float &value){ }
-    void NullCommunicator::broadcast(double &value){ }
-    void NullCommunicator::broadcast(long int &value){ }
-    void NullCommunicator::broadcast(std::vector<int> &values){ }
-    void NullCommunicator::broadcast(std::vector<float> &values){ }
-    void NullCommunicator::broadcast(std::vector<double> &values){ }
-    void NullCommunicator::broadcast(std::vector<long int> &values){ }
-}
diff --git a/src/parallel/CMakeLists.txt b/src/parallel/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..742ebc3631ed3419bae3ebf1a67b40baf6a5be28
--- /dev/null
+++ b/src/parallel/CMakeLists.txt
@@ -0,0 +1,6 @@
+
+vf_add_library(PUBLIC_LINK logger MPI::MPI_CXX basics)
+
+if(MSVC)
+    target_link_libraries(parallel PRIVATE ws2_32) # Winsock: MPICommunicator.cpp includes <Winsock2.h> on Windows and calls gethostname
+endif()
diff --git a/src/mpi/Communicator.cpp b/src/parallel/Communicator.cpp
similarity index 99%
rename from src/mpi/Communicator.cpp
rename to src/parallel/Communicator.cpp
index 937f4d819f67804380d807bfe5ef6048ff507058..b7708cb5ab79ce4acee9c76b6ba26e001b655c17 100644
--- a/src/mpi/Communicator.cpp
+++ b/src/parallel/Communicator.cpp
@@ -34,7 +34,7 @@
 #include "Communicator.h"
 #include <basics/utilities/UbException.h>
 
-namespace vf::mpi 
+namespace vf::parallel
 {
 std::mutex Communicator::instantiation_mutex = std::mutex();
 std::shared_ptr<Communicator> Communicator::instance = std::shared_ptr<Communicator>();
diff --git a/src/mpi/Communicator.h b/src/parallel/Communicator.h
similarity index 68%
rename from src/mpi/Communicator.h
rename to src/parallel/Communicator.h
index bcec064a23801c7b597d91deb601b44a1d9c7a71..23a760981944cfa143d7521275255f39a92bb7fe 100644
--- a/src/mpi/Communicator.h
+++ b/src/parallel/Communicator.h
@@ -34,43 +34,43 @@
 #ifndef MPI_COMMUNICATOR_H
 #define MPI_COMMUNICATOR_H
 
-#include <string>
-#include <vector>
 #include <memory>
-#include <sstream>
 #include <mutex>
+#include <sstream>
+#include <string>
+#include <vector>
 
+#include <basics/DataTypes.h>
 
-namespace vf::mpi 
+namespace vf::parallel
 {
 
 //! \brief An abstract class for communication between processes in parallel computation
 class Communicator
 {
 public:
-    Communicator(const Communicator&) = delete;
-    Communicator & operator=(const Communicator& rhs) = delete;
+    Communicator(const Communicator &) = delete;
+    Communicator &operator=(const Communicator &rhs) = delete;
     static std::shared_ptr<Communicator> getInstance();
 
     virtual ~Communicator() = default;
 
-    virtual int getBundleID()                      = 0;
-    virtual int getNumberOfBundles()               = 0;
-    virtual int getProcessID()                     = 0;
-    virtual int getProcessID(int bundle, int rank) = 0;
-    virtual int getNumberOfProcesses()             = 0;
-    virtual bool isRoot()                          = 0;
-    virtual void *getNativeCommunicator()          = 0;
+    virtual int getBundleID() const                      = 0;
+    virtual int getNumberOfBundles() const               = 0;
+    virtual int getProcessID() const                     = 0;
+    virtual int getProcessID(int bundle, int rank) const = 0;
+    virtual bool isRoot() const                          = 0;
+    virtual void *getNativeCommunicator()                = 0;
 
     virtual void sendSerializedObject(std::stringstream &ss, int target)    = 0;
     virtual void receiveSerializedObject(std::stringstream &ss, int source) = 0;
 
-    virtual int getRoot()                                = 0;
-    virtual int getBundleRoot()                          = 0;
-    virtual int getProcessRoot()                         = 0;
-    virtual int getNumberOfProcessesInBundle(int bundle) = 0;
-    virtual void barrier()                               = 0;
-    virtual void abort(int errorcode)                    = 0;
+    virtual int getRoot() const                                = 0;
+    virtual int getBundleRoot() const                          = 0;
+    virtual int getProcessRoot() const                         = 0;
+    virtual int getNumberOfProcessesInBundle(int bundle) const = 0;
+    virtual void barrier()                                     = 0;
+    virtual void abort(int errorcode)                          = 0;
 
     virtual std::vector<std::string> gather(const std::string &str)                         = 0;
     virtual std::vector<int> gather(std::vector<int> &values)                               = 0;
@@ -92,6 +92,20 @@ public:
     virtual void broadcast(std::vector<double> &values)   = 0;
     virtual void broadcast(std::vector<long int> &values) = 0;
 
+    virtual void receiveSend(uint *buffer_receive, int size_buffer_recv, int neighbor_rank_recv, uint *buffer_send,
+                             int size_buffer_send, int neighbor_rank_send) const = 0;
+    virtual int getNumberOfProcesses() const = 0;
+    virtual void send(real *sbuf, int count_s, int nb_rank) const = 0;
+    virtual double reduceSum(double quantityPerProcess) const = 0;
+    virtual int mapCudaDevicesOnHosts(const std::vector<unsigned int> &devices, int numberOfDevices) const = 0;
+    virtual void receiveSend(real *buffer_send, int size_buffer_send, real *buffer_receive, int size_buffer_recv,
+                             int neighbor_rank) const = 0;
+    virtual void receiveNonBlocking(real *rbuf, int count_r, int sourceRank) = 0;
+    virtual void sendNonBlocking(real *sbuf, int count_s, int destinationRank) = 0;
+    virtual void send(real *sbuf, int count_s, int destinationRank) = 0;
+    virtual void waitAll() = 0;
+    virtual void resetRequests() = 0;
+
 protected:
     Communicator() = default;
 
diff --git a/src/mpi/MPICommunicator.cpp b/src/parallel/MPICommunicator.cpp
similarity index 58%
rename from src/mpi/MPICommunicator.cpp
rename to src/parallel/MPICommunicator.cpp
index 4e7a155ef3b78f7daa0a582f84f2ef8af83886c8..8ae7ea66387f5e02b2e90dcacefbc324e81ca564 100644
--- a/src/mpi/MPICommunicator.cpp
+++ b/src/parallel/MPICommunicator.cpp
@@ -1,17 +1,26 @@
 #if defined VF_MPI
+#if defined (_WIN32) || defined (_WIN64)
+   #include <Winsock2.h>
+#elif defined (__unix__)
+   #include <unistd.h>
+#endif
 
 #include "MPICommunicator.h"
+
 #include <mpi.h>
 
 #include <sstream>
+
+#include <logger/Logger.h>
+
 using namespace std;
 
-namespace vf::mpi 
+namespace vf::parallel
 {
 std::shared_ptr<Communicator> MPICommunicator::getInstance()
 {
     std::lock_guard<std::mutex> myLock(instantiation_mutex);
-    if (!instance){
+    if (!instance) {
         instance = std::shared_ptr<MPICommunicator>(new MPICommunicator);
     }
     return instance;
@@ -22,13 +31,11 @@ MPICommunicator::MPICommunicator()
     // proof if MPI is initialized
     int mpiInitialized = 0; // false
     MPI_Initialized(&mpiInitialized);
-    if (!mpiInitialized) {
+    if (mpiInitialized == 0) {
         MPI_Init(NULL, NULL);
-        // MPI_Init_thread(NULL, NULL, MPI_THREAD_FUNNELED, NULL);
     }
     MPI_Comm_rank(MPI_COMM_WORLD, &PID);
     MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
-    // numprocs = 1000;
     comm = MPI_COMM_WORLD;
     root = 0;
 }
@@ -38,9 +45,8 @@ MPICommunicator::~MPICommunicator()
     // proof if MPI is finalized
     int _mpiFinalized = 0; // false
     MPI_Finalized(&_mpiFinalized);
-    if (!_mpiFinalized) {
+    if (_mpiFinalized == 0) {
         MPI_Finalize();
-        // UBLOG(logINFO, "MPI_Finalize()");
     }
 }
 //////////////////////////////////////////////////////////////////////////
@@ -88,27 +94,27 @@ std::vector<unsigned long long> MPICommunicator::gather(std::vector<unsigned lon
     return gather<unsigned long long>(values);
 }
 //////////////////////////////////////////////////////////////////////////
-int MPICommunicator::getProcessID() { return PID; }
+int MPICommunicator::getProcessID() const { return PID; }
 //////////////////////////////////////////////////////////////////////////
-int MPICommunicator::getProcessID(int /*bundle*/, int /*rank*/) { return PID; }
+int MPICommunicator::getProcessID(int /*bundle*/, int /*rank*/) const { return PID; }
 //////////////////////////////////////////////////////////////////////////
-int MPICommunicator::getNumberOfProcesses() { return numprocs; }
+int MPICommunicator::getNumberOfProcesses() const { return numprocs; }
 //////////////////////////////////////////////////////////////////////////
 void *MPICommunicator::getNativeCommunicator() { return &comm; }
 //////////////////////////////////////////////////////////////////////////
-int MPICommunicator::getBundleID() { return 0; }
+int MPICommunicator::getBundleID() const { return 0; }
 //////////////////////////////////////////////////////////////////////////
-int MPICommunicator::getNumberOfBundles() { return 1; }
+int MPICommunicator::getNumberOfBundles() const { return 1; }
 //////////////////////////////////////////////////////////////////////////
-int MPICommunicator::getRoot() { return root; }
+int MPICommunicator::getRoot() const { return root; }
 //////////////////////////////////////////////////////////////////////////
-int MPICommunicator::getBundleRoot() { return 0; }
+int MPICommunicator::getBundleRoot() const { return 0; }
 //////////////////////////////////////////////////////////////////////////
-int MPICommunicator::getProcessRoot() { return 0; }
+int MPICommunicator::getProcessRoot() const { return 0; }
 //////////////////////////////////////////////////////////////////////////
-int MPICommunicator::getNumberOfProcessesInBundle(int /*bundle*/) { return numprocs; }
+int MPICommunicator::getNumberOfProcessesInBundle(int /*bundle*/) const { return numprocs; }
 //////////////////////////////////////////////////////////////////////////
-bool MPICommunicator::isRoot() { return PID == root; }
+bool MPICommunicator::isRoot() const { return PID == root; }
 //////////////////////////////////////////////////////////////////////////
 void MPICommunicator::sendSerializedObject(std::stringstream &ss, int target)
 {
@@ -169,6 +175,120 @@ void MPICommunicator::broadcast(double &value) { broadcast<double>(value); }
 //////////////////////////////////////////////////////////////////////////
 void MPICommunicator::broadcast(long int &value) { broadcast<long int>(value); }
 
+void MPICommunicator::receiveSend(uint *buffer_receive, int size_buffer_recv,
+                                  int neighbor_rank_recv, uint *buffer_send, int size_buffer_send,
+                                  int neighbor_rank_send) const
+{
+    // Exchanges unsigned-int buffers (tag 0): receives from neighbor_rank_recv and sends to neighbor_rank_send.
+    // The receive is posted non-blocking before the blocking send and is completed after it.
+    MPI_Request recv_request;
+    MPI_Irecv(buffer_receive, size_buffer_recv, MPI_UNSIGNED, neighbor_rank_recv, 0, comm,
+              &recv_request);
+    MPI_Send(buffer_send, size_buffer_send, MPI_UNSIGNED, neighbor_rank_send, 0, comm);
+    // MPI_Wait takes a single status, so the singular MPI_STATUS_IGNORE is the matching constant;
+    // MPI_STATUSES_IGNORE is reserved for the array variants such as MPI_Waitall.
+    MPI_Wait(&recv_request, MPI_STATUS_IGNORE); // TODO: Is there a benefit over a plain blocking receive here?
+}
+
+void MPICommunicator::receiveSend(real *buffer_send, int size_buffer_send, real *buffer_receive, int size_buffer_recv,
+                     int neighbor_rank) const
+{ // Combined MPI_Sendrecv (tag 0): a separate MPI_Send + MPI_Recv can deadlock when both neighbors send first.
+    MPI_Sendrecv(buffer_send, size_buffer_send, VF_MPI_REAL, neighbor_rank, 0, // outgoing part
+                 buffer_receive, size_buffer_recv, VF_MPI_REAL, neighbor_rank, 0, comm, MPI_STATUS_IGNORE);
+}
+
+void MPICommunicator::send(real *sbuf, int count_s, int nb_rank) const
+{ // Blocking MPI_Send of count_s values of type `real` from sbuf to rank nb_rank, using tag 0 on `comm`.
+    MPI_Send(sbuf, count_s, VF_MPI_REAL, nb_rank, 0, comm);
+}
+
+double MPICommunicator::reduceSum(double quantityPerProcess) const
+{
+    // Sums quantityPerProcess over all ranks of `comm`. MPI_Reduce delivers the result to rank 0 only, so every
+    // other rank returns the 0.0 initial value. A local variable replaces the former malloc'ed buffer, which was
+    // never freed (one leaked double per call) and was read uninitialised on non-root ranks.
+    double sum = 0.0;
+    MPI_Reduce(&quantityPerProcess, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
+    return sum;
+}
+
+int MPICommunicator::mapCudaDevicesOnHosts(const std::vector<unsigned int> &devices, int numberOfDevices) const
+{
+    // Rank 0 gathers the host name of every rank, gives the k-th rank found on a host the entry devices[k] and
+    // scatters the result; the return value is the entry chosen for the calling rank.
+    // std::vector replaces the former unchecked malloc/free pairs (no null dereference, no manual cleanup).
+    int device = -1;
+    const int numberOfRanks = getNumberOfProcesses();
+    std::vector<char> host(static_cast<std::size_t>(numberOfRanks) * 255);
+    std::vector<unsigned int> map(static_cast<std::size_t>(numberOfRanks));
+
+    char hostname[255] = {}; // zero-filled so the bytes after the name that get gathered are defined
+    gethostname(hostname, 254);
+    hostname[254] = 0;
+
+    MPI_Gather(hostname, 255, MPI_BYTE, host.data(), 255, MPI_BYTE, 0, MPI_COMM_WORLD);
+
+    if (isRoot()) {
+        for (int i = 0; i < numberOfRanks; i++) {
+            int counter = 0; // number of lower ranks that reported the same host name as rank i
+            for (int j = 0; j < i; j++) {
+                if (strcmp(&host[i * 255], &host[j * 255]) == 0)
+                    counter++;
+            }
+            // The size check additionally guards devices[counter] when fewer ids than numberOfDevices were passed.
+            if (counter >= numberOfDevices || static_cast<std::size_t>(counter) >= devices.size()) {
+                VF_LOG_CRITICAL("More processes than GPUs!");
+                exit(1);
+            }
+            map[i] = devices[counter];
+        }
+    }
+
+    MPI_Scatter(map.data(), 1, MPI_UNSIGNED, &device, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD);
+    VF_LOG_INFO("Rank: {} runs on host: {} with GPU: {}", getProcessID(), hostname, device);
+    return device;
+}
+
+void MPICommunicator::receiveNonBlocking(real *rbuf, int count_r, int sourceRank)
+{
+    // Posts a non-blocking receive (MPI_Irecv, tag 0) of count_r values of type `real` from sourceRank into rbuf.
+    // The call only starts the transfer; it does not wait for the data.
+
+    MPI_Request request;
+    MPI_Irecv(rbuf, count_r, VF_MPI_REAL, sourceRank, 0, comm, &request);
+    requests.push_back(request);
+
+    // The request handle is appended to `requests`; waitAll() hands that list to MPI_Waitall and
+    // resetRequests() clears it.
+}
+
+void MPICommunicator::sendNonBlocking(real *sbuf, int count_s, int destinationRank)
+{
+    // Starts a non-blocking send (MPI_Isend, tag 0) of count_s values of type `real` from sbuf to destinationRank.
+    // The call only starts the transfer; it does not wait for completion.
+
+    MPI_Request request;
+    MPI_Isend(sbuf, count_s, VF_MPI_REAL, destinationRank, 0, comm, &request);
+    requests.push_back(request);
+    // The request handle is appended to `requests`; waitAll() hands that list to MPI_Waitall and
+    // resetRequests() clears it.
+}
+
+void MPICommunicator::send(real *sbuf, int count_s, int destinationRank)
+{ // Blocking MPI_Send of count_s `real` values to destinationRank (tag 0); same call as the const overload.
+    MPI_Send(sbuf, count_s, VF_MPI_REAL, destinationRank, 0, comm);
+}
+
+void MPICommunicator::waitAll()
+{ // Waits (MPI_Waitall) for every request handle currently stored in `requests`; the list itself is not cleared.
+    MPI_Waitall(static_cast<int>(requests.size()), requests.data(), MPI_STATUSES_IGNORE);
+}
+
+void MPICommunicator::resetRequests()
+{ // Empties the list of stored request handles; it neither waits for nor cancels them.
+    requests.clear();
+}
+
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/src/mpi/MPICommunicator.h b/src/parallel/MPICommunicator.h
similarity index 80%
rename from src/mpi/MPICommunicator.h
rename to src/parallel/MPICommunicator.h
index 941bdac8f03d64184c17966ada27111193bbfa2d..466a3c3f8d311c743b546116a0a4ca9a23735488 100644
--- a/src/mpi/MPICommunicator.h
+++ b/src/parallel/MPICommunicator.h
@@ -4,14 +4,22 @@
 #define MPI_MPICOMMUNICATOR_H
 
 #include "Communicator.h"
-#include <PointerDefinitions.h>
+#include <basics/PointerDefinitions.h>
 #include <basics/utilities/UbException.h>
 #include <basics/utilities/UbLogger.h>
 #include <mpi.h>
 #include <string>
 #include <vector>
 
-namespace vf::mpi 
+//////////////////////////////////
+#ifdef VF_DOUBLE_ACCURACY
+#define VF_MPI_REAL MPI_DOUBLE
+#else
+#define VF_MPI_REAL MPI_FLOAT
+#endif
+//////////////////////////////////
+
+namespace vf::parallel
 {
 
 //! \brief A class uses MPI library to communication.
@@ -25,17 +33,17 @@ public:
 
     ~MPICommunicator() override;
     static std::shared_ptr<Communicator> getInstance();
-    int getBundleID() override;
-    int getNumberOfBundles() override;
-    int getProcessID() override;
-    int getProcessID(int bundle, int rank) override;
-    int getNumberOfProcesses() override;
+    int getBundleID() const override;
+    int getNumberOfBundles() const override;
+    int getProcessID() const override;
+    int getProcessID(int bundle, int rank) const override;
+    int getNumberOfProcesses() const override;
     void *getNativeCommunicator() override;
-    int getRoot() override;
-    int getBundleRoot() override;
-    int getProcessRoot() override;
-    int getNumberOfProcessesInBundle(int bundle) override;
-    bool isRoot() override;
+    int getRoot() const override;
+    int getBundleRoot() const override;
+    int getProcessRoot() const override;
+    int getNumberOfProcessesInBundle(int bundle) const override;
+    bool isRoot() const override;
     void abort(int errorcode) override;
 
     void sendSerializedObject(std::stringstream &ss, int target) override;
@@ -75,12 +83,30 @@ public:
     template <class T>
     void broadcast(T &value);
 
+    void receiveSend(uint *buffer_receive, int size_buffer_recv, int neighbor_rank_recv, uint *buffer_send,
+                     int size_buffer_send, int neighbor_rank_send) const override;
+
+    void send(real *sbuf, int count_s, int nb_rank) const override;
+    double reduceSum(double quantityPerProcess) const override;
+
+    int mapCudaDevicesOnHosts(const std::vector<unsigned int> &devices, int numberOfDevices) const override;
+    void receiveSend(real *buffer_send, int size_buffer_send, real *buffer_receive, int size_buffer_recv,
+                     int neighbor_rank) const override;
+
+    void receiveNonBlocking(real *rbuf, int count_r, int sourceRank) override;
+    void sendNonBlocking(real *sbuf, int count_s, int destinationRank) override;
+    void send(real *sbuf, int count_s, int destinationRank) override;
+    void waitAll() override;
+    void resetRequests() override;
+
 private:
     MPICommunicator();
 
     int numprocs, PID;
     MPI_Comm comm;
     int root;
+
+    std::vector<MPI_Request> requests;
 };
 
 //////////////////////////////////////////////////////////////////////////
@@ -96,6 +122,8 @@ std::vector<T> MPICommunicator::gather(std::vector<T> &values)
         mpiDataType = MPI_INT;
     else if ((std::string) typeid(T).name() == (std::string) typeid(unsigned long long).name())
         mpiDataType = MPI_UNSIGNED_LONG_LONG;
+    else if ((std::string) typeid(T).name() == (std::string) typeid(char).name())
+        mpiDataType = MPI_CHAR;
     else
         throw UbException(UB_EXARGS, "no MpiDataType for T" + (std::string) typeid(T).name());
 
@@ -209,6 +237,7 @@ void MPICommunicator::broadcast(T &value)
 }
 //////////////////////////////////////////////////////////////////////////
 
+
 #endif
 
 }
diff --git a/src/parallel/NullCommunicator.cpp b/src/parallel/NullCommunicator.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..56c17a8c3c466dae3299d0b32f9e918c68cc1610
--- /dev/null
+++ b/src/parallel/NullCommunicator.cpp
@@ -0,0 +1,217 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NullCommunicator.cpp
+//! \ingroup Parallel
+//! \author Konstantin Kutscher
+//=======================================================================================
+
+#include "NullCommunicator.h"
+
+#include <memory>
+
+namespace vf::parallel
+{
+// NullCommunicator implements Communicator without message passing: queries return constants, transfers do nothing.
+std::shared_ptr<Communicator> NullCommunicator::getInstance()
+{
+    std::lock_guard<std::mutex> myLock(instantiation_mutex); // held for the whole check-and-create step
+    if (!instance) {
+        instance = std::make_shared<NullCommunicator>(); // nothing stored yet: create a NullCommunicator
+    }
+    return instance;
+}
+//////////////////////////////////////////////////////////////////////////
+int NullCommunicator::getBundleID() const
+{
+    return 0;
+}
+//////////////////////////////////////////////////////////////////////////
+int NullCommunicator::getNumberOfBundles() const
+{
+    return 0;
+}
+//////////////////////////////////////////////////////////////////////////
+int NullCommunicator::getProcessID() const
+{
+    return 0;
+}
+//////////////////////////////////////////////////////////////////////////
+int NullCommunicator::getNumberOfProcesses() const
+{
+    return 1;
+}
+//////////////////////////////////////////////////////////////////////////
+void *NullCommunicator::getNativeCommunicator()
+{
+    return nullptr; // there is no native communicator object behind this class
+}
+//////////////////////////////////////////////////////////////////////////
+int NullCommunicator::getRoot() const
+{
+    return 0;
+}
+//////////////////////////////////////////////////////////////////////////
+int NullCommunicator::getBundleRoot() const
+{
+    return 0;
+}
+//////////////////////////////////////////////////////////////////////////
+int NullCommunicator::getProcessRoot() const
+{
+    return 0;
+}
+//////////////////////////////////////////////////////////////////////////
+std::vector<std::string> NullCommunicator::gather(const std::string & /*str*/)
+{
+    return {}; // always an empty vector; the argument is not used
+}
+//////////////////////////////////////////////////////////////////////////
+
+void NullCommunicator::sendSerializedObject(std::stringstream & /*stream*/, int /*target*/)
+{ // no-op; parameter names are commented out because they are unused (same style as gather(const std::string &))
+}
+//////////////////////////////////////////////////////////////////////////
+void NullCommunicator::receiveSerializedObject(std::stringstream & /*stream*/, int /*source*/)
+{ // no-op: the stream is left untouched
+}
+
+int NullCommunicator::getProcessID(int /*bundle*/, int /*rank*/) const
+{
+    return 0;
+}
+bool NullCommunicator::isRoot() const
+{
+    return true;
+}
+
+int NullCommunicator::getNumberOfProcessesInBundle(int /*bundle*/) const
+{
+    return 0; // NOTE(review): getNumberOfProcesses() returns 1 while this returns 0 - confirm this is intended
+}
+void NullCommunicator::barrier()
+{ // no-op
+}
+void NullCommunicator::abort(int /*errorcode*/)
+{ // no-op: the error code is ignored and the process is not terminated here
+}
+
+std::vector<int> NullCommunicator::gather(std::vector<int> & /*values*/)
+{
+    return {}; // all gather overloads return an empty vector and ignore their input
+}
+std::vector<float> NullCommunicator::gather(std::vector<float> & /*values*/)
+{
+    return {};
+}
+std::vector<double> NullCommunicator::gather(std::vector<double> & /*values*/)
+{
+    return {};
+}
+std::vector<unsigned long long> NullCommunicator::gather(std::vector<unsigned long long> & /*values*/)
+{
+    return {};
+}
+
+void NullCommunicator::allGather(std::vector<int> & /*svalues*/, std::vector<int> & /*rvalues*/)
+{ // all allGather overloads are no-ops: rvalues is left unchanged
+}
+void NullCommunicator::allGather(std::vector<float> & /*svalues*/, std::vector<float> & /*rvalues*/)
+{
+}
+void NullCommunicator::allGather(std::vector<double> & /*svalues*/, std::vector<double> & /*rvalues*/)
+{
+}
+void NullCommunicator::allGather(std::vector<unsigned long long> & /*svalues*/, std::vector<unsigned long long> & /*rvalues*/)
+{
+}
+
+void NullCommunicator::broadcast(int & /*value*/)
+{ // all broadcast overloads are no-ops: the referenced value keeps its content
+}
+void NullCommunicator::broadcast(float & /*value*/)
+{
+}
+void NullCommunicator::broadcast(double & /*value*/)
+{
+}
+void NullCommunicator::broadcast(long int & /*value*/)
+{
+}
+void NullCommunicator::broadcast(std::vector<int> & /*values*/)
+{
+}
+void NullCommunicator::broadcast(std::vector<float> & /*values*/)
+{
+}
+void NullCommunicator::broadcast(std::vector<double> & /*values*/)
+{
+}
+void NullCommunicator::broadcast(std::vector<long int> & /*values*/)
+{
+}
+
+void NullCommunicator::receiveSend(uint * /*buffer_receive*/, int /*size_buffer_recv*/, int /*neighbor_rank_recv*/,
+                                   uint * /*buffer_send*/, int /*size_buffer_send*/, int /*neighbor_rank_send*/) const
+{ // no-op: neither buffer is read or written
+}
+
+void NullCommunicator::send(real * /*sbuf*/, int /*count_s*/, int /*nb_rank*/) const {} // no-op (stray ';' removed)
+double NullCommunicator::reduceSum(double /*quantityPerProcess*/) const
+{
+    return 0.0; // NOTE(review): the input is ignored; confirm that 0.0 rather than the input is the intended sum
+}
+int NullCommunicator::mapCudaDevicesOnHosts(const std::vector<unsigned int> & /*devices*/, int /*numberOfDevices*/) const
+{
+    return 0; // constant result; the `devices` list is not consulted
+}
+
+void NullCommunicator::receiveSend(real * /*buffer_send*/, int /*size_buffer_send*/, real * /*buffer_receive*/,
+                                   int /*size_buffer_recv*/, int /*neighbor_rank*/) const
+{ // no-op: neither buffer is read or written
+}
+
+void NullCommunicator::receiveNonBlocking(real * /*rbuf*/, int /*count_r*/, int /*sourceRank*/)
+{ // no-op
+}
+void NullCommunicator::sendNonBlocking(real * /*sbuf*/, int /*count_s*/, int /*destinationRank*/)
+{ // no-op
+}
+
+void NullCommunicator::send(real * /*sbuf*/, int /*count_s*/, int /*destinationRank*/)
+{ // no-op
+}
+
+void NullCommunicator::waitAll()
+{ // no-op: this class stores no requests
+}
+
+void NullCommunicator::resetRequests()
+{ // no-op: this class stores no requests
+}
+} // namespace vf::parallel
diff --git a/src/mpi/NullCommunicator.h b/src/parallel/NullCommunicator.h
similarity index 53%
rename from src/mpi/NullCommunicator.h
rename to src/parallel/NullCommunicator.h
index 836f801ab6d15e377da8a34dfed8016b05f86e3a..295f946dbe8dd05c89ec57b733aa84867b5bea70 100644
--- a/src/mpi/NullCommunicator.h
+++ b/src/parallel/NullCommunicator.h
@@ -36,7 +36,7 @@
 
 #include "Communicator.h"
 
-namespace vf::mpi
+namespace vf::parallel
 {
 
 //! \brief A class implements Communicator for shared memory.
@@ -46,45 +46,62 @@ class NullCommunicator : public Communicator
 public:
     static std::shared_ptr<Communicator> getInstance();
 
-    int getBundleID();
-    int getNumberOfBundles();
-    int getProcessID();
-    int getProcessID(int bundle, int rank);
-    int getNumberOfProcesses();
-    bool isRoot();
-    void *getNativeCommunicator();
-
-    void sendSerializedObject(std::stringstream &ss, int target);
-    void receiveSerializedObject(std::stringstream &ss, int source);
-
-    int getRoot();
-    int getBundleRoot();
-    int getProcessRoot();
-    int getNumberOfProcessesInBundle(int bundle);
-    void barrier();
-    void abort(int errorcode);
-
-    std::vector<std::string> gather(const std::string &str);
-    std::vector<int> gather(std::vector<int> &values);
-    std::vector<float> gather(std::vector<float> &values);
-    std::vector<double> gather(std::vector<double> &values);
-    std::vector<unsigned long long> gather(std::vector<unsigned long long> &values);
-
-    void allGather(std::vector<int> &svalues, std::vector<int> &rvalues);
-    void allGather(std::vector<float> &svalues, std::vector<float> &rvalues);
-    void allGather(std::vector<double> &svalues, std::vector<double> &rvalues);
-    void allGather(std::vector<unsigned long long> &svalues, std::vector<unsigned long long> &rvalues);
-
-    void broadcast(int &value);
-    void broadcast(float &value);
-    void broadcast(double &value);
-    void broadcast(long int &value);
-    void broadcast(std::vector<int> &values);
-    void broadcast(std::vector<float> &values);
-    void broadcast(std::vector<double> &values);
-    void broadcast(std::vector<long int> &values);
+    int getBundleID() const override;
+    int getNumberOfBundles() const override;
+    int getProcessID() const override;
+    int getProcessID(int bundle, int rank) const override;
+    int getNumberOfProcesses() const override;
+    bool isRoot() const override;
+    void *getNativeCommunicator() override;
+
+    void sendSerializedObject(std::stringstream &stream, int target) override;
+    void receiveSerializedObject(std::stringstream &stream, int source) override;
+
+    int getRoot() const override;
+    int getBundleRoot() const override;
+    int getProcessRoot() const override;
+    int getNumberOfProcessesInBundle(int bundle) const override;
+    void barrier() override;
+    void abort(int errorcode) override;
+
+    std::vector<std::string> gather(const std::string &str) override;
+    std::vector<int> gather(std::vector<int> &values) override;
+    std::vector<float> gather(std::vector<float> &values) override;
+    std::vector<double> gather(std::vector<double> &values) override;
+    std::vector<unsigned long long> gather(std::vector<unsigned long long> &values) override;
+
+    void allGather(std::vector<int> &svalues, std::vector<int> &rvalues) override;
+    void allGather(std::vector<float> &svalues, std::vector<float> &rvalues) override;
+    void allGather(std::vector<double> &svalues, std::vector<double> &rvalues) override;
+    void allGather(std::vector<unsigned long long> &svalues, std::vector<unsigned long long> &rvalues) override;
+
+    void broadcast(int &value) override;
+    void broadcast(float &value) override;
+    void broadcast(double &value) override;
+    void broadcast(long int &value) override;
+    void broadcast(std::vector<int> &values) override;
+    void broadcast(std::vector<float> &values) override;
+    void broadcast(std::vector<double> &values) override;
+    void broadcast(std::vector<long int> &values) override;
+
+    void receiveSend(uint *buffer_receive, int size_buffer_recv, int neighbor_rank_recv, uint *buffer_send,
+                     int size_buffer_send, int neighbor_rank_send) const override;
+
+    void send(real *sbuf, int count_s, int nb_rank) const override;
+
+    double reduceSum(double quantityPerProcess) const override;
+    int mapCudaDevicesOnHosts(const std::vector<unsigned int> &devices, int numberOfDevices) const override;
+
+    void receiveSend(real *buffer_send, int size_buffer_send, real *buffer_receive, int size_buffer_recv,
+                     int neighbor_rank) const override;
+
+    void receiveNonBlocking(real *rbuf, int count_r, int sourceRank) override;
+    void sendNonBlocking(real *sbuf, int count_s, int destinationRank) override;
+    void send(real *sbuf, int count_s, int destinationRank) override;
+    void waitAll() override;
+    void resetRequests() override;
 };
 
-}
+} // namespace vf::parallel
 
 #endif