diff --git a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
index 18f6432e2d13fb605c1de4a311b9af09e7822810..ded3d2f3315d651c94add505e142ee585063d13a 100644
--- a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
+++ b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
@@ -143,12 +143,12 @@ void multipleLevel(const std::string& configPath)
     {
 
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        vf::gpu::Communicator* comm = vf::gpu::Communicator::getInstanz();
+        vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
 
         vf::basics::ConfigurationFile config;
         config.load(configPath);
 
-        SPtr<Parameter> para = std::make_shared<Parameter>(config, comm->getNummberOfProcess(), comm->getPID());
+        SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID());
 
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -206,7 +206,7 @@ void multipleLevel(const std::string& configPath)
 
         SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager);
 
-        Simulation sim;
+        Simulation sim(communicator);
         SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
         SPtr<KernelFactoryImp> kernelFactory = KernelFactoryImp::getInstance();
         SPtr<PreProcessorFactoryImp> preProcessorFactory = PreProcessorFactoryImp::getInstance();
@@ -334,8 +334,6 @@ void multipleLevel(const std::string& configPath)
 
 int main( int argc, char* argv[])
 {
-    MPI_Init(&argc, &argv);
-
     try
     {
         vf::logging::Logger::initalizeLogger();
@@ -362,6 +360,5 @@ int main( int argc, char* argv[])
         VF_LOG_CRITICAL("Unknown exception!");
     }
 
-   MPI_Finalize();
    return 0;
 }
diff --git a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp b/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
index bc0fdfa440a1eb1fa466bccf3a68e6216a513fbb..88ec364ea0e7d6a9010d67dac26f4a442db45e8f 100644
--- a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
+++ b/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
@@ -54,6 +54,7 @@
 
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 
+#include <logger/Logger.h>
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -187,12 +188,12 @@ void multipleLevel(const std::string& configPath)
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-    vf::gpu::Communicator* comm = vf::gpu::Communicator::getInstanz();
+    vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
 
     vf::basics::ConfigurationFile config;
     config.load(configPath);
 
-    SPtr<Parameter> para = std::make_shared<Parameter>(config, comm->getNummberOfProcess(), comm->getPID());
+    SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID());
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     const real velocityLB = (real)0.0844; // LB units
@@ -323,7 +324,7 @@ void multipleLevel(const std::string& configPath)
 
     SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager);
 
-    Simulation sim;
+    Simulation sim(communicator);
     SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
     SPtr<KernelFactoryImp> kernelFactory = KernelFactoryImp::getInstance();
     SPtr<PreProcessorFactoryImp> preProcessorFactory = PreProcessorFactoryImp::getInstance();
@@ -717,31 +718,31 @@ std::string chooseVariation()
 
 int main( int argc, char* argv[])
 {
-    MPI_Init(&argc, &argv);
-    if ( argv != NULL )
+    try
     {
-        try
-        {
-            // assuming that the config files is stored parallel to this file.
-            std::filesystem::path filePath = __FILE__;
-            filePath.replace_filename("configDrivenCavity.txt");
+        vf::logging::Logger::initalizeLogger();
 
-            multipleLevel(filePath.string());
-        }
-        catch (const std::bad_alloc& e)
-        { 
-            *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
-        }
-        catch (const std::exception& e)
-        {   
-            *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
-        }
-        catch (...)
-        {
-            *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
-        }
+        // assuming that the config file is stored parallel to this file.
+        std::filesystem::path filePath = __FILE__;
+        filePath.replace_filename("configDrivenCavity.txt");
+
+        multipleLevel(filePath.string());
+    }
+    catch (const spdlog::spdlog_ex &ex) {
+        std::cout << "Log initialization failed: " << ex.what() << std::endl;
+    }
+    catch (const std::bad_alloc& e)
+    { 
+        VF_LOG_CRITICAL("Bad Alloc: {}", e.what());
+    }
+    catch (const std::exception& e)
+    {   
+        VF_LOG_CRITICAL("exception: {}", e.what());
+    }
+    catch (...)
+    {
+        VF_LOG_CRITICAL("Unknown exception!");
     }
 
-   MPI_Finalize();
    return 0;
 }
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
index 9b90e03648c3485dc496dac86deadeb7247e6a58..71757d073fb30bc888173fa47adf7236e199f3a5 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
@@ -1,4 +1,4 @@
-#include "Calculation/UpdateGrid27.h"
+#include "UpdateGrid27.h"
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
 #include "Calculation/DragLift.h"
@@ -9,7 +9,7 @@
 #include "Kernel/Kernel.h"
 
 void updateGrid27(Parameter* para, 
-                  vf::gpu::Communicator* comm, 
+                  vf::gpu::Communicator& comm, 
                   CudaMemoryManager* cudaManager, 
                   std::vector<std::shared_ptr<PorousMedia>>& pm, 
                   int level, 
@@ -149,7 +149,7 @@ void collisionAdvectionDiffusion(Parameter* para, int level)
 	}
 }
 
-void exchangeMultiGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangeMultiGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
     if (para->getNumprocs() > 1)
 	{
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
index c66d6afd40e4261ce0a6800c6239071c81c95179..ac0d13d6454e4002117dc2874df98172f8e79a30 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
@@ -11,7 +11,7 @@
 class Kernel;
 
 extern "C" void updateGrid27(Parameter* para, 
-                             vf::gpu::Communicator* comm, 
+                             vf::gpu::Communicator& comm, 
                              CudaMemoryManager* cudaManager, 
                              std::vector<std::shared_ptr<PorousMedia>>& pm, 
                              int level,
@@ -24,7 +24,7 @@ extern "C" void collisionPorousMedia(Parameter* para, std::vector<std::shared_pt
 
 extern "C" void collisionAdvectionDiffusion(Parameter* para, int level);
 
-extern "C" void exchangeMultiGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangeMultiGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
 
 extern "C" void postCollisionBC(Parameter* para, int level, unsigned int t);
 
diff --git a/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp b/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp
index 32fd45109538101a35e253caff102c4f4df1a4a5..2743f454e321bf21cb4d0b7fd08aab8600a2bee8 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp
+++ b/src/gpu/VirtualFluids_GPU/Communication/Communicator.cpp
@@ -1,9 +1,9 @@
 #include "Communicator.h"
+
 #include <mpi.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <vector>
-#include <string.h>
+
+#include <logger/Logger.h>
 
 #if defined (_WIN32) || defined (_WIN64)
    #include <Winsock2.h>
@@ -12,13 +12,19 @@
 #endif
 //lib for windows Ws2_32.lib
 
-namespace vf
-{
-namespace gpu
+namespace vf::gpu
 {
 
+
 Communicator::Communicator()
 {
+    int mpiInitialized = 0; // false
+    MPI_Initialized(&mpiInitialized);
+    if (!mpiInitialized) {
+        MPI_Init(NULL, NULL);
+        VF_LOG_TRACE("vf::gpu::Communicator(): MPI_Init");
+    }
+
     MPI_Comm_rank(MPI_COMM_WORLD, &PID);
     MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
 
@@ -29,21 +35,25 @@ Communicator::Communicator()
     // Get my position in this communicator, and my neighbors
     MPI_Cart_shift(comm1d, 0, 1, &nbrbottom, &nbrtop);
 }
-// Crap by Martin Sch.
-Communicator::Communicator(const int numberOfProcs)
+
+Communicator::~Communicator()
 {
-    MPI_Comm_rank(MPI_COMM_WORLD, &PID);
-    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
-    commGPU = MPI_COMM_WORLD;
-    requestGPU.resize(0);
-    rcount = 0;
+    // check whether MPI has already been finalized
+    int _mpiFinalized = 0; // false
+    MPI_Finalized(&_mpiFinalized);
+    if (!_mpiFinalized) {
+        MPI_Finalize();
+        VF_LOG_TRACE("vf::gpu::~Communicator(): MPI_Finalize");
+    }
 }
-Communicator *Communicator::instanz = 0;
-Communicator *Communicator::getInstanz()
+
+
+// C++11 thread-safe singleton implementation:
+// https://stackoverflow.com/questions/1661529/is-meyers-implementation-of-the-singleton-pattern-thread-safe
+Communicator& Communicator::getInstance()
 {
-    if (instanz == 0)
-        instanz = new Communicator(0);
-    return instanz;
+    static Communicator comm;
+    return comm;
 }
 
 void Communicator::exchngBottomToTop(float *sbuf, float *rbuf, int count)
@@ -189,7 +199,7 @@ int Communicator::mapCudaDevice(const int &rank, const int &size, const std::vec
                     counter++;
             }
             if (counter >= maxdev) {
-                fprintf(stderr, "More processes than GPUs!\n");
+                VF_LOG_CRITICAL("More processes than GPUs!");
                 exit(1);
             }
             map[i] = devices[counter];
@@ -198,12 +208,11 @@ int Communicator::mapCudaDevice(const int &rank, const int &size, const std::vec
 
     MPI_Scatter(map, 1, MPI_UNSIGNED, &device, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD);
 
-    printf("Rank: %d runs on host: %s with GPU: %d\n", rank, hostname, device);
+    VF_LOG_INFO("Rank: {} runs on host: {} with GPU: {}", rank, hostname, device);
 
     free(map);
     free(host);
     return device;
 }
 
-} // namespace GPU
-} // namespace VF
+} // namespace vf::gpu
diff --git a/src/gpu/VirtualFluids_GPU/Communication/Communicator.h b/src/gpu/VirtualFluids_GPU/Communication/Communicator.h
index 72c4a136ece03098c10ea65493ba02a0109ed95d..256dde87e8ff6b3a8c7abcae0ac31466cc68ba95 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/Communicator.h
+++ b/src/gpu/VirtualFluids_GPU/Communication/Communicator.h
@@ -3,14 +3,11 @@
 
 #include <vector>
 
-
-
-
 #include <mpi.h>
 
 #include "VirtualFluids_GPU_export.h"
 
-#include "LBM/LB.h"
+#include <basics/Core/DataTypes.h>
 
 //////////////////////////////////
 #ifdef VF_DOUBLE_ACCURACY
@@ -21,48 +18,46 @@
 //////////////////////////////////
 
 
-namespace vf
-{
-namespace gpu
+namespace vf::gpu
 {
 
 
 class VIRTUALFLUIDS_GPU_EXPORT Communicator
 {
 public:
-	static Communicator* getInstanz();
-	static Communicator* getInstanz(const int numberOfProcs);
-	void exchngBottomToTop(float* sbuf, float* rbuf, int count);
-	void exchngTopToBottom(float* sbuf, float* rbuf, int count);
-   void waitAll();
-   void distributeGeometry(unsigned int* dataRoot, unsigned int* dataNode, int dataSizePerNode);
-	int getPID() const;
-	int getNummberOfProcess() const;
-	int getNeighbourTop();
-	int getNeighbourBottom();
-   void exchngData(float* sbuf_t, float* rbuf_t, float* sbuf_b, float* rbuf_b, int count);
-   void exchngDataNB(float* sbuf_t, int count_st, float* rbuf_t, int count_rt, float* sbuf_b, int count_sb, float* rbuf_b, int count_rb);
-   //////////////////////////////////////////////////////////////////////////
-   void exchngDataGPU(real* sbuf, int count_s, real* rbuf, int count_r, int nb_rank);
-   void sendRecvGPU(real* sbuf, int count_s, real* rbuf, int count_r, int nb_rank);
-   void nbRecvDataGPU( real* rbuf, int count_r, int nb_rank );
-   void nbSendDataGPU( real* sbuf, int count_s, int nb_rank );
-   void waitallGPU();
-   void sendDataGPU( real* sbuf, int count_s, int nb_rank );
-   void waitGPU(int id);
-   void resetRequest();
-   void barrierGPU();
-   void barrier();
-   //////////////////////////////////////////////////////////////////////////
-   void exchngDataGeo(int* sbuf_t, int* rbuf_t, int* sbuf_b, int* rbuf_b, int count);
-	MPI_Comm getCommunicator();
-	void startTimer();
-	void stopTimer();
-	double getTime();
-	int mapCudaDevice(const int &rank, const int &size, const std::vector<unsigned int> &devices, const int &maxdev);
-protected:
+    static Communicator& getInstance();
+    Communicator(const Communicator&) = delete;
+    Communicator& operator=(const Communicator&) = delete;
+
+    void exchngBottomToTop(float* sbuf, float* rbuf, int count);
+    void exchngTopToBottom(float* sbuf, float* rbuf, int count);
+    void waitAll();
+    void distributeGeometry(unsigned int* dataRoot, unsigned int* dataNode, int dataSizePerNode);
+    int getPID() const;
+    int getNummberOfProcess() const;
+    int getNeighbourTop();
+    int getNeighbourBottom();
+    void exchngData(float* sbuf_t, float* rbuf_t, float* sbuf_b, float* rbuf_b, int count);
+    void exchngDataNB(float* sbuf_t, int count_st, float* rbuf_t, int count_rt, float* sbuf_b, int count_sb, float* rbuf_b, int count_rb);
+    //////////////////////////////////////////////////////////////////////////
+    void exchngDataGPU(real* sbuf, int count_s, real* rbuf, int count_r, int nb_rank);
+    void sendRecvGPU(real* sbuf, int count_s, real* rbuf, int count_r, int nb_rank);
+    void nbRecvDataGPU( real* rbuf, int count_r, int nb_rank );
+    void nbSendDataGPU( real* sbuf, int count_s, int nb_rank );
+    void waitallGPU();
+    void sendDataGPU( real* sbuf, int count_s, int nb_rank );
+    void waitGPU(int id);
+    void resetRequest();
+    void barrierGPU();
+    void barrier();
+    //////////////////////////////////////////////////////////////////////////
+    void exchngDataGeo(int* sbuf_t, int* rbuf_t, int* sbuf_b, int* rbuf_b, int count);
+    MPI_Comm getCommunicator();
+    void startTimer();
+    void stopTimer();
+    double getTime();
+    int mapCudaDevice(const int &rank, const int &size, const std::vector<unsigned int> &devices, const int &maxdev);
 private:
-   static Communicator* instanz;
    int numprocs, PID;
    int nbrbottom, nbrtop; 
    MPI_Comm comm1d, commGPU;
@@ -75,12 +70,10 @@ private:
    double starttime;
    double endtime;
    Communicator();
-   Communicator(const int numberOfProcs);
-   Communicator(const Communicator&);
+   ~Communicator();
 };
 
-} // namespace GPU
-} // namespace VF
+} // namespace vf::gpu
 
 #endif
 
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
index 8f89656ac6feb7dfe2644a2b6d604ccec510c3cb..d91e86c3140bb08aa2d8ef28d7cc147b23a2b804 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.cpp
@@ -7,7 +7,7 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // X
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -30,7 +30,7 @@ void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborX[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborX[i].f[0],
 							para->getParH(level)->recvProcessNeighborX[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborX[i].rankNeighbor);
 	}
@@ -38,7 +38,7 @@ void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborX[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborX[i].rankNeighbor);
 	//}
@@ -46,13 +46,13 @@ void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
 						  para->getParH(level)->sendProcessNeighborX[i].numberOfFs,
 						  para->getParH(level)->sendProcessNeighborX[i].rankNeighbor);
 	}
@@ -60,13 +60,13 @@ void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -88,7 +88,7 @@ void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -111,7 +111,7 @@ void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborX[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborX[i].f[0],
 							para->getParH(level)->recvProcessNeighborX[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborX[i].rankNeighbor);
 	}
@@ -119,7 +119,7 @@ void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborX[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborX[i].rankNeighbor);
 	//}
@@ -127,13 +127,13 @@ void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0],
 						  para->getParH(level)->sendProcessNeighborX[i].numberOfFs,
 						  para->getParH(level)->sendProcessNeighborX[i].rankNeighbor);
 	}
@@ -141,13 +141,13 @@ void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -176,7 +176,7 @@ void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Y
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -199,7 +199,7 @@ void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborY[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborY[i].f[0],
 							para->getParH(level)->recvProcessNeighborY[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborY[i].rankNeighbor);
 	}
@@ -207,7 +207,7 @@ void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborY[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborY[i].rankNeighbor);
 	//}
@@ -215,13 +215,13 @@ void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
 						  para->getParH(level)->sendProcessNeighborY[i].numberOfFs,
 			              para->getParH(level)->sendProcessNeighborY[i].rankNeighbor);
 	}
@@ -229,13 +229,13 @@ void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -257,7 +257,7 @@ void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -280,7 +280,7 @@ void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborY[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborY[i].f[0],
 							para->getParH(level)->recvProcessNeighborY[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborY[i].rankNeighbor);
 	}
@@ -288,7 +288,7 @@ void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborY[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborY[i].rankNeighbor);
 	//}
@@ -296,13 +296,13 @@ void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0],
 						  para->getParH(level)->sendProcessNeighborY[i].numberOfFs,
 			              para->getParH(level)->sendProcessNeighborY[i].rankNeighbor);
 	}
@@ -310,13 +310,13 @@ void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -345,7 +345,7 @@ void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Z
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -368,7 +368,7 @@ void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborZ[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborZ[i].f[0],
 							para->getParH(level)->recvProcessNeighborZ[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborZ[i].rankNeighbor);
 	}
@@ -376,7 +376,7 @@ void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborZ[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborZ[i].rankNeighbor);
 	//}
@@ -384,13 +384,13 @@ void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
 						  para->getParH(level)->sendProcessNeighborZ[i].numberOfFs,
 						  para->getParH(level)->sendProcessNeighborZ[i].rankNeighbor);
 	}
@@ -398,13 +398,13 @@ void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -426,7 +426,7 @@ void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -449,7 +449,7 @@ void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborZ[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborZ[i].f[0],
 							para->getParH(level)->recvProcessNeighborZ[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborZ[i].rankNeighbor);
 	}
@@ -457,7 +457,7 @@ void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborZ[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborZ[i].rankNeighbor);
 	//}
@@ -465,13 +465,13 @@ void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0],
 						  para->getParH(level)->sendProcessNeighborZ[i].numberOfFs,
 						  para->getParH(level)->sendProcessNeighborZ[i].rankNeighbor);
 	}
@@ -479,13 +479,13 @@ void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -529,7 +529,7 @@ void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, Cu
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //1D domain decomposition
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighbors(level, "send")); i++)
 	{
@@ -547,7 +547,7 @@ void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cuda
 		//////////////////////////////////////////////////////////////////////////
 		cudaManager->cudaCopyProcessNeighborFsDH(level, i);
 		//////////////////////////////////////////////////////////////////////////
-		comm->exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], 
+		comm.exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], 
 							para->getParH(level)->sendProcessNeighbor[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighbor[i].f[0],
 							para->getParH(level)->recvProcessNeighbor[i].numberOfFs,
@@ -574,7 +574,7 @@ void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cuda
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighbors(level, "send")); i++)
 	{
@@ -592,7 +592,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 		//////////////////////////////////////////////////////////////////////////
 		cudaManager->cudaCopyProcessNeighborFsDH(level, i);
 		//////////////////////////////////////////////////////////////////////////
-		comm->exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], 
+		comm.exchngDataGPU(para->getParH(level)->sendProcessNeighbor[i].f[0], 
 							para->getParH(level)->sendProcessNeighbor[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighbor[i].f[0],
 							para->getParH(level)->recvProcessNeighbor[i].numberOfFs,
@@ -623,7 +623,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //// X
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, int level)
+//void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, int level)
 //{
 //	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 //	{
@@ -641,7 +641,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //		//////////////////////////////////////////////////////////////////////////
 //		para->cudaCopyProcessNeighborXFsDH(level, i);
 //		//////////////////////////////////////////////////////////////////////////
-//		comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], 
+//		comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], 
 //							para->getParH(level)->sendProcessNeighborX[i].numberOfFs,
 //							para->getParH(level)->recvProcessNeighborX[i].f[0],
 //							para->getParH(level)->recvProcessNeighborX[i].numberOfFs,
@@ -663,7 +663,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //	}
 //}
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, int level)
+//void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, int level)
 //{
 //	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 //	{
@@ -681,7 +681,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //		//////////////////////////////////////////////////////////////////////////
 //		para->cudaCopyProcessNeighborXFsDH(level, i);
 //		//////////////////////////////////////////////////////////////////////////
-//		comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], 
+//		comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborX[i].f[0], 
 //							para->getParH(level)->sendProcessNeighborX[i].numberOfFs,
 //							para->getParH(level)->recvProcessNeighborX[i].f[0],
 //							para->getParH(level)->recvProcessNeighborX[i].numberOfFs,
@@ -710,7 +710,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //// Y
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, int level)
+//void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, int level)
 //{
 //	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 //	{
@@ -728,7 +728,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //		//////////////////////////////////////////////////////////////////////////
 //		para->cudaCopyProcessNeighborYFsDH(level, i);
 //		//////////////////////////////////////////////////////////////////////////
-//		comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], 
+//		comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], 
 //							para->getParH(level)->sendProcessNeighborY[i].numberOfFs,
 //							para->getParH(level)->recvProcessNeighborY[i].f[0],
 //							para->getParH(level)->recvProcessNeighborY[i].numberOfFs,
@@ -750,7 +750,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //	}
 //}
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, int level)
+//void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, int level)
 //{
 //	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 //	{
@@ -768,7 +768,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //		//////////////////////////////////////////////////////////////////////////
 //		para->cudaCopyProcessNeighborYFsDH(level, i);
 //		//////////////////////////////////////////////////////////////////////////
-//		comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], 
+//		comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborY[i].f[0], 
 //							para->getParH(level)->sendProcessNeighborY[i].numberOfFs,
 //							para->getParH(level)->recvProcessNeighborY[i].f[0],
 //							para->getParH(level)->recvProcessNeighborY[i].numberOfFs,
@@ -797,7 +797,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //// Z
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, int level)
+//void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, int level)
 //{
 //	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 //	{
@@ -815,7 +815,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //		//////////////////////////////////////////////////////////////////////////
 //		para->cudaCopyProcessNeighborZFsDH(level, i);
 //		//////////////////////////////////////////////////////////////////////////
-//		comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], 
+//		comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], 
 //							para->getParH(level)->sendProcessNeighborZ[i].numberOfFs,
 //							para->getParH(level)->recvProcessNeighborZ[i].f[0],
 //							para->getParH(level)->recvProcessNeighborZ[i].numberOfFs,
@@ -837,7 +837,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //	}
 //}
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, int level)
+//void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, int level)
 //{
 //	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 //	{
@@ -855,7 +855,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 //		//////////////////////////////////////////////////////////////////////////
 //		para->cudaCopyProcessNeighborZFsDH(level, i);
 //		//////////////////////////////////////////////////////////////////////////
-//		comm->exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], 
+//		comm.exchngDataGPU(para->getParH(level)->sendProcessNeighborZ[i].f[0], 
 //							para->getParH(level)->sendProcessNeighborZ[i].numberOfFs,
 //							para->getParH(level)->recvProcessNeighborZ[i].f[0],
 //							para->getParH(level)->recvProcessNeighborZ[i].numberOfFs,
@@ -932,7 +932,7 @@ void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, Cud
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // X
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -955,7 +955,7 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0],
 							para->getParH(level)->recvProcessNeighborADX[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborADX[i].rankNeighbor);
 	}
@@ -963,7 +963,7 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborADX[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor);
 	//}
@@ -971,13 +971,13 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
 						  para->getParH(level)->sendProcessNeighborADX[i].numberOfFs,
 						  para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor);
 	}
@@ -985,13 +985,13 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -1013,7 +1013,7 @@ void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -1036,7 +1036,7 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADX[i].f[0],
 							para->getParH(level)->recvProcessNeighborADX[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborADX[i].rankNeighbor);
 	}
@@ -1044,7 +1044,7 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborADX[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor);
 	//}
@@ -1052,13 +1052,13 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADX[i].f[0],
 						  para->getParH(level)->sendProcessNeighborADX[i].numberOfFs,
 						  para->getParH(level)->sendProcessNeighborADX[i].rankNeighbor);
 	}
@@ -1066,13 +1066,13 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -1101,7 +1101,7 @@ void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm,
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Y
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -1124,7 +1124,7 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0],
 							para->getParH(level)->recvProcessNeighborADY[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborADY[i].rankNeighbor);
 	}
@@ -1132,7 +1132,7 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborADY[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor);
 	//}
@@ -1140,13 +1140,13 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
 						  para->getParH(level)->sendProcessNeighborADY[i].numberOfFs,
 			              para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor);
 	}
@@ -1154,13 +1154,13 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -1182,7 +1182,7 @@ void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -1205,7 +1205,7 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADY[i].f[0],
 							para->getParH(level)->recvProcessNeighborADY[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborADY[i].rankNeighbor);
 	}
@@ -1213,7 +1213,7 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborADY[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor);
 	//}
@@ -1221,13 +1221,13 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADY[i].f[0],
 						  para->getParH(level)->sendProcessNeighborADY[i].numberOfFs,
 			              para->getParH(level)->sendProcessNeighborADY[i].rankNeighbor);
 	}
@@ -1235,13 +1235,13 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -1270,7 +1270,7 @@ void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm,
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Z
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -1293,7 +1293,7 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0],
 							para->getParH(level)->recvProcessNeighborADZ[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborADZ[i].rankNeighbor);
 	}
@@ -1301,7 +1301,7 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor);
 	//}
@@ -1309,13 +1309,13 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
 						  para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs,
 						  para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor);
 	}
@@ -1323,13 +1323,13 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -1351,7 +1351,7 @@ void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, C
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -1374,7 +1374,7 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0],
+		comm.nbRecvDataGPU(para->getParH(level)->recvProcessNeighborADZ[i].f[0],
 							para->getParH(level)->recvProcessNeighborADZ[i].numberOfFs,
 							para->getParH(level)->recvProcessNeighborADZ[i].rankNeighbor);
 	}
@@ -1382,7 +1382,7 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	////start non blocking MPI send
 	//for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	//{
-	//	comm->nbSendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
+	//	comm.nbSendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
 	//						para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs,
 	//						para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor);
 	//}
@@ -1390,13 +1390,13 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	////Waitall
 	//if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	//{
-	//	comm->waitallGPU();
+	//	comm.waitallGPU();
 	//}
 	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
+		comm.sendDataGPU(para->getParH(level)->sendProcessNeighborADZ[i].f[0],
 						  para->getParH(level)->sendProcessNeighborADZ[i].numberOfFs,
 						  para->getParH(level)->sendProcessNeighborADZ[i].rankNeighbor);
 	}
@@ -1404,13 +1404,13 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm,
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -1486,7 +1486,7 @@ void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm,
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // X
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -1510,7 +1510,7 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(
+		comm.nbRecvDataGPU(
 			para->getParH(level)->recvProcessNeighborF3X[i].g[0],
 			para->getParH(level)->recvProcessNeighborF3X[i].numberOfGs,
 			para->getParH(level)->recvProcessNeighborF3X[i].rankNeighbor);
@@ -1519,7 +1519,7 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->sendDataGPU(
+		comm.sendDataGPU(
 			para->getParH(level)->sendProcessNeighborF3X[i].g[0],
 			para->getParH(level)->sendProcessNeighborF3X[i].numberOfGs,
 			para->getParH(level)->sendProcessNeighborF3X[i].rankNeighbor);
@@ -1528,13 +1528,13 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsX(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -1564,7 +1564,7 @@ void exchangeCollDataF3XGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Y
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -1588,7 +1588,7 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(
+		comm.nbRecvDataGPU(
 			para->getParH(level)->recvProcessNeighborF3Y[i].g[0],
 			para->getParH(level)->recvProcessNeighborF3Y[i].numberOfGs,
 			para->getParH(level)->recvProcessNeighborF3Y[i].rankNeighbor);
@@ -1597,7 +1597,7 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->sendDataGPU(
+		comm.sendDataGPU(
 			para->getParH(level)->sendProcessNeighborF3Y[i].g[0],
 			para->getParH(level)->sendProcessNeighborF3Y[i].numberOfGs,
 			para->getParH(level)->sendProcessNeighborF3Y[i].rankNeighbor);
@@ -1606,13 +1606,13 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsY(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
@@ -1642,7 +1642,7 @@ void exchangeCollDataF3YGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Z
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level)
+void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level)
 {
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Device to Host
@@ -1666,7 +1666,7 @@ void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//start non blocking MPI receive
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->nbRecvDataGPU(
+		comm.nbRecvDataGPU(
 			para->getParH(level)->recvProcessNeighborF3Z[i].g[0],
 			para->getParH(level)->recvProcessNeighborF3Z[i].numberOfGs,
 			para->getParH(level)->recvProcessNeighborF3Z[i].rankNeighbor);
@@ -1675,7 +1675,7 @@ void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//start blocking MPI send
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->sendDataGPU(
+		comm.sendDataGPU(
 			para->getParH(level)->sendProcessNeighborF3Z[i].g[0],
 			para->getParH(level)->sendProcessNeighborF3Z[i].numberOfGs,
 			para->getParH(level)->sendProcessNeighborF3Z[i].rankNeighbor);
@@ -1684,13 +1684,13 @@ void exchangeCollDataF3ZGPU(Parameter* para, vf::gpu::Communicator* comm, CudaMe
 	//Wait
 	for (unsigned int i = 0; i < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")); i++)
 	{
-		comm->waitGPU(i);
+		comm.waitGPU(i);
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//reset the request array
 	if (0 < (unsigned int)(para->getNumberOfProcessNeighborsZ(level, "send")))
 	{
-		comm->resetRequest();
+		comm.resetRequest();
 	}
 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//copy Host to Device
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
index 34c9cba801c675bdf4c2cd39daca3be2d7918dbe..82662cdc55e8b0ff5f4afe7d31a6563579b45559 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
@@ -9,31 +9,31 @@
 
 //////////////////////////////////////////////////////////////////////////
 //1D domain decomposition
-extern "C" void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
 //////////////////////////////////////////////////////////////////////////
 //3D domain decomposition
-extern "C" void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
 //////////////////////////////////////////////////////////////////////////
 //3D domain decomposition convection diffusion
-extern "C" void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePreCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataADXGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataADYGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangePostCollDataADZGPU27(Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
 //////////////////////////////////////////////////////////////////////////
 //3D domain decomposition F3 - K18/K20
-extern "C" void exchangeCollDataF3XGPU( Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangeCollDataF3YGPU( Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
-extern "C" void exchangeCollDataF3ZGPU( Parameter* para, vf::gpu::Communicator* comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangeCollDataF3XGPU( Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangeCollDataF3YGPU( Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
+extern "C" void exchangeCollDataF3ZGPU( Parameter* para, vf::gpu::Communicator& comm, CudaMemoryManager* cudaManager, int level);
 //////////////////////////////////////////////////////////////////////////
-extern "C" void barrierGPU(vf::gpu::Communicator* comm);
+extern "C" void barrierGPU(vf::gpu::Communicator& comm);
 //////////////////////////////////////////////////////////////////////////
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index 897f9b4af238dcdc98083d14ebd10c42523ba9d6..62d3a944176617996abd90ce6ba9b19973afeb04 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -50,6 +50,10 @@
 #include <logger/Logger.h>
 
 
+Simulation::Simulation(vf::gpu::Communicator& communicator) : communicator(communicator)
+{
+
+}
 
 std::string getFileName(const std::string& fname, int step, int myID)
 {
@@ -80,10 +84,9 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std
    this->gridProvider = gridProvider;
    this->cudaManager = cudaManager;
    gridProvider->initalGridInformations();
-   comm = vf::gpu::Communicator::getInstanz();
    this->para = para;
 
-   vf::cuda::verifyAndSetDevice(comm->mapCudaDevice(para->getMyID(), para->getNumprocs(), para->getDevices(), para->getMaxDev()));
+   vf::cuda::verifyAndSetDevice(communicator.mapCudaDevice(para->getMyID(), para->getNumprocs(), para->getDevices(), para->getMaxDev()));
    
    para->initLBMSimulationParameter();
 
@@ -251,7 +254,7 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std
 
    //////////////////////////////////////////////////////////////////////////
    //output << "define the Grid..." ;
-   //defineGrid(para, comm);
+   //defineGrid(para, communicator);
    ////allocateMemory();
    //output << "done.\n";
 
@@ -414,7 +417,7 @@ void Simulation::run()
 	////////////////////////////////////////////////////////////////////////////////
 	for(t=para->getTStart();t<=para->getTEnd();t++)
 	{
-        updateGrid27(para.get(), comm, cudaManager.get(), pm, 0, t, kernels);
+        updateGrid27(para.get(), communicator, cudaManager.get(), pm, 0, t, kernels);
 
 	    ////////////////////////////////////////////////////////////////////////////////
 	    //Particles
@@ -429,7 +432,7 @@ void Simulation::run()
         // run Analyzers for kinetic energy and enstrophy for TGV in 3D
         // these analyzers only work on level 0
 	    ////////////////////////////////////////////////////////////////////////////////
-        if( this->kineticEnergyAnalyzer || this->enstrophyAnalyzer ) exchangeMultiGPU(para.get(), comm, cudaManager.get(), 0);
+        if( this->kineticEnergyAnalyzer || this->enstrophyAnalyzer ) exchangeMultiGPU(para.get(), communicator, cudaManager.get(), 0);
 
 	    if( this->kineticEnergyAnalyzer ) this->kineticEnergyAnalyzer->run(t);
 	    if( this->enstrophyAnalyzer     ) this->enstrophyAnalyzer->run(t);
@@ -623,7 +626,7 @@ void Simulation::run()
 	  ////////////////////////////////////////////////////////////////////////////////
       // File IO
       ////////////////////////////////////////////////////////////////////////////////
-      //comm->startTimer();
+      //communicator.startTimer();
       if(para->getTOut()>0 && t%para->getTOut()==0 && t>para->getTStartOut())
       {
 		  //////////////////////////////////////////////////////////////////////////////////
@@ -669,7 +672,7 @@ void Simulation::run()
             {
 		        //////////////////////////////////////////////////////////////////////////
 		        //exchange data for valid post process
-		        exchangeMultiGPU(para.get(), comm, cudaManager.get(), lev);
+		        exchangeMultiGPU(para.get(), communicator, cudaManager.get(), lev);
                 //////////////////////////////////////////////////////////////////////////
                //if (para->getD3Qxx()==19)
                //{
@@ -1282,8 +1285,4 @@ void Simulation::free()
 			cudaManager->cudaFreeGeomNormals(lev);
 		}
 	}
-	//////////////////////////////////////////////////////////////////////////
-
-    delete comm;
-
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.h b/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
index 924667e678fbb1d5e05fc601086642e31088ccc5..72c86140258b01aec3b3ed00d59c271f1824d514 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
@@ -13,9 +13,7 @@
 
 #include "VirtualFluids_GPU_export.h"
 
-namespace vf::gpu {
-class Communicator;
-}
+namespace vf::gpu { class Communicator; }
 
 class CudaMemoryManager;
 class Parameter;
@@ -34,6 +32,7 @@ class TrafficMovementFactory;
 class VIRTUALFLUIDS_GPU_EXPORT Simulation
 {
 public:
+	Simulation(vf::gpu::Communicator& communicator);
 	void run();
 	void init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std::shared_ptr<DataWriter> dataWriter, std::shared_ptr<CudaMemoryManager> cudaManager);
 	void free();
@@ -63,7 +62,7 @@ protected:
 
 	LogWriter output;
 
-    vf::gpu::Communicator* comm;
+    vf::gpu::Communicator& communicator;
     SPtr<Parameter> para;
     SPtr<GridProvider> gridProvider;
     SPtr<DataWriter> dataWriter;
diff --git a/src/logger/Logger.h b/src/logger/Logger.h
index 594decaf5bd85913335e6d1659b6d89cad6d0610..adb7796135a989843ef8de1f778c9901f3ad17c8 100644
--- a/src/logger/Logger.h
+++ b/src/logger/Logger.h
@@ -48,7 +48,7 @@
 #define VF_LOG_TRACE(...) spdlog::trace(__VA_ARGS__)
 #define VF_LOG_DEBUG(...) spdlog::debug(__VA_ARGS__)
 #define VF_LOG_INFO(...) spdlog::info(__VA_ARGS__)
-#define VF_LOG_WARNING(...) spdlog::warning(__VA_ARGS__)
+#define VF_LOG_WARNING(...) spdlog::warn(__VA_ARGS__)
 #define VF_LOG_CRITICAL(...) spdlog::critical(__VA_ARGS__)