From 1ab40ef36a6863e99bbbcfebd848f004ed29f718 Mon Sep 17 00:00:00 2001
From: kutscher <kutscher@irmb.tu-bs.de>
Date: Tue, 2 Jan 2018 13:00:11 +0100
Subject: [PATCH] Fix GenBlocksGridVisitor; replace BCArray3D& with
 BCArray3DPtr. DLR-F16: working copy for Phoenix. MPIIORestartCoProcessor has
 a BUG! MPIIORestart3CoProcessor is a reimplementation of
 MPIIORestartCoProcessor with that bug fixed; the bug is also fixed in
 MPIIORestart2CoProcessor. Add new implementations of MPIIORestartCoProcessor,
 MPIIORestart11CoProcessor and MPIIORestart21CoProcessor, which separate the
 data formats for blocks, data set and boundary conditions. Change BCAlgorithm
 so that BCProcessor collects the BCAlgorithm objects. Fix an issue with
 GenBlocksGridVisitor.
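
A minimal sketch of the reworked BCAlgorithm interface (BCProcessor now keeps
the BCAlgorithm objects and the caller feeds one node at a time instead of
filling node/bc vectors; the call sequence is illustrative, the names are from
this patch):

   BCAlgorithmPtr alg = bcAlgorithm->clone();
   alg->setBcArray(bcArray);
   alg->addDistributions(distributions);
   alg->setBcPointer(bcPtr);        // boundary condition at the current node
   alg->setNodeIndex(x1, x2, x3);   // node coordinates
   alg->applyBC();                  // apply the BC at exactly this node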

---
 .gitignore                                    |    1 +
 .../BoundaryConditions/BCAlgorithm.cpp        |   40 +-
 .../BoundaryConditions/BCAlgorithm.h          |   27 +-
 .../BoundaryConditions/BCArray3D.h            |    5 +-
 .../BoundaryConditions/BCProcessor.cpp        |    5 +-
 .../BoundaryConditions/BoundaryConditions.h   |    5 +-
 .../BoundaryConditions/NoSlipBCAlgorithm.h    |    3 +-
 .../NonEqDensityBCAlgorithm.h                 |    3 +-
 .../ThinWallBCProcessor.cpp                   |    2 +-
 .../BoundaryConditions/VelocityBCAlgorithm.h  |    3 +-
 ...ssor.cpp => MPIIORestart11CoProcessor.cpp} | 2768 +++++++++--------
 ...rocessor.h => MPIIORestart11CoProcessor.h} |  380 +--
 ...essor.cpp => MPIIORestart1CoProcessor.cpp} |   49 +-
 ...Processor.h => MPIIORestart1CoProcessor.h} |   10 +-
 .../MPIIORestart21CoProcessor.cpp             | 1225 ++++++++
 .../CoProcessors/MPIIORestart21CoProcessor.h  |  185 ++
 .../CoProcessors/MPIIORestart2CoProcessor.cpp |   71 +-
 .../CoProcessors/MPIIORestart2CoProcessor.h   |    2 +
 .../TimeAveragedValuesCoProcessor.cpp         |   10 +
 .../Data/D3Q27EsoTwist3DSplittedVector.h      |    5 +-
 source/VirtualFluidsCore/Grid/Block3D.h       |    6 +-
 source/VirtualFluidsCore/Grid/Grid3D.cpp      |   27 +-
 source/VirtualFluidsCore/Grid/Grid3D.h        |    1 +
 .../LBM/CompressibleCumulant2LBMKernel.cpp    | 1081 +++++++
 .../LBM/CompressibleCumulant2LBMKernel.h      |   72 +
 .../Utilities/ConfigurationFile.hpp           |    4 +-
 .../BoundaryConditionsBlockVisitor.cpp        |    6 +-
 .../Visitors/GenBlocksGridVisitor.cpp         |  103 +-
 .../Visitors/GenBlocksGridVisitor.h           |    6 -
 .../Visitors/SpongeLayerBlockVisitor.cpp      |   43 +-
 30 files changed, 4394 insertions(+), 1754 deletions(-)
 create mode 100644 .gitignore
 rename source/VirtualFluidsCore/CoProcessors/{MPIIORestartCoProcessor.cpp => MPIIORestart11CoProcessor.cpp} (66%)
 rename source/VirtualFluidsCore/CoProcessors/{MPIIORestartCoProcessor.h => MPIIORestart11CoProcessor.h} (73%)
 rename source/VirtualFluidsCore/CoProcessors/{MPIIORestart3CoProcessor.cpp => MPIIORestart1CoProcessor.cpp} (97%)
 rename source/VirtualFluidsCore/CoProcessors/{MPIIORestart3CoProcessor.h => MPIIORestart1CoProcessor.h} (95%)
 create mode 100644 source/VirtualFluidsCore/CoProcessors/MPIIORestart21CoProcessor.cpp
 create mode 100644 source/VirtualFluidsCore/CoProcessors/MPIIORestart21CoProcessor.h
 create mode 100644 source/VirtualFluidsCore/LBM/CompressibleCumulant2LBMKernel.cpp
 create mode 100644 source/VirtualFluidsCore/LBM/CompressibleCumulant2LBMKernel.h

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..e660fd93d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+bin/
diff --git a/source/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.cpp b/source/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.cpp
index 39d1a31b5..a7ea21360 100644
--- a/source/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.cpp
+++ b/source/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.cpp
@@ -5,38 +5,16 @@ BCAlgorithm::BCAlgorithm() : compressible(false)
 
 }
 //////////////////////////////////////////////////////////////////////////
-void BCAlgorithm::apply()
+void BCAlgorithm::setNodeIndex(int x1, int x2, int x3)
 {
-   int cbc = 0;
-   int cn = 0;
-   int j;
-
-   int nsize = (int)nodeVector.size();
-
-      for (j = cn; j < nsize;)
-      {
-         x1 = nodeVector[j++];
-         x2 = nodeVector[j++];
-         x3 = nodeVector[j++];
-
-         bcPtr = bcVector[cbc];
-         cbc++;
-
-         applyBC();
-      }
-      cn = j;
-}
-//////////////////////////////////////////////////////////////////////////
-void BCAlgorithm::addNode(int x1, int x2, int x3)
-{
-   nodeVector.push_back(x1);
-   nodeVector.push_back(x2);
-   nodeVector.push_back(x3);
+   this->x1 = x1;
+   this->x2 = x2;
+   this->x3 = x3;
 }
 //////////////////////////////////////////////////////////////////////////
-void BCAlgorithm::addBcPointer(BoundaryConditionsPtr bcPtr)
+void BCAlgorithm::setBcPointer(BoundaryConditionsPtr bcPtr)
 {
-   bcVector.push_back(bcPtr);
+   this->bcPtr = bcPtr;
 }
 //////////////////////////////////////////////////////////////////////////
 void BCAlgorithm::setCompressible(bool c)
@@ -75,12 +53,6 @@ bool BCAlgorithm::isPreCollision()
    return preCollision;
 }
 //////////////////////////////////////////////////////////////////////////
-void BCAlgorithm::clearData()
-{
-   nodeVector.clear();
-   bcVector.clear();
-}
-//////////////////////////////////////////////////////////////////////////
 BCArray3DPtr BCAlgorithm::getBcArray()
 {
    return bcArray;
diff --git a/source/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h b/source/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h
index 65fecd92f..2b4342a1c 100644
--- a/source/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h
+++ b/source/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h
@@ -36,25 +36,19 @@ public:
    BCAlgorithm();
    virtual ~BCAlgorithm() {}
    
-   void apply();
    virtual void addDistributions(DistributionArray3DPtr distributions) = 0;
-   void addNode(int x1, int x2, int x3);
-   void addBcPointer(BoundaryConditionsPtr bcPtr);
+   void setNodeIndex(int x1, int x2, int x3);
+   void setBcPointer(BoundaryConditionsPtr bcPtr);
    void setCompressible(bool c);
    void setCollFactor(LBMReal cf);
    char getType();
    bool isPreCollision();
    virtual BCAlgorithmPtr clone()=0;
-   void clearData();
    BCArray3DPtr getBcArray();
    void setBcArray(BCArray3DPtr bcarray);
-
-protected:
    virtual void applyBC() = 0;
-   
-   std::vector <int> nodeVector;
-   std::vector <BoundaryConditionsPtr> bcVector;
 
+protected:
    bool compressible;
    char type;
    bool preCollision;
@@ -75,21 +69,6 @@ protected:
    CalcFeqForDirFct calcFeqsForDirFct ;
    CalcMacrosFct    calcMacrosFct;
    CalcFeqFct       calcFeqFct;
-
-   
-
-private:
-   //friend class boost::serialization::access;
-   //template<class Archive>
-   //void serialize(Archive & ar, const unsigned int version)
-   //{
-   //   ar & nodeVector;
-   //   ar & bcVector;
-   //   ar & compressible;
-   //   ar & type;
-   //   ar & distributions;
-   //   ar & collFactor;
-   //}
 };
 
 
diff --git a/source/VirtualFluidsCore/BoundaryConditions/BCArray3D.h b/source/VirtualFluidsCore/BoundaryConditions/BCArray3D.h
index a33885fe8..4cc7cb6f4 100644
--- a/source/VirtualFluidsCore/BoundaryConditions/BCArray3D.h
+++ b/source/VirtualFluidsCore/BoundaryConditions/BCArray3D.h
@@ -100,9 +100,10 @@ private:
    //////////////////////////////////////////////////////////////////////////
    void deleteBC(std::size_t x1, std::size_t x2, std::size_t x3);
 
-   friend class MPIIORestartCoProcessor;
+   friend class MPIIORestart1CoProcessor;
    friend class MPIIORestart2CoProcessor;
-   friend class MPIIORestart3CoProcessor;
+   friend class MPIIORestart11CoProcessor;
+   friend class MPIIORestart21CoProcessor;
 
    friend class boost::serialization::access;
    template<class Archive>
diff --git a/source/VirtualFluidsCore/BoundaryConditions/BCProcessor.cpp b/source/VirtualFluidsCore/BoundaryConditions/BCProcessor.cpp
index 8fda8e602..b93647dd2 100644
--- a/source/VirtualFluidsCore/BoundaryConditions/BCProcessor.cpp
+++ b/source/VirtualFluidsCore/BoundaryConditions/BCProcessor.cpp
@@ -9,7 +9,6 @@ BCProcessor::BCProcessor()
 BCProcessor::BCProcessor(LBMKernelPtr kernel)
 {
    DistributionArray3DPtr distributions = boost::dynamic_pointer_cast<EsoTwist3D>(kernel->getDataSet()->getFdistributions());
-   //bcArray->resize( distributions->getNX1(), distributions->getNX2(), distributions->getNX3(), BCArray3D::FLUID);
    bcArray = BCArray3DPtr(new BCArray3D( distributions->getNX1(), distributions->getNX2(), distributions->getNX3(), BCArray3D::FLUID));
 }
 //////////////////////////////////////////////////////////////////////////
@@ -50,7 +49,7 @@ void BCProcessor::applyPreCollisionBC()
 {
    BOOST_FOREACH(BCAlgorithmPtr bc, preBC)
    {
-      bc->apply();
+      bc->applyBC();
    }
 }
 //////////////////////////////////////////////////////////////////////////
@@ -58,7 +57,7 @@ void BCProcessor::applyPostCollisionBC()
 {
    BOOST_FOREACH(BCAlgorithmPtr bc, postBC)
    {
-      bc->apply();
+      bc->applyBC();
    }
 }
 //////////////////////////////////////////////////////////////////////////
diff --git a/source/VirtualFluidsCore/BoundaryConditions/BoundaryConditions.h b/source/VirtualFluidsCore/BoundaryConditions/BoundaryConditions.h
index ad42ba649..e4da79f0c 100644
--- a/source/VirtualFluidsCore/BoundaryConditions/BoundaryConditions.h
+++ b/source/VirtualFluidsCore/BoundaryConditions/BoundaryConditions.h
@@ -256,9 +256,10 @@ protected:
    char algorithmType;
 
 private:
-   friend class MPIIORestartCoProcessor;
+   friend class MPIIORestart1CoProcessor;
    friend class MPIIORestart2CoProcessor;
-   friend class MPIIORestart3CoProcessor;
+   friend class MPIIORestart11CoProcessor;
+   friend class MPIIORestart21CoProcessor;
 
    friend class boost::serialization::access;
    template<class Archive>
diff --git a/source/VirtualFluidsCore/BoundaryConditions/NoSlipBCAlgorithm.h b/source/VirtualFluidsCore/BoundaryConditions/NoSlipBCAlgorithm.h
index 30012852f..d6f2a8b54 100644
--- a/source/VirtualFluidsCore/BoundaryConditions/NoSlipBCAlgorithm.h
+++ b/source/VirtualFluidsCore/BoundaryConditions/NoSlipBCAlgorithm.h
@@ -13,8 +13,9 @@ public:
    virtual ~NoSlipBCAlgorithm();
    BCAlgorithmPtr clone();
    void addDistributions(DistributionArray3DPtr distributions);
-protected:
    void applyBC();
+protected:
+   
 private:
    //friend class boost::serialization::access;
    //template<class Archive>
diff --git a/source/VirtualFluidsCore/BoundaryConditions/NonEqDensityBCAlgorithm.h b/source/VirtualFluidsCore/BoundaryConditions/NonEqDensityBCAlgorithm.h
index e206fa7ea..52753316b 100644
--- a/source/VirtualFluidsCore/BoundaryConditions/NonEqDensityBCAlgorithm.h
+++ b/source/VirtualFluidsCore/BoundaryConditions/NonEqDensityBCAlgorithm.h
@@ -13,8 +13,9 @@ public:
    ~NonEqDensityBCAlgorithm();
    BCAlgorithmPtr clone();
    void addDistributions(DistributionArray3DPtr distributions);
-protected:
    void applyBC();
+protected:
+   
 private:
    //friend class boost::serialization::access;
    //template<class Archive>
diff --git a/source/VirtualFluidsCore/BoundaryConditions/ThinWallBCProcessor.cpp b/source/VirtualFluidsCore/BoundaryConditions/ThinWallBCProcessor.cpp
index c5366bc6d..56e84fee5 100644
--- a/source/VirtualFluidsCore/BoundaryConditions/ThinWallBCProcessor.cpp
+++ b/source/VirtualFluidsCore/BoundaryConditions/ThinWallBCProcessor.cpp
@@ -32,7 +32,7 @@ void ThinWallBCProcessor::applyPostCollisionBC()
       if (bc->getType() == BCAlgorithm::ThinWallNoSlipBCAlgorithm)
       {
          boost::dynamic_pointer_cast<ThinWallNoSlipBCAlgorithm>(bc)->setPass(2); 
-         bc->apply();
+         bc->applyBC();
          boost::dynamic_pointer_cast<ThinWallNoSlipBCAlgorithm>(bc)->setPass(1);
       }
    }
diff --git a/source/VirtualFluidsCore/BoundaryConditions/VelocityBCAlgorithm.h b/source/VirtualFluidsCore/BoundaryConditions/VelocityBCAlgorithm.h
index a231e27ca..ff5564acd 100644
--- a/source/VirtualFluidsCore/BoundaryConditions/VelocityBCAlgorithm.h
+++ b/source/VirtualFluidsCore/BoundaryConditions/VelocityBCAlgorithm.h
@@ -13,8 +13,9 @@ public:
    ~VelocityBCAlgorithm();
    BCAlgorithmPtr clone();
    void addDistributions(DistributionArray3DPtr distributions);
-protected:
    void applyBC();
+protected:
+   
 private:
    //friend class boost::serialization::access;
    //template<class Archive>
diff --git a/source/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp b/source/VirtualFluidsCore/CoProcessors/MPIIORestart11CoProcessor.cpp
similarity index 66%
rename from source/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp
rename to source/VirtualFluidsCore/CoProcessors/MPIIORestart11CoProcessor.cpp
index 83783567f..34cf47802 100644
--- a/source/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp
+++ b/source/VirtualFluidsCore/CoProcessors/MPIIORestart11CoProcessor.cpp
@@ -1,1360 +1,1408 @@
-#include "MPIIORestartCoProcessor.h"
-#include <boost/foreach.hpp>
-#include "D3Q27System.h"
-//#include "LBMKernel.h"
-#include "CompressibleCumulantLBMKernel.h"
-#include "IncompressibleCumulantLBMKernel.h"
-#include "ThinWallBCProcessor.h"
-#include "D3Q27EsoTwist3DSplittedVector.h"
-#include <UbSystem.h>
-#include <MemoryUtil.h>
-
-//! BLOCK_SIZE defines the number of BoundaryCondition structures written to the file as one block.
-//! To avoid overflow in the \a count parameter of MPI_File_write_at,
-//! BoundaryCondition structures are written in blocks of BLOCK_SIZE structures each.
-#define BLOCK_SIZE 1024
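-//! Example (illustrative): each MPI_File_write_at call for boundary conditions
-//! passes bcBlockCount elements of the derived type boundCondType1000
-//! (BLOCK_SIZE = 1024 BoundaryCondition structures each), so the int count
-//! parameter stays far below INT_MAX even for billions of boundary conditions.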
-
-MPIIORestartCoProcessor::MPIIORestartCoProcessor(Grid3DPtr grid, UbSchedulerPtr s,
-   const std::string& path,
-   CommunicatorPtr comm) :
-   CoProcessor(grid, s),
-   path(path),
-   comm(comm),
-   mpiTypeFreeFlag(false)
-{
-   UbSystem::makeDirectory(path+"/mpi_io_cp");
-
-   memset(&blockParamStr, 0, sizeof(blockParamStr));
-
-   //-------------------------   define MPI types  ---------------------------------
-
-   MPI_Datatype typesGP[3] = { MPI_DOUBLE, MPI_INT, MPI_CHAR };
-   int blocksGP[3] = { 34, 6, 5 };
-   MPI_Aint offsetsGP[3], lbGP, extentGP;
-
-   offsetsGP[0] = 0;
-   MPI_Type_get_extent(MPI_DOUBLE, &lbGP, &extentGP);
-   offsetsGP[1] = blocksGP[0]*extentGP;
-
-   MPI_Type_get_extent(MPI_INT, &lbGP, &extentGP);
-   offsetsGP[2] = offsetsGP[1]+blocksGP[1]*extentGP;
-
-   MPI_Type_create_struct (3, blocksGP, offsetsGP, typesGP, &gridParamType);
-   MPI_Type_commit(&gridParamType);
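-   // Field mapping (inferred from writeBlocks/readBlocks below): 34 doubles =
-   // trafoParams[33] plus deltaX; 6 ints = blockNx1..3 and nx1..3; 5 chars =
-   // active, transformation and periodicX1..3.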
-
-   //-----------------------------------------------------------------------
-
-   MPI_Type_contiguous(41, MPI_INT, &blockParamType);
-   MPI_Type_commit(&blockParamType);
-
-   //-----------------------------------------------------------------------
-
-   MPI_Datatype typesBlock[2] = { MPI_INT, MPI_CHAR };
-   int blocksBlock[2] = { 13, 1 };
-   MPI_Aint offsetsBlock[2], lbBlock, extentBlock;
-
-   offsetsBlock[0] = 0;
-   MPI_Type_get_extent(MPI_INT, &lbBlock, &extentBlock);
-   offsetsBlock[1] = blocksBlock[0]*extentBlock;
-
-   MPI_Type_create_struct(2, blocksBlock, offsetsBlock, typesBlock, &block3dType);
-   MPI_Type_commit(&block3dType);
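-   // Field mapping (inferred from writeBlocks below): 13 ints = x1, x2, x3,
-   // bundle, rank, lrank, part, globalID, localID, level, interpolationFlagCF,
-   // interpolationFlagFC, counter; 1 char = active.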
-
-   //-----------------------------------------------------------------------
-   MPI_Datatype typesDataSet[3] = { MPI_DOUBLE, MPI_INT, MPI_CHAR };
-   int blocksDataSet[3] = { 2, 5, 2 };
-   MPI_Aint offsetsDatatSet[3], lbDataSet, extentDataSet;
-
-   offsetsDatatSet[0] = 0;
-   MPI_Type_get_extent(MPI_DOUBLE, &lbDataSet, &extentDataSet);
-   offsetsDatatSet[1] = blocksDataSet[0]*extentDataSet;
-
-   MPI_Type_get_extent(MPI_INT, &lbDataSet, &extentDataSet);
-   offsetsDatatSet[2] = offsetsDatatSet[1]+blocksDataSet[1]*extentDataSet;
-
-   MPI_Type_create_struct(3, blocksDataSet, offsetsDatatSet, typesDataSet, &dataSetType);
-   MPI_Type_commit(&dataSetType);
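-   // Field mapping (assumed from writeDataSet below): 2 doubles = collFactor and
-   // deltaT; 5 ints = x1, x2, x3, level, ghostLayerWidth; 2 chars = compressible
-   // and withForcing.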
-
-   //-----------------------------------------------------------------------
-
-   MPI_Datatype typesBC[3] = { MPI_LONG_LONG_INT, MPI_FLOAT, MPI_CHAR };
-   int blocksBC[3] = { 5, 38, 1 };
-   MPI_Aint offsetsBC[3], lbBC, extentBC;
-
-   offsetsBC[0] = 0;
-   MPI_Type_get_extent(MPI_LONG_LONG_INT, &lbBC, &extentBC);
-   offsetsBC[1] = blocksBC[0]*extentBC;
-
-   MPI_Type_get_extent(MPI_FLOAT, &lbBC, &extentBC);
-   offsetsBC[2] = offsetsBC[1]+blocksBC[1]*extentBC;
-
-   MPI_Type_create_struct(3, blocksBC, offsetsBC, typesBC, &boundCondType);
-   MPI_Type_commit(&boundCondType);
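-   // Field mapping (inferred from writeBoundaryConds below): 5 long longs = the
-   // noslip/slip/velocity/density/wallModel boundary flags; 38 floats =
-   // bcVelocityX1..X3, bcDensity, the five bcLodi* values, nx1..nx3 and q[26];
-   // 1 char = algorithmType.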
-
-   //---------------------------------------
-
-   MPI_Type_contiguous(BLOCK_SIZE, boundCondType, &boundCondType1000);
-   MPI_Type_commit(&boundCondType1000);
-
-   //---------------------------------------
-
-   MPI_Type_contiguous(6, MPI_INT, &boundCondTypeAdd);
-   MPI_Type_commit(&boundCondTypeAdd);
-
-}
-//////////////////////////////////////////////////////////////////////////
-MPIIORestartCoProcessor::~MPIIORestartCoProcessor()
-{
-   MPI_Type_free(&gridParamType);
-   MPI_Type_free(&blockParamType);
-   MPI_Type_free(&block3dType);
-   MPI_Type_free(&dataSetType);
-   MPI_Type_free(&boundCondType);
-   MPI_Type_free(&boundCondType1000);
-   MPI_Type_free(&boundCondTypeAdd);
-
-   if (mpiTypeFreeFlag)
-   {
-      MPI_Type_free(&dataSetDoubleType);
-      MPI_Type_free(&bcindexmatrixType);
-   }
-}
-
-//////////////////////////////////////////////////////////////////////////
-void MPIIORestartCoProcessor::process(double step)
-{
-   if (scheduler->isDue(step))
-   {
-      if (comm->isRoot()) UBLOG(logINFO, "MPIIORestartCoProcessor save step: "<<step);
-      if (comm->isRoot()) UBLOG(logINFO, "Save check point - start");
-      /*if (comm->isRoot())*/ clearAllFiles((int)step);
-      writeBlocks((int)step);
-      writeDataSet((int)step);
-      writeBoundaryConds((int)step);
-      if (comm->isRoot()) UBLOG(logINFO, "Save check point - end");
-      
-      //readDataSet((int)step);
-   }
-}
-//////////////////////////////////////////////////////////////////////////
-void MPIIORestartCoProcessor::clearAllFiles(int step)
-{
-   MPI_File file_handler1, file_handler2, file_handler3;
-   MPI_Info info = MPI_INFO_NULL;
-   MPI_Offset new_size = 0;
-
-   UbSystem::makeDirectory(path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step));
-   std::string filename1 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpBlocks.bin";
-   //MPI_File_delete(filename1.c_str(), info);
-   int rc1 = MPI_File_open(MPI_COMM_WORLD, filename1.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &file_handler1);
-   if (rc1 != MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file " + filename1);
-   MPI_File_set_size(file_handler1, new_size);
-   //MPI_File_sync(file_handler1);
-   MPI_File_close(&file_handler1);
-
-   std::string filename2 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSet.bin";
-   //MPI_File_delete(filename2.c_str(), info);
-   int rc2 = MPI_File_open(MPI_COMM_WORLD, filename2.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler2);
-   if (rc2 != MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file " + filename2);
-   MPI_File_set_size(file_handler2, new_size);
-   //MPI_File_sync(file_handler2);
-   MPI_File_close(&file_handler2);
-
-   std::string filename3 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpBC.bin";
-   //MPI_File_delete(filename3.c_str(), info);
-   int rc3 = MPI_File_open(MPI_COMM_WORLD, filename3.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler3);
-   if (rc3 != MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file " + filename3);
-   MPI_File_set_size(file_handler3, new_size);
-   //MPI_File_sync(file_handler3);
-   MPI_File_close(&file_handler3);
-}
-//////////////////////////////////////////////////////////////////////////
-void MPIIORestartCoProcessor::writeBlocks(int step)
-{
-   int rank, size;
-   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-   //MPI_Comm_size(MPI_COMM_WORLD, &size);
-   size=1;
-
-   if (comm->isRoot())
-   {
-      UBLOG(logINFO, "MPIIORestartCoProcessor::writeBlocks start collect data rank = "<<rank);
-      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
-   }
-
-   int blocksCount = 0; // quantity of blocks in the grid, max 2147483647 blocks!
-   int minInitLevel = this->grid->getCoarsestInitializedLevel();
-   int maxInitLevel = this->grid->getFinestInitializedLevel();
-
-   std::vector<Block3DPtr> blocksVector[25]; // max 25 levels
-   for (int level = minInitLevel; level<=maxInitLevel; level++)
-   {
-      //grid->getBlocks(level, rank, blockVector[level]);
-      grid->getBlocks(level, blocksVector[level]);
-      blocksCount += static_cast<int>(blocksVector[level].size());
-   }
-
-   GridParam* gridParameters = new GridParam;
-   gridParameters->trafoParams[0] = grid->getCoordinateTransformator()->Tx1;
-   gridParameters->trafoParams[1] = grid->getCoordinateTransformator()->Tx2;
-   gridParameters->trafoParams[2] = grid->getCoordinateTransformator()->Tx3;
-   gridParameters->trafoParams[3] = grid->getCoordinateTransformator()->Sx1;
-   gridParameters->trafoParams[4] = grid->getCoordinateTransformator()->Sx2;
-   gridParameters->trafoParams[5] = grid->getCoordinateTransformator()->Sx3;
-   gridParameters->trafoParams[6] = grid->getCoordinateTransformator()->alpha;
-   gridParameters->trafoParams[7] = grid->getCoordinateTransformator()->beta;
-   gridParameters->trafoParams[8] = grid->getCoordinateTransformator()->gamma;
-
-   gridParameters->trafoParams[9] = grid->getCoordinateTransformator()->toX1factorX1;
-   gridParameters->trafoParams[10] = grid->getCoordinateTransformator()->toX1factorX2;
-   gridParameters->trafoParams[11] = grid->getCoordinateTransformator()->toX1factorX3;
-   gridParameters->trafoParams[12] = grid->getCoordinateTransformator()->toX1delta;
-   gridParameters->trafoParams[13] = grid->getCoordinateTransformator()->toX2factorX1;
-   gridParameters->trafoParams[14] = grid->getCoordinateTransformator()->toX2factorX2;
-   gridParameters->trafoParams[15] = grid->getCoordinateTransformator()->toX2factorX3;
-   gridParameters->trafoParams[16] = grid->getCoordinateTransformator()->toX2delta;
-   gridParameters->trafoParams[17] = grid->getCoordinateTransformator()->toX3factorX1;
-   gridParameters->trafoParams[18] = grid->getCoordinateTransformator()->toX3factorX2;
-   gridParameters->trafoParams[19] = grid->getCoordinateTransformator()->toX3factorX3;
-   gridParameters->trafoParams[20] = grid->getCoordinateTransformator()->toX3delta;
-
-   gridParameters->trafoParams[21] = grid->getCoordinateTransformator()->fromX1factorX1;
-   gridParameters->trafoParams[22] = grid->getCoordinateTransformator()->fromX1factorX2;
-   gridParameters->trafoParams[23] = grid->getCoordinateTransformator()->fromX1factorX3;
-   gridParameters->trafoParams[24] = grid->getCoordinateTransformator()->fromX1delta;
-   gridParameters->trafoParams[25] = grid->getCoordinateTransformator()->fromX2factorX1;
-   gridParameters->trafoParams[26] = grid->getCoordinateTransformator()->fromX2factorX2;
-   gridParameters->trafoParams[27] = grid->getCoordinateTransformator()->fromX2factorX3;
-   gridParameters->trafoParams[28] = grid->getCoordinateTransformator()->fromX2delta;
-   gridParameters->trafoParams[29] = grid->getCoordinateTransformator()->fromX3factorX1;
-   gridParameters->trafoParams[30] = grid->getCoordinateTransformator()->fromX3factorX2;
-   gridParameters->trafoParams[31] = grid->getCoordinateTransformator()->fromX3factorX3;
-   gridParameters->trafoParams[32] = grid->getCoordinateTransformator()->fromX3delta;
-
-   gridParameters->active = grid->getCoordinateTransformator()->active;
-   gridParameters->transformation = grid->getCoordinateTransformator()->transformation;
-
-   gridParameters->deltaX = grid->getDeltaX(minInitLevel);
-   UbTupleInt3 blocknx = grid->getBlockNX();
-   gridParameters->blockNx1 = val<1>(blocknx);
-   gridParameters->blockNx2 = val<2>(blocknx);
-   gridParameters->blockNx3 = val<3>(blocknx);
-   gridParameters->nx1 = grid->getNX1();
-   gridParameters->nx2 = grid->getNX2();
-   gridParameters->nx3 = grid->getNX3();
-   gridParameters->periodicX1 = grid->isPeriodicX1();
-   gridParameters->periodicX2 = grid->isPeriodicX2();
-   gridParameters->periodicX3 = grid->isPeriodicX3();
-
-   //----------------------------------------------------------------------
-
-   Block3d* block3dArray = new Block3d[blocksCount];
-   bool firstBlock = true;
-   int ic = 0;
-   for (int level = minInitLevel; level<=maxInitLevel; level++)
-   {
-      BOOST_FOREACH(Block3DPtr block, blocksVector[level])  //	all the blocks of the current level
-      {
-         if (firstBlock && block->getKernel()) // at the first block with a valid kernel...
-         {
-            boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > averageDensityArray = block->getKernel()->getDataSet()->getAverageDencity();
-            if (averageDensityArray)
-            {
-               blockParamStr.nx[0][0] = static_cast<int>(averageDensityArray->getNX1());
-               blockParamStr.nx[0][1] = static_cast<int>(averageDensityArray->getNX2());
-               blockParamStr.nx[0][2] = static_cast<int>(averageDensityArray->getNX3());
-               blockParamStr.nx[0][3] = static_cast<int>(averageDensityArray->getNX4());
-            }
-
-            boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
-            if (AverageVelocityArray3DPtr)
-            {
-               blockParamStr.nx[1][0] = static_cast<int>(AverageVelocityArray3DPtr->getNX1());
-               blockParamStr.nx[1][1] = static_cast<int>(AverageVelocityArray3DPtr->getNX2());
-               blockParamStr.nx[1][2] = static_cast<int>(AverageVelocityArray3DPtr->getNX3());
-               blockParamStr.nx[1][3] = static_cast<int>(AverageVelocityArray3DPtr->getNX4());
-            }
-
-            boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
-            if (AverageFluctArray3DPtr)
-            {
-               blockParamStr.nx[2][0] = static_cast<int>(AverageFluctArray3DPtr->getNX1());
-               blockParamStr.nx[2][1] = static_cast<int>(AverageFluctArray3DPtr->getNX2());
-               blockParamStr.nx[2][2] = static_cast<int>(AverageFluctArray3DPtr->getNX3());
-               blockParamStr.nx[2][3] = static_cast<int>(AverageFluctArray3DPtr->getNX4());
-            }
-
-            boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
-            if (AverageTripleArray3DPtr)
-            {
-               blockParamStr.nx[3][0] = static_cast<int>(AverageTripleArray3DPtr->getNX1());
-               blockParamStr.nx[3][1] = static_cast<int>(AverageTripleArray3DPtr->getNX2());
-               blockParamStr.nx[3][2] = static_cast<int>(AverageTripleArray3DPtr->getNX3());
-               blockParamStr.nx[3][3] = static_cast<int>(AverageTripleArray3DPtr->getNX4());
-            }
-
-            boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
-            if (ShearStressValArray3DPtr)
-            {
-               blockParamStr.nx[4][0] = static_cast<int>(ShearStressValArray3DPtr->getNX1());
-               blockParamStr.nx[4][1] = static_cast<int>(ShearStressValArray3DPtr->getNX2());
-               blockParamStr.nx[4][2] = static_cast<int>(ShearStressValArray3DPtr->getNX3());
-               blockParamStr.nx[4][3] = static_cast<int>(ShearStressValArray3DPtr->getNX4());
-            }
-
-            boost::shared_ptr< CbArray3D<LBMReal, IndexerX3X2X1> > relaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
-            if (relaxationFactor3DPtr)
-            {
-               blockParamStr.nx[5][0] = static_cast<int>(relaxationFactor3DPtr->getNX1());
-               blockParamStr.nx[5][1] = static_cast<int>(relaxationFactor3DPtr->getNX2());
-               blockParamStr.nx[5][2] = static_cast<int>(relaxationFactor3DPtr->getNX3());
-               blockParamStr.nx[5][3] = 1;
-            }
-
-            boost::shared_ptr< D3Q27EsoTwist3DSplittedVector > D3Q27EsoTwist3DSplittedVectorPtr = boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(block->getKernel()->getDataSet()->getFdistributions());
-            CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getLocalDistributions();
-            if (localDistributions)
-            {
-               blockParamStr.nx[6][0] = static_cast<int>(localDistributions->getNX1());
-               blockParamStr.nx[6][1] = static_cast<int>(localDistributions->getNX2());
-               blockParamStr.nx[6][2] = static_cast<int>(localDistributions->getNX3());
-               blockParamStr.nx[6][3] = static_cast<int>(localDistributions->getNX4());
-            }
-
-            CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getNonLocalDistributions();
-            if (nonLocalDistributions)
-            {
-               blockParamStr.nx[7][0] = static_cast<int>(nonLocalDistributions->getNX1());
-               blockParamStr.nx[7][1] = static_cast<int>(nonLocalDistributions->getNX2());
-               blockParamStr.nx[7][2] = static_cast<int>(nonLocalDistributions->getNX3());
-               blockParamStr.nx[7][3] = static_cast<int>(nonLocalDistributions->getNX4());
-            }
-
-            CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getZeroDistributions();
-            if (zeroDistributions)
-            {
-               blockParamStr.nx[8][0] = static_cast<int>(zeroDistributions->getNX1());
-               blockParamStr.nx[8][1] = static_cast<int>(zeroDistributions->getNX2());
-               blockParamStr.nx[8][2] = static_cast<int>(zeroDistributions->getNX3());
-               blockParamStr.nx[8][3] = 1;
-            }
-
-            // ... then save some parameters that are equal in all blocks
-            blockParamStr.nx1 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX1());
-            blockParamStr.nx2 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX2());
-            blockParamStr.nx3 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX3());
-
-            firstBlock = false;
-
-            // how many elements are in all arrays of DataSet (equal in all blocks)
-            int doubleCount = 0, temp;
-            for (int i = 0; i<9; i++)   // 9 arrays ( averageValues, averageVelocity, averageFluctuations,
-            {                 // averageTriplecorrelations, shearStressValues, relaxationFactor, 3 * fdistributions
-               temp = 1;
-               for (int ii = 0; ii<4; ii++)
-                  temp *= blockParamStr.nx[i][ii];
-               doubleCount += temp;
-            }
-            blockParamStr.doubleCountInBlock = doubleCount;
-
-            // the number of elements in the bcindexmatrix array (CbArray3D<int, IndexerX3X2X1>) in bcArray (BCArray3D) is the same in all blocks;
-            // it is the size of the "write-read block" used by the MPI_write_.../MPI_read... functions when writing/reading BoundConds
-            BCArray3DPtr bcArr = block->getKernel()->getBCProcessor()->getBCArray();
-            blockParamStr.bcindexmatrix_count = static_cast<int>(bcArr->bcindexmatrix.getDataVector().size());
-         }
-
-         // save data describing the block
-         block3dArray[ic].x1 = block->getX1();
-         block3dArray[ic].x2 = block->getX2();
-         block3dArray[ic].x3 = block->getX3();
-         block3dArray[ic].bundle = block->getBundle();
-         block3dArray[ic].rank = block->getRank();
-         block3dArray[ic].lrank = block->getLocalRank();
-         block3dArray[ic].part = block->getPart();
-         block3dArray[ic].globalID = block->getGlobalID();
-         block3dArray[ic].localID = block->getLocalID();
-         block3dArray[ic].level = block->getLevel();
-         block3dArray[ic].interpolationFlagCF = block->getInterpolationFlagCF();
-         block3dArray[ic].interpolationFlagFC = block->getInterpolationFlagFC();
-         block3dArray[ic].counter = block->getMaxGlobalID();
-         block3dArray[ic].active = block->isActive();
-
-         ic++;
-      }
-   }
-
-   if (comm->isRoot())
-   {
-      UBLOG(logINFO, "MPIIORestartCoProcessor::writeBlocks start MPI IO rank = "<<rank);
-      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
-   }
-
-   MPI_File file_handler;
-   MPI_Info info = MPI_INFO_NULL;
-   //MPI_Info_create (&info);
-   //MPI_Info_set(info,"romio_cb_write","enable");
-   //MPI_Info_set(info,"cb_buffer_size","4194304");
-   //MPI_Info_set(info,"striping_unit","4194304");
-
-   // if (comm->isRoot())
-   // {
-   UbSystem::makeDirectory(path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step));
-   std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpBlocks.bin";
-   int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &file_handler);
-   if (rc != MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file " + filename);
-   // }
-
-   double start, finish;
-   MPI_Offset write_offset = (MPI_Offset)(size * sizeof(int));
-
-   //MPI_Offset new_size = 0;
-   //MPI_File_set_size(file_handler, new_size);
-
-   if (comm->isRoot())
-   {
-      start = MPI_Wtime();
-
-      // each process writes the quantity of its blocks
-      MPI_File_write_at(file_handler, (MPI_Offset)(rank*sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-      // each process writes parameters of the grid
-      MPI_File_write_at(file_handler, write_offset, gridParameters, 1, gridParamType, MPI_STATUS_IGNORE);
-      // each process writes common parameters of a block
-      MPI_File_write_at(file_handler, (MPI_Offset)(write_offset +sizeof(GridParam)), &blockParamStr, 1, blockParamType, MPI_STATUS_IGNORE);
-      // each process writes it's blocks
-      MPI_File_write_at(file_handler, (MPI_Offset)(write_offset +sizeof(GridParam)+sizeof(BlockParam)), &block3dArray[0], blocksCount, block3dType, MPI_STATUS_IGNORE);
-   }
-
-   MPI_File_sync(file_handler);
-   MPI_File_close(&file_handler);
- 
-   if (comm->isRoot())
-   {
-      finish = MPI_Wtime();
-      UBLOG(logINFO, "MPIIORestartCoProcessor::writeBlocks time: "<<finish-start<<" s");
-   }
-
-   // register new MPI-types depending on the block-specific information
-   MPI_Type_contiguous(blockParamStr.doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-   MPI_Type_commit(&dataSetDoubleType);
-
-   MPI_Type_contiguous(blockParamStr.bcindexmatrix_count, MPI_INT, &bcindexmatrixType);
-   MPI_Type_commit(&bcindexmatrixType);
-
-   mpiTypeFreeFlag = true;
-
-   delete[] block3dArray;
-   delete gridParameters;
-}
-
-void MPIIORestartCoProcessor::writeDataSet(int step)
-{
-   int rank, size;
-   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-   MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-   int blocksCount = 0; // quantity of blocks in the grid, max 2147483647 blocks!
-
-   std::vector<Block3DPtr> blocksVector[25];
-   int minInitLevel = this->grid->getCoarsestInitializedLevel();
-   int maxInitLevel = this->grid->getFinestInitializedLevel();
-   for (int level = minInitLevel; level<=maxInitLevel; level++)
-   {
-      grid->getBlocks(level, rank, blocksVector[level]);
-      blocksCount += static_cast<int>(blocksVector[level].size());
-   }
-
-   DataSet* dataSetArray = new DataSet[blocksCount];
-   std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks 
-
-   if (comm->isRoot())
-   {
-      UBLOG(logINFO, "MPIIORestartCoProcessor::writeDataSet start collect data rank = "<<rank);
-      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
-   }
-
-   int ic = 0;
-   for (int level = minInitLevel; level<=maxInitLevel; level++)
-   {
-      BOOST_FOREACH(Block3DPtr block, blocksVector[level])  //	blocks of the current level
-      {
-         dataSetArray[ic].x1 = block->getX1();     // coordinates of the block needed to find it while regenerating the grid
-         dataSetArray[ic].x2 = block->getX2();
-         dataSetArray[ic].x3 = block->getX3();
-         dataSetArray[ic].level = block->getLevel();
-         if (block->getKernel())
-         {
-            dataSetArray[ic].ghostLayerWidth = block->getKernel()->getGhostLayerWidth();
-            dataSetArray[ic].collFactor = block->getKernel()->getCollisionFactor();
-            dataSetArray[ic].deltaT = block->getKernel()->getDeltaT();
-            dataSetArray[ic].compressible = block->getKernel()->getCompressible();
-            dataSetArray[ic].withForcing = block->getKernel()->getWithForcing();
-         }
-         else
-         {
-            dataSetArray[ic].ghostLayerWidth = 0;
-            dataSetArray[ic].collFactor = 0.0;
-            dataSetArray[ic].deltaT = 0.0;
-            dataSetArray[ic].compressible = false;
-            dataSetArray[ic].withForcing = false;
-         }
-         //dataSetArrayGW[ic].x1 = dataSetArray[ic].x1;
-         //dataSetArrayGW[ic].x2 = dataSetArray[ic].x2;
-         //dataSetArrayGW[ic].x3 = dataSetArray[ic].x3;
-         //dataSetArrayGW[ic].level = dataSetArray[ic].level;
-         //dataSetArrayGW[ic].ghostLayerWidth = dataSetArray[ic].ghostLayerWidth;
-         //dataSetArrayGW[ic].collFactor = dataSetArray[ic].collFactor;
-         //dataSetArrayGW[ic].deltaT = dataSetArray[ic].deltaT;
-         //dataSetArrayGW[ic].compressible = dataSetArray[ic].compressible;
-         //dataSetArrayGW[ic].withForcing = dataSetArray[ic].withForcing;
-
-         boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageValuesArray3DPtr = block->getKernel()->getDataSet()->getAverageDencity();
-         if (AverageValuesArray3DPtr&&(blockParamStr.nx[0][0]>0)&&(blockParamStr.nx[0][1]>0)&&(blockParamStr.nx[0][2]>0)&&(blockParamStr.nx[0][3]>0))
-            doubleValuesArray.insert(doubleValuesArray.end(), AverageValuesArray3DPtr->getDataVector().begin(), AverageValuesArray3DPtr->getDataVector().end());
-
-         boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
-         if (AverageVelocityArray3DPtr&&(blockParamStr.nx[1][0]>0)&&(blockParamStr.nx[1][1]>0)&&(blockParamStr.nx[1][2]>0)&&(blockParamStr.nx[1][3]>0))
-            doubleValuesArray.insert(doubleValuesArray.end(), AverageVelocityArray3DPtr->getDataVector().begin(), AverageVelocityArray3DPtr->getDataVector().end());
-
-         boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
-         if (AverageFluctArray3DPtr&&(blockParamStr.nx[2][0]>0)&&(blockParamStr.nx[2][1]>0)&&(blockParamStr.nx[2][2]>0)&&(blockParamStr.nx[2][3]>0))
-            doubleValuesArray.insert(doubleValuesArray.end(), AverageFluctArray3DPtr->getDataVector().begin(), AverageFluctArray3DPtr->getDataVector().end());
-
-         boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
-         if (AverageTripleArray3DPtr&&(blockParamStr.nx[3][0]>0)&&(blockParamStr.nx[3][1]>0)&&(blockParamStr.nx[3][2]>0)&&(blockParamStr.nx[3][3]>0))
-            doubleValuesArray.insert(doubleValuesArray.end(), AverageTripleArray3DPtr->getDataVector().begin(), AverageTripleArray3DPtr->getDataVector().end());
-
-         boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
-         if (ShearStressValArray3DPtr&&(blockParamStr.nx[4][0]>0)&&(blockParamStr.nx[4][1]>0)&&(blockParamStr.nx[4][2]>0)&&(blockParamStr.nx[4][3]>0))
-            doubleValuesArray.insert(doubleValuesArray.end(), ShearStressValArray3DPtr->getDataVector().begin(), ShearStressValArray3DPtr->getDataVector().end());
-
-         boost::shared_ptr< CbArray3D<LBMReal, IndexerX3X2X1> > RelaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
-         if (RelaxationFactor3DPtr&&(blockParamStr.nx[5][0]>0)&&(blockParamStr.nx[5][1]>0)&&(blockParamStr.nx[5][2]>0))
-            doubleValuesArray.insert(doubleValuesArray.end(), RelaxationFactor3DPtr->getDataVector().begin(), RelaxationFactor3DPtr->getDataVector().end());
-
-         boost::shared_ptr< D3Q27EsoTwist3DSplittedVector > D3Q27EsoTwist3DSplittedVectorPtr = boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(block->getKernel()->getDataSet()->getFdistributions());
-         CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getLocalDistributions();
-         if (localDistributions&&(blockParamStr.nx[6][0]>0)&&(blockParamStr.nx[6][1]>0)&&(blockParamStr.nx[6][2]>0)&&(blockParamStr.nx[6][3]>0))
-            doubleValuesArray.insert(doubleValuesArray.end(), localDistributions->getDataVector().begin(), localDistributions->getDataVector().end());
-
-         CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getNonLocalDistributions();
-         if (nonLocalDistributions&&(blockParamStr.nx[7][0]>0)&&(blockParamStr.nx[7][1]>0)&&(blockParamStr.nx[7][2]>0)&&(blockParamStr.nx[7][3]>0))
-            doubleValuesArray.insert(doubleValuesArray.end(), nonLocalDistributions->getDataVector().begin(), nonLocalDistributions->getDataVector().end());
-
-         CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getZeroDistributions();
-         if (zeroDistributions&&(blockParamStr.nx[8][0]>0)&&(blockParamStr.nx[8][1]>0)&&(blockParamStr.nx[8][2]>0))
-            doubleValuesArray.insert(doubleValuesArray.end(), zeroDistributions->getDataVector().begin(), zeroDistributions->getDataVector().end());
-
-         ic++;
-      }
-   }
-
-   //doubleValuesArrayGW.assign(doubleValuesArray.begin(), doubleValuesArray.end());
-   
-   if (comm->isRoot())
-   {
-      UBLOG(logINFO, "MPIIORestartCoProcessor::writeDataSet start MPI IO rank = "<<rank);
-      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
-   }
-
-   // write to the file
-   // each process calculates its offset (the number of bytes it is going to write)
-   // and notifies the next process (rank + 1)
-   MPI_Offset write_offset = (MPI_Offset)(size*sizeof(int));
-   size_t next_write_offset = 0;
-
-   if (size>1)
-   {
-      if (rank==0)
-      {
-         next_write_offset = write_offset + blocksCount * (sizeof(DataSet)+blockParamStr.doubleCountInBlock*sizeof(double));
-         MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-      }
-      else
-      {
-         MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank-1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-         next_write_offset = write_offset + blocksCount * (sizeof(DataSet)+blockParamStr.doubleCountInBlock*sizeof(double));
-         if (rank<size-1)
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank+1, 5, MPI_COMM_WORLD);
-      }
-   }
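-   // Note: this rank-by-rank hand-off is an exclusive prefix sum of the
-   // per-process byte counts; an equivalent collective sketch (not used here):
-   //   MPI_Offset myBytes = blocksCount * (sizeof(DataSet) + blockParamStr.doubleCountInBlock * sizeof(double));
-   //   MPI_Offset myStart = 0;  // MPI_Exscan leaves rank 0 undefined; rank 0 starts at 0
-   //   MPI_Exscan(&myBytes, &myStart, 1, MPI_OFFSET, MPI_SUM, MPI_COMM_WORLD);
-   //   write_offset = (MPI_Offset)(size * sizeof(int)) + myStart;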
-
-   double start, finish;
-   if (comm->isRoot()) start = MPI_Wtime();
-
-   MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN
-   MPI_Info_create(&info);
-   MPI_Info_set(info, "striping_factor", "40");
-   MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-   MPI_File file_handler;
-   std::string filename = path+"/mpi_io_cp/mpi_io_cp_"+UbSystem::toString(step)+"/cpDataSet.bin";
-   int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE| MPI_MODE_WRONLY, info, &file_handler);
-   if (rc!=MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file "+filename);
-
-   //MPI_Offset new_size = 0;
-   //MPI_File_set_size(file_handler, new_size);
-
-   //std::cout << "writeDataSet rank=" << rank << ",blocksCount=" << blocksCount;
-   //std::cout << ", rank*sizeof(int)=" << (MPI_Offset)(rank * sizeof(int)) << ", write_offset=" << write_offset << std::endl;
-
-   // each process writes the quantity of its blocks
-   MPI_File_write_at(file_handler, (MPI_Offset)(rank*sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-   // each process writes data identifying blocks
-   MPI_File_write_at(file_handler, write_offset, dataSetArray, blocksCount, dataSetType, MPI_STATUS_IGNORE);
-   // each process writes the dataSet arrays
-   MPI_File_write_at(file_handler, (MPI_Offset)(write_offset +blocksCount*sizeof(DataSet)), &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-   MPI_File_sync(file_handler);
-   //std::cout << "writeDataSet rank=" << rank << ", write_offset1=" << write_offset << ", write_offset2=" << write_offset + blocksCount * sizeof(DataSet) << std::endl;
-   
-   //int blockC;
-   //MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blockC, 1, MPI_INT, MPI_STATUS_IGNORE);
-   //std::cout << "readDataSet rank=" << rank << ", blockC=" << blockC << std::endl;
-   
-   MPI_File_close(&file_handler);
-
-   if (comm->isRoot())
-   {
-      finish = MPI_Wtime();
-      UBLOG(logINFO, "MPIIORestartCoProcessor::writeDataSet time: "<<finish-start<<" s");
-   }
-
-   delete[] dataSetArray;
-}
-
-void MPIIORestartCoProcessor::writeBoundaryConds(int step)
-{
-   int rank, size;
-   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-   MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-   if (comm->isRoot())
-   {
-      UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds start collect data rank = "<<rank);
-      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
-   }
-
-   int blocksCount = 0;          // quantity of blocks in the grid, max 2147483647 blocks!
-   size_t count_boundCond = 0;	// how many BoundaryConditions in all blocks
-   int count_indexContainer = 0;	// how many indexContainer-values in all blocks
-   size_t byteCount = 0;			// how many bytes this process writes to the file
-
-   std::vector<Block3DPtr> blocksVector[25];
-   int minInitLevel = this->grid->getCoarsestInitializedLevel();
-   int maxInitLevel = this->grid->getFinestInitializedLevel();
-   for (int level = minInitLevel; level<=maxInitLevel; level++)
-   {
-      grid->getBlocks(level, rank, blocksVector[level]);
-      blocksCount += static_cast<int>(blocksVector[level].size());
-   }
-
-   BCAdd* bcAddArray = new BCAdd[blocksCount];
-   std::vector<BoundaryCondition> bcVector;
-   std::vector<int> bcindexmatrixV;
-   std::vector<int> indexContainerV;
-
-   int ic = 0;
-   for (int level = minInitLevel; level<=maxInitLevel; level++)
-   {
-      BOOST_FOREACH(Block3DPtr block, blocksVector[level])  // all the blocks of the current level
-      {
-         BCArray3DPtr bcArr = block->getKernel()->getBCProcessor()->getBCArray();
-
-         bcAddArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-         bcAddArray[ic].x2 = block->getX2();
-         bcAddArray[ic].x3 = block->getX3();
-         bcAddArray[ic].level = block->getLevel();
-         bcAddArray[ic].boundCond_count = 0; // how many BoundaryConditions in this block
-         bcAddArray[ic].indexContainer_count = 0;  // how many indexContainer-values in this block
-
-         for (int bc = 0; bc<bcArr->getBCVectorSize(); bc++)
-         {
-            BoundaryCondition* bouCond = new BoundaryCondition();
-            if (bcArr->bcvector[bc]==NULL)
-            {
-               memset(bouCond, 0, sizeof(BoundaryCondition));
-            }
-            else
-            {
-               bouCond->noslipBoundaryFlags = bcArr->bcvector[bc]->getNoSlipBoundary();
-               bouCond->slipBoundaryFlags = bcArr->bcvector[bc]->getSlipBoundary();
-               bouCond->velocityBoundaryFlags = bcArr->bcvector[bc]->getVelocityBoundary();
-               bouCond->densityBoundaryFlags = bcArr->bcvector[bc]->getDensityBoundary();
-               bouCond->wallModelBoundaryFlags = bcArr->bcvector[bc]->getWallModelBoundary();
-               bouCond->bcVelocityX1 = bcArr->bcvector[bc]->getBoundaryVelocityX1();
-               bouCond->bcVelocityX2 = bcArr->bcvector[bc]->getBoundaryVelocityX2();
-               bouCond->bcVelocityX3 = bcArr->bcvector[bc]->getBoundaryVelocityX3();
-               bouCond->bcDensity = bcArr->bcvector[bc]->getBoundaryDensity();
-               bouCond->bcLodiDensity = bcArr->bcvector[bc]->getDensityLodiDensity();
-               bouCond->bcLodiVelocityX1 = bcArr->bcvector[bc]->getDensityLodiVelocityX1();
-               bouCond->bcLodiVelocityX2 = bcArr->bcvector[bc]->getDensityLodiVelocityX2();
-               bouCond->bcLodiVelocityX3 = bcArr->bcvector[bc]->getDensityLodiVelocityX3();
-               bouCond->bcLodiLentgh = bcArr->bcvector[bc]->getDensityLodiLength();
-               bouCond->nx1 = bcArr->bcvector[bc]->nx1;
-               bouCond->nx2 = bcArr->bcvector[bc]->nx2;
-               bouCond->nx3 = bcArr->bcvector[bc]->nx3;
-               for (int iq = 0; iq<26; iq++)
-                  bouCond->q[iq] = bcArr->bcvector[bc]->getQ(iq);
-               bouCond->algorithmType = bcArr->bcvector[bc]->getBcAlgorithmType();
-            }
-
-            bcVector.push_back(*bouCond);
-            //bcVectorGW.push_back(*bouCond);
-            bcAddArray[ic].boundCond_count++;
-            count_boundCond++;
-         }
-
-         bcindexmatrixV.insert(bcindexmatrixV.end(), bcArr->bcindexmatrix.getDataVector().begin(), bcArr->bcindexmatrix.getDataVector().end());
-         //bcindexmatrixVGW.assign(bcindexmatrixV.begin(), bcindexmatrixV.end());
-
-         indexContainerV.insert(indexContainerV.end(), bcArr->indexContainer.begin(), bcArr->indexContainer.end());
-         //indexContainerVGW.assign(indexContainerV.begin(), indexContainerV.end());
-
-         bcAddArray[ic].indexContainer_count = static_cast<int>(bcArr->indexContainer.size());
-         count_indexContainer += bcAddArray[ic].indexContainer_count;
-
-         ic++;
-      }
-   }
-
-   // how many "big blocks" of size BLOCK_SIZE can be formed
-   int bcBlockCount = (int)(count_boundCond/BLOCK_SIZE);
-   if (bcBlockCount * BLOCK_SIZE<count_boundCond)
-      bcBlockCount += 1;
-   for (int i = (int)count_boundCond; i<bcBlockCount * BLOCK_SIZE; i++)
-   {
-      BoundaryCondition* bouCond = new BoundaryCondition();
-      memset(bouCond, 0, sizeof(BoundaryCondition));
-      bcVector.push_back(*bouCond);
-   }
-
-   byteCount = bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition) + blocksCount * sizeof(BCAdd) + sizeof(int) * (blocksCount * blockParamStr.bcindexmatrix_count + count_indexContainer);
-
-   // write to the file
-   // each process calculates its offset (the number of bytes it is going to write)
-   // and notifies the next process (rank + 1)
-   MPI_Offset write_offset = (MPI_Offset)(3*size*sizeof(int));
-   size_t next_write_offset = 0;
-
-   if (size>1)
-   {
-      if (rank==0)
-      {
-         next_write_offset = write_offset + byteCount;
-         MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-      }
-      else
-      {
-         MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank-1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-         next_write_offset = write_offset + byteCount;
-         if (rank<size-1)
-            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank+1, 5, MPI_COMM_WORLD);
-      }
-   }
-
-   if (comm->isRoot())
-   {
-      UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds start MPI IO rank = "<<rank);
-      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
-   }
-
-   double start, finish;
-   if (comm->isRoot()) start = MPI_Wtime();
-
-   MPI_Info info = MPI_INFO_NULL;
-
-#ifdef HLRN
-   MPI_Info_create(&info);
-   MPI_Info_set(info, "striping_factor", "40");
-   MPI_Info_set(info, "striping_unit", "4M");
-#endif
-
-   MPI_File file_handler;
-   std::string filename = path+"/mpi_io_cp/mpi_io_cp_"+UbSystem::toString(step)+"/cpBC.bin";
-   int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE|MPI_MODE_WRONLY, info, &file_handler);
-   if (rc!=MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file "+filename);
-
-   //MPI_Offset new_size = 0;
-   //MPI_File_set_size(file_handler, new_size);
-
-   // each process writes the quantity of its blocks
-   MPI_File_write_at(file_handler, (MPI_Offset)(rank*sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);	//	blocks quantity
-   // each process writes the quantity of "big blocks" (BLOCK_SIZE boundary conditions each)
-   MPI_File_write_at(file_handler, (MPI_Offset)((rank+size)*sizeof(int)), &bcBlockCount, 1, MPI_INT, MPI_STATUS_IGNORE); // quantity of BoundConds / BLOCK_SIZE
-   // each process writes the quantity of indexContainer elements in all blocks
-   MPI_File_write_at(file_handler, (MPI_Offset)((rank+2*size)*sizeof(int)), &count_indexContainer, 1, MPI_INT, MPI_STATUS_IGNORE); // quantity of indexContainer	
-
-   // each process writes data identifying the blocks
-   MPI_File_write_at(file_handler, write_offset, bcAddArray, blocksCount, boundCondTypeAdd, MPI_STATUS_IGNORE);
-   // each process writes boundary conditions
-   if (bcVector.size()>0)
-      MPI_File_write_at(file_handler, (MPI_Offset)(write_offset +blocksCount*sizeof(BCAdd)), &bcVector[0], bcBlockCount, boundCondType1000, MPI_STATUS_IGNORE);
-   // each process writes bcindexmatrix values
-   if (bcindexmatrixV.size()>0)
-      MPI_File_write_at(file_handler, (MPI_Offset)(write_offset+blocksCount*sizeof(BCAdd)+bcBlockCount*BLOCK_SIZE*sizeof(BoundaryCondition)), &bcindexmatrixV[0], blocksCount, bcindexmatrixType, MPI_STATUS_IGNORE);
-   // each process writes indexContainer values
-   if (indexContainerV.size()>0)
-      MPI_File_write_at(file_handler, (MPI_Offset)(write_offset+blocksCount*sizeof(BCAdd)+bcBlockCount*BLOCK_SIZE*sizeof(BoundaryCondition)+blocksCount*blockParamStr.bcindexmatrix_count*sizeof(int)), &indexContainerV[0], count_indexContainer, MPI_INT, MPI_STATUS_IGNORE);
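-   // Resulting layout of cpBC.bin (matching the offsets above): a header of
-   // 3*size ints, then per process: BCAdd[blocksCount],
-   // BoundaryCondition[bcBlockCount*BLOCK_SIZE],
-   // int[blocksCount*bcindexmatrix_count], int[count_indexContainer].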
-   MPI_File_sync(file_handler);
-
-   MPI_File_close(&file_handler);
-
-   if (comm->isRoot())
-   {
-      finish = MPI_Wtime();
-      UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds time: "<<finish-start<<" s");
-   }
-
-   delete[] bcAddArray;
-}
-
-//------------------------------------------- READ -----------------------------------------------
-void MPIIORestartCoProcessor::restart(int step)
-{
-   if (comm->isRoot()) UBLOG(logINFO, "MPIIORestartCoProcessor restart step: "<<step);
-   if (comm->isRoot()) UBLOG(logINFO, "Load check point - start");
-   readBlocks(step);
-   readDataSet(step);
-   readBoundaryConds(step);
-   if (comm->isRoot()) UBLOG(logINFO, "Load check point - end");
-   this->reconnect(grid);
-}
-
-void MPIIORestartCoProcessor::readBlocks(int step)
-{
-   int rank, size;
-   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-   //MPI_Comm_size(MPI_COMM_WORLD, &size);
-   size = 1;
-
-   if (comm->isRoot())
-   {
-      UBLOG(logINFO, "MPIIORestartCoProcessor::readBlocks start MPI IO rank = "<<rank);
-      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
-   }
-
-   double start, finish;
-   if (comm->isRoot()) start = MPI_Wtime();
-
-   MPI_File file_handler;
-   std::string filename = path+"/mpi_io_cp/mpi_io_cp_"+UbSystem::toString(step)+"/cpBlocks.bin";
-   int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-   if (rc!=MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file "+filename);
-
-   // read count of blocks
-   int blocksCount = 0;
-   //MPI_File_read_at(file_handler, rank*sizeof(int), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-   MPI_File_read_at(file_handler, 0, &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-   Block3d* block3dArray = new Block3d[blocksCount];
-
-   // calculate the read offset
-   MPI_Offset read_offset = (MPI_Offset)(size * sizeof(int));
-
-   GridParam* gridParameters = new GridParam;
-
-   // read parameters of the grid
-   MPI_File_read_at(file_handler, read_offset, gridParameters, 1, gridParamType, MPI_STATUS_IGNORE);
-   // read parameters of a block
-   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset+sizeof(GridParam)), &blockParamStr, 1, blockParamType, MPI_STATUS_IGNORE);
-   // read all the blocks
-   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset+sizeof(GridParam)+sizeof(BlockParam)), &block3dArray[0], blocksCount, block3dType, MPI_STATUS_IGNORE);
-
-   MPI_File_close(&file_handler);
-
-   if (comm->isRoot())
-   {
-      finish = MPI_Wtime();
-      UBLOG(logINFO, "MPIIORestartCoProcessor::readBlocks time: "<<finish-start<<" s");
-   }
-
-   if (comm->isRoot())
-   {
-      UBLOG(logINFO, "MPIIORestartCoProcessor::readBlocks start of restore of data, rank = "<<rank);
-      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
-   }
-
-   // clear the grid
-   std::vector<Block3DPtr> blocksVector;
-   grid->getBlocks(0, blocksVector);
-   int del = 0;
-   BOOST_FOREACH(Block3DPtr block, blocksVector)
-   {
-      grid->deleteBlock(block);
-      del++;
-   }
-
-   // restore the grid
-   CoordinateTransformation3DPtr trafo(new CoordinateTransformation3D());
-   trafo->Tx1 = gridParameters->trafoParams[0];
-   trafo->Tx2 = gridParameters->trafoParams[1];
-   trafo->Tx3 = gridParameters->trafoParams[2];
-   trafo->Sx1 = gridParameters->trafoParams[3];
-   trafo->Sx2 = gridParameters->trafoParams[4];
-   trafo->Sx3 = gridParameters->trafoParams[5];
-   trafo->alpha = gridParameters->trafoParams[6];
-   trafo->beta = gridParameters->trafoParams[7];
-   trafo->gamma = gridParameters->trafoParams[8];
-
-   trafo->toX1factorX1 = gridParameters->trafoParams[9];
-   trafo->toX1factorX2 = gridParameters->trafoParams[10];
-   trafo->toX1factorX3 = gridParameters->trafoParams[11];
-   trafo->toX1delta = gridParameters->trafoParams[12];
-   trafo->toX2factorX1 = gridParameters->trafoParams[13];
-   trafo->toX2factorX2 = gridParameters->trafoParams[14];
-   trafo->toX2factorX3 = gridParameters->trafoParams[15];
-   trafo->toX2delta = gridParameters->trafoParams[16];
-   trafo->toX3factorX1 = gridParameters->trafoParams[17];
-   trafo->toX3factorX2 = gridParameters->trafoParams[18];
-   trafo->toX3factorX3 = gridParameters->trafoParams[19];
-   trafo->toX3delta = gridParameters->trafoParams[20];
-
-   trafo->fromX1factorX1 = gridParameters->trafoParams[21];
-   trafo->fromX1factorX2 = gridParameters->trafoParams[22];
-   trafo->fromX1factorX3 = gridParameters->trafoParams[23];
-   trafo->fromX1delta = gridParameters->trafoParams[24];
-   trafo->fromX2factorX1 = gridParameters->trafoParams[25];
-   trafo->fromX2factorX2 = gridParameters->trafoParams[26];
-   trafo->fromX2factorX3 = gridParameters->trafoParams[27];
-   trafo->fromX2delta = gridParameters->trafoParams[28];
-   trafo->fromX3factorX1 = gridParameters->trafoParams[29];
-   trafo->fromX3factorX2 = gridParameters->trafoParams[30];
-   trafo->fromX3factorX3 = gridParameters->trafoParams[31];
-   trafo->fromX3delta = gridParameters->trafoParams[32];
-
-   trafo->active = gridParameters->active;
-   trafo->transformation = gridParameters->transformation;
-
-   grid->setCoordinateTransformator(trafo);
-
-   grid->setDeltaX(gridParameters->deltaX);
-   grid->setBlockNX(gridParameters->blockNx1, gridParameters->blockNx2, gridParameters->blockNx3);
-   grid->setNX1(gridParameters->nx1);
-   grid->setNX2(gridParameters->nx2);
-   grid->setNX3(gridParameters->nx3);
-   grid->setPeriodicX1(gridParameters->periodicX1);
-   grid->setPeriodicX2(gridParameters->periodicX2);
-   grid->setPeriodicX3(gridParameters->periodicX3);
-
-   // regenerate blocks
-   for (int n = 0; n<blocksCount; n++)
-   {
-      Block3DPtr block(new Block3D(block3dArray[n].x1, block3dArray[n].x2, block3dArray[n].x3, block3dArray[n].level));
-      block->setActive(block3dArray[n].active);
-      block->setBundle(block3dArray[n].bundle);
-      block->setRank(block3dArray[n].rank);
-      block->setLocalRank(block3dArray[n].lrank);
-      block->setGlobalID(block3dArray[n].globalID);
-      block->setLocalID(block3dArray[n].localID);
-      block->setPart(block3dArray[n].part);
-      block->setLevel(block3dArray[n].level);
-      block->interpolationFlagCF = block3dArray[n].interpolationFlagCF;
-      block->interpolationFlagFC = block3dArray[n].interpolationFlagFC;
-
-      grid->addBlock(block);
-   }
-
-   // define MPI_types depending on the block-specific information
-   MPI_Type_contiguous(blockParamStr.doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
-   MPI_Type_commit(&dataSetDoubleType);
-
-   MPI_Type_contiguous(blockParamStr.bcindexmatrix_count, MPI_INT, &bcindexmatrixType);
-   MPI_Type_commit(&bcindexmatrixType);
-
-   mpiTypeFreeFlag = true;
-
-   delete gridParameters;
-   delete[] block3dArray;
-
-   if (comm->isRoot())
-   {
-      UBLOG(logINFO, "MPIIORestartCoProcessor::readBlocks end of restore of data, rank = "<<rank);
-      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
-   }
-}
-
-void MPIIORestartCoProcessor::readDataSet(int step)
-{
-   int rank, size;
-   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-   MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-   if (comm->isRoot())
-   {
-      UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet start MPI IO rank = "<<rank);
-      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
-   }
-   double start, finish;
-   if (comm->isRoot()) start = MPI_Wtime();
-
-   MPI_File file_handler;
-   std::string filename = path+"/mpi_io_cp/mpi_io_cp_"+UbSystem::toString(step)+"/cpDataSet.bin";
-   int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-   if (rc!=MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file "+filename);
-
-   // read count of blocks
-   int blocksCount = 0;
-   MPI_File_read_at(file_handler, (MPI_Offset)(rank*sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-   
-   DataSet* dataSetArray = new DataSet[blocksCount];
-   std::vector<double> doubleValuesArray(blocksCount * blockParamStr.doubleCountInBlock); // double-values in all blocks 
-
-   // calculate the read offset
-   MPI_Offset read_offset = (MPI_Offset)(size*sizeof(int));
-   size_t next_read_offset = 0;
-
-   if(size > 1)
-   {
-   	if(rank == 0)
-   	{
-   		next_read_offset = read_offset + blocksCount * (sizeof(DataSet) + blockParamStr.doubleCountInBlock * sizeof(double));
-   		MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-   	}
-   	else
-   	{
-   		MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-         next_read_offset = read_offset + blocksCount * (sizeof(DataSet) + blockParamStr.doubleCountInBlock * sizeof(double));
-   		if(rank < size - 1)
-   			MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
-   	}
-   }
-
-   int chunkFlag = 0;
-
-   //if (rank == 0)
-   //{
-   //   MPI_File_read_at(file_handler, read_offset, dataSetArray, blocksCount, dataSetType, MPI_STATUS_IGNORE);
-   //   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset+blocksCount*sizeof(DataSet)), &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-   //    
-   //   for (int i=1; i<size; i+=chunk)
-   //   {
-   //      for (int j=i; j<i+chunk; j++)
-   //      {
-   //         if (j < size)
-   //         {
-   //            MPI_Send(&chunkFlag, 1, MPI_INT, j, 77, MPI_COMM_WORLD);
-   //            //UBLOG(logINFO, "j= "<<j);
-   //         }
-   //      }
-   //      for (int j=i; j<i+chunk; j++)
-   //      {
-   //         if (j < size)
-   //         {
-   //            MPI_Recv(&chunkFlag, 1, MPI_INT, j, 77, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-   //         }
-   //      }
-   //   }
-   //}
-   //else
-   //{
-   //   MPI_Recv(&chunkFlag, 1, MPI_INT, 0, 77, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-   //   MPI_File_read_at(file_handler, read_offset, dataSetArray, blocksCount, dataSetType, MPI_STATUS_IGNORE);
-   //   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset+blocksCount*sizeof(DataSet)), &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-   //   MPI_Send(&chunkFlag, 1, MPI_INT, 0, 77, MPI_COMM_WORLD);
-   //   //UBLOG(logINFO, "read rank= "<<rank);
-   //}
-
-   MPI_File_read_at(file_handler, read_offset, dataSetArray, blocksCount, dataSetType, MPI_STATUS_IGNORE);
-   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset+blocksCount*sizeof(DataSet)), &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
-   MPI_File_close(&file_handler);
-
-   /*for (int ch = 0; ch < blocksCount; ch++)
-   {
-      if ((dataSetArrayGW[ch].x1 != dataSetArray[ch].x1) ||
-         (dataSetArrayGW[ch].x2 != dataSetArray[ch].x2) ||
-         (dataSetArrayGW[ch].x3 != dataSetArray[ch].x3) ||
-         (dataSetArrayGW[ch].level != dataSetArray[ch].level) ||
-         (dataSetArrayGW[ch].ghostLayerWidth != dataSetArray[ch].ghostLayerWidth) ||
-         (dataSetArrayGW[ch].collFactor != dataSetArray[ch].collFactor) ||
-         (dataSetArrayGW[ch].deltaT != dataSetArray[ch].deltaT) ||
-         (dataSetArrayGW[ch].compressible != dataSetArray[ch].compressible) ||
-         (dataSetArrayGW[ch].withForcing != dataSetArray[ch].withForcing)) 
-         std::cout << "dataSetArrayGW != rank" << rank << ", !!!!!====="<< std::endl;
-   }
-   for (int vl = 0; vl < doubleValuesArrayGW.size(); vl++)
-      if(doubleValuesArrayGW[vl] != doubleValuesArray[vl])
-         std::cout << "doubleValuesArrayGW != rank" << rank << ", !!!!!====="<< std::endl;*/
-
-   if (comm->isRoot())
-   {
-      finish = MPI_Wtime();
-      UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet time: "<<finish-start<<" s");
-      UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet start of restore of data, rank = "<<rank);
-      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
-   }
-
-   size_t index = 0, nextVectorSize = 0;
-   std::vector<double> vectorsOfValues[9];
-   for (int n = 0; n<blocksCount; n++)
-   {
-      for (int b = 0; b<9; b++) // assign appropriate vectors for 9 dataSet arrays
-      {
-         nextVectorSize = blockParamStr.nx[b][0]*blockParamStr.nx[b][1]*blockParamStr.nx[b][2]*blockParamStr.nx[b][3];
-         vectorsOfValues[b].assign(doubleValuesArray.data()+index, doubleValuesArray.data()+index+nextVectorSize);
-         index += nextVectorSize;
-      }
-
-      // fill dataSet arrays
-      AverageValuesArray3DPtr mAverageDensity;
-      if ((blockParamStr.nx[0][0]==0)&&(blockParamStr.nx[0][1]==0)&&(blockParamStr.nx[0][2]==0)&&(blockParamStr.nx[0][3]==0))
-         mAverageDensity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr();
-      else
-         mAverageDensity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[0], blockParamStr.nx[0][0], blockParamStr.nx[0][1], blockParamStr.nx[0][2], blockParamStr.nx[0][3]));
-
-      AverageValuesArray3DPtr mAverageVelocity;
-      if ((blockParamStr.nx[1][0]==0)&&(blockParamStr.nx[1][1]==0)&&(blockParamStr.nx[1][2]==0)&&(blockParamStr.nx[1][3]==0))
-         mAverageVelocity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr();
-      else
-         mAverageVelocity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[1], blockParamStr.nx[1][0], blockParamStr.nx[1][1], blockParamStr.nx[1][2], blockParamStr.nx[1][3]));
-
-      AverageValuesArray3DPtr mAverageFluktuations;
-      if ((blockParamStr.nx[2][0]==0)&&(blockParamStr.nx[2][1]==0)&&(blockParamStr.nx[2][2]==0)&&(blockParamStr.nx[2][3]==0))
-         mAverageFluktuations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr();
-      else
-         mAverageFluktuations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[2], blockParamStr.nx[2][0], blockParamStr.nx[2][1], blockParamStr.nx[2][2], blockParamStr.nx[2][3]));
-
-      AverageValuesArray3DPtr mAverageTriplecorrelations;
-      if ((blockParamStr.nx[3][0]==0)&&(blockParamStr.nx[3][1]==0)&&(blockParamStr.nx[3][2]==0)&&(blockParamStr.nx[3][3]==0))
-         mAverageTriplecorrelations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr();
-      else
-         mAverageTriplecorrelations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[3], blockParamStr.nx[3][0], blockParamStr.nx[3][1], blockParamStr.nx[3][2], blockParamStr.nx[3][3]));
-
-      ShearStressValuesArray3DPtr mShearStressValues;
-      if ((blockParamStr.nx[4][0]==0)&&(blockParamStr.nx[4][1]==0)&&(blockParamStr.nx[4][2]==0)&&(blockParamStr.nx[4][3]==0))
-         mShearStressValues = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr();
-      else
-         mShearStressValues = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[4], blockParamStr.nx[4][0], blockParamStr.nx[4][1], blockParamStr.nx[4][2], blockParamStr.nx[4][3]));
-
-      RelaxationFactorArray3DPtr mRelaxationFactor;
-      if ((blockParamStr.nx[5][0]==0)&&(blockParamStr.nx[5][1]==0)&&(blockParamStr.nx[5][2]==0))
-         mRelaxationFactor = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr();
-      else
-         mRelaxationFactor = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(vectorsOfValues[5], blockParamStr.nx[5][0], blockParamStr.nx[5][1], blockParamStr.nx[5][2]));
-
-      DistributionArray3DPtr mFdistributions(new D3Q27EsoTwist3DSplittedVector(blockParamStr.nx1, blockParamStr.nx2, blockParamStr.nx3, -999.0));
-
-      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[6], blockParamStr.nx[6][0], blockParamStr.nx[6][1], blockParamStr.nx[6][2], blockParamStr.nx[6][3])));
-      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[7], blockParamStr.nx[7][0], blockParamStr.nx[7][1], blockParamStr.nx[7][2], blockParamStr.nx[7][3])));
-      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(vectorsOfValues[8], blockParamStr.nx[8][0], blockParamStr.nx[8][1], blockParamStr.nx[8][2])));
-
-      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX1(blockParamStr.nx1);
-      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX2(blockParamStr.nx2);
-      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX3(blockParamStr.nx3);
-
-      DataSet3DPtr dataSetPtr = DataSet3DPtr(new DataSet3D());
-      dataSetPtr->setAverageDencity(mAverageDensity);
-      dataSetPtr->setAverageVelocity(mAverageVelocity);
-      dataSetPtr->setAverageFluctuations(mAverageFluktuations);
-      dataSetPtr->setAverageTriplecorrelations(mAverageTriplecorrelations);
-      dataSetPtr->setShearStressValues(mShearStressValues);
-      dataSetPtr->setRelaxationFactor(mRelaxationFactor);
-      dataSetPtr->setFdistributions(mFdistributions);
-
-      // find the necessary block and fill it
-      Block3DPtr block = grid->getBlock(dataSetArray[n].x1, dataSetArray[n].x2, dataSetArray[n].x3, dataSetArray[n].level);
-      //LBMKernelPtr kernel(new CompressibleCumulantLBMKernel());
-      //LBMKernelPtr kernel(new IncompressibleCumulantLBMKernel());
-      LBMKernelPtr kernel = this->lbmKernel->clone();
-      kernel->setGhostLayerWidth(dataSetArray[n].ghostLayerWidth);
-      kernel->setCollisionFactor(dataSetArray[n].collFactor);
-      kernel->setDeltaT(dataSetArray[n].deltaT);
-      kernel->setCompressible(dataSetArray[n].compressible);
-      kernel->setWithForcing(dataSetArray[n].withForcing);
-      kernel->setDataSet(dataSetPtr);
-      block->setKernel(kernel);
-      //block->getKernel()->setDataSet(dataSetPtr);
-   }
-
-   delete[] dataSetArray;
-
-   if (comm->isRoot())
-   {
-      UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet end of restore of data, rank = "<<rank);
-      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
-   }
-}
-
-void MPIIORestartCoProcessor::readBoundaryConds(int step)
-{
-   int rank, size;
-   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-   MPI_Comm_size(MPI_COMM_WORLD, &size);
-
-   if (comm->isRoot())
-   {
-      UBLOG(logINFO, "MPIIORestartCoProcessor::readBoundaryConds start MPI IO rank = "<<rank);
-      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
-   }
-   double start, finish;
-   if (comm->isRoot()) start = MPI_Wtime();
-
-   MPI_File file_handler;
-   std::string filename = path+"/mpi_io_cp/mpi_io_cp_"+UbSystem::toString(step)+"/cpBC.bin";
-   int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
-   if (rc!=MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file "+filename);
-
-   int blocksCount = 0;
-   int dataCount1000 = 0;
-   int dataCount2 = 0;
-   // read count of blocks
-   MPI_File_read_at(file_handler, (MPI_Offset)(rank*sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-   // read count of big BoundaryCondition blocks
-   MPI_File_read_at(file_handler, (MPI_Offset)((rank+size)*sizeof(int)), &dataCount1000, 1, MPI_INT, MPI_STATUS_IGNORE);
-   // read count of indexContainer values in all blocks
-   MPI_File_read_at(file_handler, (MPI_Offset)((rank+2*size)*sizeof(int)), &dataCount2, 1, MPI_INT, MPI_STATUS_IGNORE);
-
-   size_t dataCount = dataCount1000 * BLOCK_SIZE;
-   BCAdd* bcAddArray = new BCAdd[blocksCount];
-   BoundaryCondition* bcArray = new BoundaryCondition[dataCount];
-   BoundaryCondition* nullBouCond = new BoundaryCondition();
-   memset(nullBouCond, 0, sizeof(BoundaryCondition));
-   int* intArray1 = new int[blocksCount * blockParamStr.bcindexmatrix_count];
-   int* intArray2 = new int[dataCount2];
-
-   MPI_Offset read_offset = (MPI_Offset)(3*size*sizeof(int));
-   size_t next_read_offset = 0;
-
-   if (size>1)
-   {
-      if (rank==0)
-      {
-         next_read_offset = read_offset+blocksCount*sizeof(BCAdd)+dataCount*sizeof(BoundaryCondition)+(blocksCount * blockParamStr.bcindexmatrix_count+dataCount2)*sizeof(int);
-         MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-      }
-      else
-      {
-         MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank-1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-         next_read_offset = read_offset+blocksCount*sizeof(BCAdd)+dataCount*sizeof(BoundaryCondition)+(blocksCount * blockParamStr.bcindexmatrix_count+dataCount2)*sizeof(int);
-         if (rank<size-1)
-            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank+1, 5, MPI_COMM_WORLD);
-      }
-   }
-
-   MPI_File_read_at(file_handler, read_offset, bcAddArray, blocksCount, boundCondTypeAdd, MPI_STATUS_IGNORE);
-   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset+blocksCount*sizeof(BCAdd)), &bcArray[0], dataCount1000, boundCondType1000, MPI_STATUS_IGNORE);
-   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset+blocksCount*sizeof(BCAdd)+dataCount*sizeof(BoundaryCondition)), &intArray1[0], blocksCount, bcindexmatrixType, MPI_STATUS_IGNORE);
-   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset+blocksCount*sizeof(BCAdd)+dataCount*sizeof(BoundaryCondition)+blocksCount * blockParamStr.bcindexmatrix_count*sizeof(int)), &intArray2[0], dataCount2, MPI_INT, MPI_STATUS_IGNORE);
-   //MPI_File_sync(file_handler);
-
-   MPI_File_close(&file_handler);
-
-   if (comm->isRoot())
-   {
-      finish = MPI_Wtime();
-      UBLOG(logINFO, "MPIIORestartCoProcessor::readBoundaryConds time: "<<finish-start<<" s");
-      UBLOG(logINFO, "MPIIORestartCoProcessor::readBoundaryConds start of restore of data, rank = "<<rank);
-      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
-   }
-
-   int index = 0, index1 = 0, index2 = 0;
-   std::vector<BoundaryConditionsPtr> bcVector;
-   std::vector<int> bcindexmatrixV;
-   std::vector<int> indexContainerV;
-
-   for (size_t n = 0; n<blocksCount; n++)
-   {
-      bcVector.resize(0);
-      bcindexmatrixV.resize(0);
-      indexContainerV.resize(0);
-
-      for (size_t ibc = 0; ibc<bcAddArray[n].boundCond_count; ibc++)
-      {
-         BoundaryConditionsPtr bc;
-         if (memcmp(&bcArray[index], nullBouCond, sizeof(BoundaryCondition))==0)
-            bc = BoundaryConditionsPtr();
-         else
-         {
-            bc = BoundaryConditionsPtr(new BoundaryConditions);
-            bc->noslipBoundaryFlags = bcArray[index].noslipBoundaryFlags;
-            bc->slipBoundaryFlags = bcArray[index].slipBoundaryFlags;
-            bc->densityBoundaryFlags = bcArray[index].densityBoundaryFlags;
-            bc->velocityBoundaryFlags = bcArray[index].velocityBoundaryFlags;
-            bc->wallModelBoundaryFlags = bcArray[index].wallModelBoundaryFlags;
-            bc->bcVelocityX1 = bcArray[index].bcVelocityX1;
-            bc->bcVelocityX2 = bcArray[index].bcVelocityX2;
-            bc->bcVelocityX3 = bcArray[index].bcVelocityX3;
-            bc->bcDensity = bcArray[index].bcDensity;
-            bc->bcLodiDensity = bcArray[index].bcLodiDensity;
-            bc->bcLodiVelocityX1 = bcArray[index].bcLodiVelocityX1;
-            bc->bcLodiVelocityX2 = bcArray[index].bcLodiVelocityX2;
-            bc->bcLodiVelocityX3 = bcArray[index].bcLodiVelocityX3;
-            bc->bcLodiLentgh = bcArray[index].bcLodiLentgh;
-
-            /*if (bcVectorGW[n].noslipBoundaryFlags != bc->noslipBoundaryFlags ||
-               bcVectorGW[n].slipBoundaryFlags != bc->slipBoundaryFlags ||
-               bcVectorGW[n].densityBoundaryFlags != bc->densityBoundaryFlags ||
-               bcVectorGW[n].velocityBoundaryFlags != bc->velocityBoundaryFlags ||
-               bcVectorGW[n].wallModelBoundaryFlags != bc->wallModelBoundaryFlags)
-               std::cout << "readBoundaryConds BoundaryConditionsPtr !!!!===" <<std::endl;*/
-
-            bc->nx1 = bcArray[index].nx1;
-            bc->nx2 = bcArray[index].nx2;
-            bc->nx3 = bcArray[index].nx3;
-            for (int iq = 0; iq<26; iq++)
-               bc->setQ(bcArray[index].q[iq], iq);
-            bc->setBcAlgorithmType(bcArray[index].algorithmType);
-         }
-
-         bcVector.push_back(bc);
-         index++;
-      }
-
-      for (int b1 = 0; b1 < blockParamStr.bcindexmatrix_count; b1++)
-         bcindexmatrixV.push_back(intArray1[index1++]);
-
-      for (int b2 = 0; b2 < bcAddArray[n].indexContainer_count; b2++)
-         indexContainerV.push_back(intArray2[index2++]);
-
-      CbArray3D<int, IndexerX3X2X1> bcim(bcindexmatrixV, blockParamStr.nx1, blockParamStr.nx2, blockParamStr.nx3);
-
-      Block3DPtr block = grid->getBlock(bcAddArray[n].x1, bcAddArray[n].x2, bcAddArray[n].x3, bcAddArray[n].level);
-      BCProcessorPtr bcProc = bcProcessor->clone(block->getKernel());
-      BCArray3DPtr bcArr(new BCArray3D());
-      bcArr->bcindexmatrix = bcim;
-      bcArr->bcvector = bcVector;
-      bcArr->indexContainer = indexContainerV;
-      bcProc->setBCArray(bcArr);
-      
-      block->getKernel()->setBCProcessor(bcProc);
-   }
-
-/*   for (int b1 = 0; b1 < bcindexmatrixVGW.size(); b1++)
-   {
-      if (bcindexmatrixVGW[b1] != bcindexmatrixV[b1])
-         std::cout << "readBoundaryConds bcindexmatrixVGW !!!!===" << std::endl;
-   }
-   for (int b2 = 0; b2 < indexContainerVGW.size(); b2++)
-   {
-      if (indexContainerVGW[b2] != indexContainerV[b2])
-         std::cout << "readBoundaryConds indexContainerVGW !!!!===" << std::endl;
-   }
-*/
-   delete nullBouCond;
-   delete[] bcArray;
-   delete[] bcAddArray;
-   delete[] intArray1;
-   delete[] intArray2;
-   
-   if (comm->isRoot())
-   {
-      UBLOG(logINFO, "MPIIORestartCoProcessor::readBoundaryConds end of restore of data, rank = "<<rank);
-      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
-   }
-}
-//////////////////////////////////////////////////////////////////////////
-void MPIIORestartCoProcessor::setChunk(int val)
-{
-   chunk = val;
-}
-//////////////////////////////////////////////////////////////////////////
-void MPIIORestartCoProcessor::setLBMKernel(LBMKernelPtr kernel)
-{
-   this->lbmKernel = kernel;
-}
-//////////////////////////////////////////////////////////////////////////
-void MPIIORestartCoProcessor::setBCProcessor(BCProcessorPtr bcProcessor)
-{
-   this->bcProcessor = bcProcessor;
-}
-
+#include "MPIIORestart11CoProcessor.h"
+#include <boost/foreach.hpp>
+#include "D3Q27System.h"
+//#include "LBMKernel.h"
+#include "CompressibleCumulantLBMKernel.h"
+#include "IncompressibleCumulantLBMKernel.h"
+#include "ThinWallBCProcessor.h"
+#include "D3Q27EsoTwist3DSplittedVector.h"
+#include <UbSystem.h>
+#include <MemoryUtil.h>
+
+//! BLOCK_SIZE defines how many BoundaryCondition structures are written to the file as one block.
+//! To avoid an overflow of the parameter \a count of the function MPI_File_write_at,
+//! BoundaryCondition structures are written in chunks of BLOCK_SIZE structures each.
+#define BLOCK_SIZE 1024
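+
+// A minimal illustrative sketch (not part of this co-processor) of the chunking
+// arithmetic implied by BLOCK_SIZE: the number of BoundaryCondition structures is
+// rounded up to whole chunks, and the file I/O then transfers that many elements of
+// the derived type boundCondType1000. The variable names here are hypothetical.
+//
+//   size_t count_boundCond = /* BCs collected from all blocks of this process */;
+//   int bcBlockCount = (int)(count_boundCond / BLOCK_SIZE);
+//   if (count_boundCond % BLOCK_SIZE != 0)
+//      bcBlockCount++;                               // last, partially filled chunk is padded
+//   // bytes occupied on disk: bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition)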
+
+MPIIORestart11CoProcessor::MPIIORestart11CoProcessor(Grid3DPtr grid, UbSchedulerPtr s,
+   const std::string& path,
+   CommunicatorPtr comm) :
+   CoProcessor(grid, s),
+   path(path),
+   comm(comm),
+   mpiTypeFreeFlag(false)
+{
+   UbSystem::makeDirectory(path+"/mpi_io_cp");
+
+   memset(&dataSetParamStr, 0, sizeof(dataSetParamStr));
+   memset(&boundCondParamStr, 0, sizeof(boundCondParamStr));
+
+   //-------------------------   define MPI types  ---------------------------------
+
+   MPI_Datatype typesGP[3] = { MPI_DOUBLE, MPI_INT, MPI_CHAR };
+   int blocksGP[3] = { 34, 6, 5 };
+   MPI_Aint offsetsGP[3], lbGP, extentGP;
+
+   offsetsGP[0] = 0;
+   MPI_Type_get_extent(MPI_DOUBLE, &lbGP, &extentGP);
+   offsetsGP[1] = blocksGP[0]*extentGP;
+
+   MPI_Type_get_extent(MPI_INT, &lbGP, &extentGP);
+   offsetsGP[2] = offsetsGP[1]+blocksGP[1]*extentGP;
+
+   MPI_Type_create_struct(3, blocksGP, offsetsGP, typesGP, &gridParamType);
+   MPI_Type_commit(&gridParamType);
+
+   //-----------------------------------------------------------------------
+
+   MPI_Datatype typesBlock[2] = { MPI_INT, MPI_CHAR };
+   int blocksBlock[2] = { 13, 1 };
+   MPI_Aint offsetsBlock[2], lbBlock, extentBlock;
+
+   offsetsBlock[0] = 0;
+   MPI_Type_get_extent(MPI_INT, &lbBlock, &extentBlock);
+   offsetsBlock[1] = blocksBlock[0]*extentBlock;
+
+   MPI_Type_create_struct(2, blocksBlock, offsetsBlock, typesBlock, &block3dType);
+   MPI_Type_commit(&block3dType);
+
+   //-----------------------------------------------------------------------
+
+   MPI_Type_contiguous(40, MPI_INT, &dataSetParamType);
+   MPI_Type_commit(&dataSetParamType);
+
+   //-----------------------------------------------------------------------
+
+   MPI_Datatype typesDataSet[3] = { MPI_DOUBLE, MPI_INT, MPI_CHAR };
+   int blocksDataSet[3] = { 2, 5, 2 };
+   MPI_Aint offsetsDatatSet[3], lbDataSet, extentDataSet;
+
+   offsetsDatatSet[0] = 0;
+   MPI_Type_get_extent(MPI_DOUBLE, &lbDataSet, &extentDataSet);
+   offsetsDatatSet[1] = blocksDataSet[0]*extentDataSet;
+
+   MPI_Type_get_extent(MPI_INT, &lbDataSet, &extentDataSet);
+   offsetsDatatSet[2] = offsetsDatatSet[1]+blocksDataSet[1]*extentDataSet;
+
+   MPI_Type_create_struct(3, blocksDataSet, offsetsDatatSet, typesDataSet, &dataSetType);
+   MPI_Type_commit(&dataSetType);
+
+   //-----------------------------------------------------------------------
+
+   MPI_Type_contiguous(4, MPI_INT, &boundCondParamType);
+   MPI_Type_commit(&boundCondParamType);
+
+   //-----------------------------------------------------------------------
+
+   MPI_Datatype typesBC[3] = { MPI_LONG_LONG_INT, MPI_FLOAT, MPI_CHAR };
+   int blocksBC[3] = { 5, 38, 1 };
+   MPI_Aint offsetsBC[3], lbBC, extentBC;
+
+   offsetsBC[0] = 0;
+   MPI_Type_get_extent(MPI_LONG_LONG_INT, &lbBC, &extentBC);
+   offsetsBC[1] = blocksBC[0]*extentBC;
+
+   MPI_Type_get_extent(MPI_FLOAT, &lbBC, &extentBC);
+   offsetsBC[2] = offsetsBC[1]+blocksBC[1]*extentBC;
+
+   MPI_Type_create_struct(3, blocksBC, offsetsBC, typesBC, &boundCondType);
+   MPI_Type_commit(&boundCondType);
+
+   //---------------------------------------
+
+   MPI_Type_contiguous(BLOCK_SIZE, boundCondType, &boundCondType1000);
+   MPI_Type_commit(&boundCondType1000);
+
+   //---------------------------------------
+
+   MPI_Type_contiguous(6, MPI_INT, &boundCondTypeAdd);
+   MPI_Type_commit(&boundCondTypeAdd);
+
+}
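+
+// Every datatype above follows the same recipe: list the member sections of the C
+// struct, compute each section's byte offset from the extents of its predecessors,
+// then create and commit the struct type. A standalone sketch of that pattern
+// (hypothetical struct, not used by the co-processor), assuming the compiler
+// inserts no padding between the sections:
+//
+//   struct Demo { double d[2]; int i[3]; char c; };
+//   MPI_Datatype types[3] = { MPI_DOUBLE, MPI_INT, MPI_CHAR };
+//   int blocks[3] = { 2, 3, 1 };
+//   MPI_Aint offsets[3], lb, extent;
+//   offsets[0] = 0;
+//   MPI_Type_get_extent(MPI_DOUBLE, &lb, &extent);
+//   offsets[1] = blocks[0]*extent;                    // ints start after the doubles
+//   MPI_Type_get_extent(MPI_INT, &lb, &extent);
+//   offsets[2] = offsets[1]+blocks[1]*extent;         // chars start after the ints
+//   MPI_Datatype demoType;
+//   MPI_Type_create_struct(3, blocks, offsets, types, &demoType);
+//   MPI_Type_commit(&demoType);
+//   // ... use demoType in MPI I/O calls, then:
+//   MPI_Type_free(&demoType);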
+//////////////////////////////////////////////////////////////////////////
+MPIIORestart11CoProcessor::~MPIIORestart11CoProcessor()
+{
+   MPI_Type_free(&gridParamType);
+   MPI_Type_free(&block3dType);
+   MPI_Type_free(&dataSetParamType);
+   MPI_Type_free(&dataSetType);
+   MPI_Type_free(&boundCondParamType);
+   MPI_Type_free(&boundCondType);
+   MPI_Type_free(&boundCondType1000);
+   MPI_Type_free(&boundCondTypeAdd);
+
+   if (mpiTypeFreeFlag)
+   {
+      MPI_Type_free(&dataSetDoubleType);
+      MPI_Type_free(&bcindexmatrixType);
+   }
+}
+
+//////////////////////////////////////////////////////////////////////////
+void MPIIORestart11CoProcessor::process(double step)
+{
+   if (scheduler->isDue(step))
+   {
+      if (comm->isRoot()) UBLOG(logINFO, "MPIIORestart11CoProcessor save step: "<<step);
+      if (comm->isRoot()) UBLOG(logINFO, "Save check point - start");
+      /*if (comm->isRoot())*/ clearAllFiles((int)step);
+      writeBlocks((int)step);
+      writeDataSet((int)step);
+      writeBoundaryConds((int)step);
+      if (comm->isRoot()) UBLOG(logINFO, "Save check point - end");
+      
+      //readDataSet((int)step);
+      //readBoundaryConds((int)step);
+   }
+}
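+
+// A hedged usage sketch: wiring the co-processor into a simulation. The scheduler
+// and setter calls follow the conventions of the previous MPIIORestartCoProcessor
+// and may differ in the actual application code.
+//
+//   UbSchedulerPtr restartSch(new UbScheduler(10000));       // checkpoint every 10000 steps
+//   MPIIORestart11CoProcessor rcp(grid, restartSch, pathname, comm);
+//   rcp.setLBMKernel(kernel);                                // kernel prototype cloned on restart
+//   rcp.setBCProcessor(bcProc);
+//   // the calculation loop then calls rcp.process(step) each step;
+//   // to resume a run: rcp.restart(step);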
+//////////////////////////////////////////////////////////////////////////
+void MPIIORestart11CoProcessor::clearAllFiles(int step)
+{
+   MPI_File file_handler1, file_handler2, file_handler3;
+   MPI_Info info = MPI_INFO_NULL;
+   MPI_Offset new_size = 0;
+
+   UbSystem::makeDirectory(path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step));
+   std::string filename1 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpBlocks.bin";
+   //MPI_File_delete(filename1.c_str(), info);
+   int rc1 = MPI_File_open(MPI_COMM_WORLD, filename1.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &file_handler1);
+   if (rc1 != MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file " + filename1);
+   MPI_File_set_size(file_handler1, new_size);
+   //MPI_File_sync(file_handler1);
+   MPI_File_close(&file_handler1);
+
+   std::string filename2 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSet.bin";
+   //MPI_File_delete(filename2.c_str(), info);
+   int rc2 = MPI_File_open(MPI_COMM_WORLD, filename2.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler2);
+   if (rc2 != MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file " + filename2);
+   MPI_File_set_size(file_handler2, new_size);
+   //MPI_File_sync(file_handler2);
+   MPI_File_close(&file_handler2);
+
+   std::string filename3 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpBC.bin";
+   //MPI_File_delete(filename3.c_str(), info);
+   int rc3 = MPI_File_open(MPI_COMM_WORLD, filename3.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler3);
+   if (rc3 != MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file " + filename3);
+   MPI_File_set_size(file_handler3, new_size);
+   //MPI_File_sync(file_handler3);
+   MPI_File_close(&file_handler3);
+}
+//////////////////////////////////////////////////////////////////////////
+void MPIIORestart11CoProcessor::writeBlocks(int step)
+{
+   int rank, size;
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   //MPI_Comm_size(MPI_COMM_WORLD, &size);
+   size = 1; // the blocks file is written by the root process only, so its header holds a single block counter
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::writeBlocks start collect data rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   int blocksCount = 0; // quantity of blocks in the grid, max 2147483647 blocks!
+   int minInitLevel = this->grid->getCoarsestInitializedLevel();
+   int maxInitLevel = this->grid->getFinestInitializedLevel();
+
+   std::vector<Block3DPtr> blocksVector[25]; // max 25 levels
+   for (int level = minInitLevel; level<=maxInitLevel; level++)
+   {
+      //grid->getBlocks(level, rank, blockVector[level]);
+      grid->getBlocks(level, blocksVector[level]);
+      blocksCount += static_cast<int>(blocksVector[level].size());
+   }
+
+   GridParam* gridParameters = new GridParam;
+   gridParameters->trafoParams[0] = grid->getCoordinateTransformator()->Tx1;
+   gridParameters->trafoParams[1] = grid->getCoordinateTransformator()->Tx2;
+   gridParameters->trafoParams[2] = grid->getCoordinateTransformator()->Tx3;
+   gridParameters->trafoParams[3] = grid->getCoordinateTransformator()->Sx1;
+   gridParameters->trafoParams[4] = grid->getCoordinateTransformator()->Sx2;
+   gridParameters->trafoParams[5] = grid->getCoordinateTransformator()->Sx3;
+   gridParameters->trafoParams[6] = grid->getCoordinateTransformator()->alpha;
+   gridParameters->trafoParams[7] = grid->getCoordinateTransformator()->beta;
+   gridParameters->trafoParams[8] = grid->getCoordinateTransformator()->gamma;
+
+   gridParameters->trafoParams[9] = grid->getCoordinateTransformator()->toX1factorX1;
+   gridParameters->trafoParams[10] = grid->getCoordinateTransformator()->toX1factorX2;
+   gridParameters->trafoParams[11] = grid->getCoordinateTransformator()->toX1factorX3;
+   gridParameters->trafoParams[12] = grid->getCoordinateTransformator()->toX1delta;
+   gridParameters->trafoParams[13] = grid->getCoordinateTransformator()->toX2factorX1;
+   gridParameters->trafoParams[14] = grid->getCoordinateTransformator()->toX2factorX2;
+   gridParameters->trafoParams[15] = grid->getCoordinateTransformator()->toX2factorX3;
+   gridParameters->trafoParams[16] = grid->getCoordinateTransformator()->toX2delta;
+   gridParameters->trafoParams[17] = grid->getCoordinateTransformator()->toX3factorX1;
+   gridParameters->trafoParams[18] = grid->getCoordinateTransformator()->toX3factorX2;
+   gridParameters->trafoParams[19] = grid->getCoordinateTransformator()->toX3factorX3;
+   gridParameters->trafoParams[20] = grid->getCoordinateTransformator()->toX3delta;
+
+   gridParameters->trafoParams[21] = grid->getCoordinateTransformator()->fromX1factorX1;
+   gridParameters->trafoParams[22] = grid->getCoordinateTransformator()->fromX1factorX2;
+   gridParameters->trafoParams[23] = grid->getCoordinateTransformator()->fromX1factorX3;
+   gridParameters->trafoParams[24] = grid->getCoordinateTransformator()->fromX1delta;
+   gridParameters->trafoParams[25] = grid->getCoordinateTransformator()->fromX2factorX1;
+   gridParameters->trafoParams[26] = grid->getCoordinateTransformator()->fromX2factorX2;
+   gridParameters->trafoParams[27] = grid->getCoordinateTransformator()->fromX2factorX3;
+   gridParameters->trafoParams[28] = grid->getCoordinateTransformator()->fromX2delta;
+   gridParameters->trafoParams[29] = grid->getCoordinateTransformator()->fromX3factorX1;
+   gridParameters->trafoParams[30] = grid->getCoordinateTransformator()->fromX3factorX2;
+   gridParameters->trafoParams[31] = grid->getCoordinateTransformator()->fromX3factorX3;
+   gridParameters->trafoParams[32] = grid->getCoordinateTransformator()->fromX3delta;
+
+   gridParameters->active = grid->getCoordinateTransformator()->active;
+   gridParameters->transformation = grid->getCoordinateTransformator()->transformation;
+
+   gridParameters->deltaX = grid->getDeltaX(minInitLevel);
+   UbTupleInt3 blocknx = grid->getBlockNX();
+   gridParameters->blockNx1 = val<1>(blocknx);
+   gridParameters->blockNx2 = val<2>(blocknx);
+   gridParameters->blockNx3 = val<3>(blocknx);
+   gridParameters->nx1 = grid->getNX1();
+   gridParameters->nx2 = grid->getNX2();
+   gridParameters->nx3 = grid->getNX3();
+   gridParameters->periodicX1 = grid->isPeriodicX1();
+   gridParameters->periodicX2 = grid->isPeriodicX2();
+   gridParameters->periodicX3 = grid->isPeriodicX3();
+
+   //----------------------------------------------------------------------
+
+   Block3d* block3dArray = new Block3d[blocksCount];
+   int ic = 0;
+   for (int level = minInitLevel; level<=maxInitLevel; level++)
+   {
+      BOOST_FOREACH(Block3DPtr block, blocksVector[level])  //	all the blocks of the current level
+      {
+         // save data describing the block
+         block3dArray[ic].x1 = block->getX1();
+         block3dArray[ic].x2 = block->getX2();
+         block3dArray[ic].x3 = block->getX3();
+         block3dArray[ic].bundle = block->getBundle();
+         block3dArray[ic].rank = block->getRank();
+         block3dArray[ic].lrank = block->getLocalRank();
+         block3dArray[ic].part = block->getPart();
+         block3dArray[ic].globalID = block->getGlobalID();
+         block3dArray[ic].localID = block->getLocalID();
+         block3dArray[ic].level = block->getLevel();
+         block3dArray[ic].interpolationFlagCF = block->getInterpolationFlagCF();
+         block3dArray[ic].interpolationFlagFC = block->getInterpolationFlagFC();
+         block3dArray[ic].counter = block->getMaxGlobalID();
+         block3dArray[ic].active = block->isActive();
+
+         ic++;
+      }
+   }
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::writeBlocks start MPI IO rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   MPI_File file_handler;
+   MPI_Info info = MPI_INFO_NULL;
+   //MPI_Info_create (&info);
+   //MPI_Info_set(info,"romio_cb_write","enable");
+   //MPI_Info_set(info,"cb_buffer_size","4194304");
+   //MPI_Info_set(info,"striping_unit","4194304");
+
+   // if (comm->isRoot())
+   // {
+   UbSystem::makeDirectory(path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step));
+   std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpBlocks.bin";
+   int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &file_handler);
+   if (rc != MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file " + filename);
+   // }
+
+   double start, finish;
+   MPI_Offset write_offset = (MPI_Offset)(size * sizeof(int));
+
+   if (comm->isRoot())
+   {
+      start = MPI_Wtime();
+
+      // the root process writes the quantity of blocks
+      MPI_File_write_at(file_handler, (MPI_Offset)(rank*sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
+      // the root process writes the parameters of the grid
+      MPI_File_write_at(file_handler, write_offset, gridParameters, 1, gridParamType, MPI_STATUS_IGNORE);
+      // the root process writes all blocks
+      MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(GridParam)), &block3dArray[0], blocksCount, block3dType, MPI_STATUS_IGNORE);
+   }
+
+   MPI_File_sync(file_handler);
+   MPI_File_close(&file_handler);
+ 
+   if (comm->isRoot())
+   {
+      finish = MPI_Wtime();
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::writeBlocks time: "<<finish-start<<" s");
+   }
+
+   delete[] block3dArray;
+   delete gridParameters;
+}
+
+void MPIIORestart11CoProcessor::writeDataSet(int step)
+{
+   int rank, size;
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+   int blocksCount = 0; // quantity of blocks in the grid, max 2147483647 blocks!
+
+   std::vector<Block3DPtr> blocksVector[25];
+   int minInitLevel = this->grid->getCoarsestInitializedLevel();
+   int maxInitLevel = this->grid->getFinestInitializedLevel();
+   for (int level = minInitLevel; level<=maxInitLevel; level++)
+   {
+      grid->getBlocks(level, rank, blocksVector[level]);
+      blocksCount += static_cast<int>(blocksVector[level].size());
+   }
+
+   DataSet* dataSetArray = new DataSet[blocksCount];
+   std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks 
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::writeDataSet start collect data rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   bool firstBlock = true;
+   int ic = 0;
+   for (int level = minInitLevel; level<=maxInitLevel; level++)
+   {
+      BOOST_FOREACH(Block3DPtr block, blocksVector[level])  //	blocks of the current level
+      {
+         dataSetArray[ic].x1 = block->getX1();     // coordinates of the block needed to find it while regenerating the grid
+         dataSetArray[ic].x2 = block->getX2();
+         dataSetArray[ic].x3 = block->getX3();
+         dataSetArray[ic].level = block->getLevel();
+         //if (block->getKernel())
+         //{
+         dataSetArray[ic].ghostLayerWidth = block->getKernel()->getGhostLayerWidth();
+         dataSetArray[ic].collFactor = block->getKernel()->getCollisionFactor();
+         dataSetArray[ic].deltaT = block->getKernel()->getDeltaT();
+         dataSetArray[ic].compressible = block->getKernel()->getCompressible();
+         dataSetArray[ic].withForcing = block->getKernel()->getWithForcing();
+         //}
+         //else
+         //{
+         //   dataSetArray[ic].ghostLayerWidth = 0;
+         //   dataSetArray[ic].collFactor = 0.0;
+         //   dataSetArray[ic].deltaT = 0.0;
+         //   dataSetArray[ic].compressible = false;
+         //   dataSetArray[ic].withForcing = false;
+         //}
+         //std::cout << "ic="<<ic<<"-"<<dataSetArray[ic].x1 << "," << dataSetArray[ic].x2 << "," << dataSetArray[ic].x3 << "," << dataSetArray[ic].level << "," << dataSetArray[ic].ghostLayerWidth;
+         //std::cout << dataSetArray[ic].collFactor<<","<<dataSetArray[ic].deltaT<<","<<dataSetArray[ic].compressible<<","<<dataSetArray[ic].withForcing<<std::endl;
+         //dataSetArrayGW[ic].x1 = dataSetArray[ic].x1;
+         //dataSetArrayGW[ic].x2 = dataSetArray[ic].x2;
+         //dataSetArrayGW[ic].x3 = dataSetArray[ic].x3;
+         //dataSetArrayGW[ic].level = dataSetArray[ic].level;
+         //dataSetArrayGW[ic].ghostLayerWidth = dataSetArray[ic].ghostLayerWidth;
+         //dataSetArrayGW[ic].collFactor = dataSetArray[ic].collFactor;
+         //dataSetArrayGW[ic].deltaT = dataSetArray[ic].deltaT;
+         //dataSetArrayGW[ic].compressible = dataSetArray[ic].compressible;
+         //dataSetArrayGW[ic].withForcing = dataSetArray[ic].withForcing;
+
+         if (firstBlock /*&& block->getKernel()*/) // for the first valid block only: record the array dimensions...
+         {
+            boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > averageDensityArray = block->getKernel()->getDataSet()->getAverageDencity();
+            if (averageDensityArray)
+            {
+               dataSetParamStr.nx[0][0] = static_cast<int>(averageDensityArray->getNX1());
+               dataSetParamStr.nx[0][1] = static_cast<int>(averageDensityArray->getNX2());
+               dataSetParamStr.nx[0][2] = static_cast<int>(averageDensityArray->getNX3());
+               dataSetParamStr.nx[0][3] = static_cast<int>(averageDensityArray->getNX4());
+            }
+
+            boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
+            if (AverageVelocityArray3DPtr)
+            {
+               dataSetParamStr.nx[1][0] = static_cast<int>(AverageVelocityArray3DPtr->getNX1());
+               dataSetParamStr.nx[1][1] = static_cast<int>(AverageVelocityArray3DPtr->getNX2());
+               dataSetParamStr.nx[1][2] = static_cast<int>(AverageVelocityArray3DPtr->getNX3());
+               dataSetParamStr.nx[1][3] = static_cast<int>(AverageVelocityArray3DPtr->getNX4());
+            }
+
+            boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
+            if (AverageFluctArray3DPtr)
+            {
+               dataSetParamStr.nx[2][0] = static_cast<int>(AverageFluctArray3DPtr->getNX1());
+               dataSetParamStr.nx[2][1] = static_cast<int>(AverageFluctArray3DPtr->getNX2());
+               dataSetParamStr.nx[2][2] = static_cast<int>(AverageFluctArray3DPtr->getNX3());
+               dataSetParamStr.nx[2][3] = static_cast<int>(AverageFluctArray3DPtr->getNX4());
+            }
+
+            boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
+            if (AverageTripleArray3DPtr)
+            {
+               dataSetParamStr.nx[3][0] = static_cast<int>(AverageTripleArray3DPtr->getNX1());
+               dataSetParamStr.nx[3][1] = static_cast<int>(AverageTripleArray3DPtr->getNX2());
+               dataSetParamStr.nx[3][2] = static_cast<int>(AverageTripleArray3DPtr->getNX3());
+               dataSetParamStr.nx[3][3] = static_cast<int>(AverageTripleArray3DPtr->getNX4());
+            }
+
+            boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
+            if (ShearStressValArray3DPtr)
+            {
+               dataSetParamStr.nx[4][0] = static_cast<int>(ShearStressValArray3DPtr->getNX1());
+               dataSetParamStr.nx[4][1] = static_cast<int>(ShearStressValArray3DPtr->getNX2());
+               dataSetParamStr.nx[4][2] = static_cast<int>(ShearStressValArray3DPtr->getNX3());
+               dataSetParamStr.nx[4][3] = static_cast<int>(ShearStressValArray3DPtr->getNX4());
+            }
+
+            boost::shared_ptr< CbArray3D<LBMReal, IndexerX3X2X1> > relaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
+            if (relaxationFactor3DPtr)
+            {
+               dataSetParamStr.nx[5][0] = static_cast<int>(relaxationFactor3DPtr->getNX1());
+               dataSetParamStr.nx[5][1] = static_cast<int>(relaxationFactor3DPtr->getNX2());
+               dataSetParamStr.nx[5][2] = static_cast<int>(relaxationFactor3DPtr->getNX3());
+               dataSetParamStr.nx[5][3] = 1;
+            }
+
+            boost::shared_ptr< D3Q27EsoTwist3DSplittedVector > D3Q27EsoTwist3DSplittedVectorPtr = boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(block->getKernel()->getDataSet()->getFdistributions());
+            CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getLocalDistributions();
+            if (localDistributions)
+            {
+               dataSetParamStr.nx[6][0] = static_cast<int>(localDistributions->getNX1());
+               dataSetParamStr.nx[6][1] = static_cast<int>(localDistributions->getNX2());
+               dataSetParamStr.nx[6][2] = static_cast<int>(localDistributions->getNX3());
+               dataSetParamStr.nx[6][3] = static_cast<int>(localDistributions->getNX4());
+            }
+
+            CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getNonLocalDistributions();
+            if (nonLocalDistributions)
+            {
+               dataSetParamStr.nx[7][0] = static_cast<int>(nonLocalDistributions->getNX1());
+               dataSetParamStr.nx[7][1] = static_cast<int>(nonLocalDistributions->getNX2());
+               dataSetParamStr.nx[7][2] = static_cast<int>(nonLocalDistributions->getNX3());
+               dataSetParamStr.nx[7][3] = static_cast<int>(nonLocalDistributions->getNX4());
+            }
+
+            CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getZeroDistributions();
+            if (zeroDistributions)
+            {
+               dataSetParamStr.nx[8][0] = static_cast<int>(zeroDistributions->getNX1());
+               dataSetParamStr.nx[8][1] = static_cast<int>(zeroDistributions->getNX2());
+               dataSetParamStr.nx[8][2] = static_cast<int>(zeroDistributions->getNX3());
+               dataSetParamStr.nx[8][3] = 1;
+            }
+
+            // ... then save some parameters that are equal in all dataSets
+            dataSetParamStr.nx1 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX1());
+            dataSetParamStr.nx2 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX2());
+            dataSetParamStr.nx3 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX3());
+
+            firstBlock = false;
+
+            // how many elements are in all arrays of DataSet (equal in all blocks)
+            int doubleCount = 0, temp;
+            for (int i = 0; i<9; i++)   // 9 arrays (averageValues, averageVelocity, averageFluktuations,
+            {                 // averageTriplecorrelations, shearStressValues, relaxationFactor, 3 * fdistributions)
+               temp = 1;
+               for (int ii = 0; ii < 4; ii++)
+               {
+                  temp *= dataSetParamStr.nx[i][ii];
+                  //std::cout << ",dataSetParamStr.nx[" << i << "][" << ii << "]" << "=" << dataSetParamStr.nx[i][ii];
+               }
+               doubleCount += temp;
+            }
+            dataSetParamStr.doubleCountInBlock = doubleCount;
+         }
+         //std::cout << ",doubleCountInBlock="<<dataSetParamStr.doubleCountInBlock<< "," << dataSetParamStr.nx1 << "," << dataSetParamStr.nx2 << "," << dataSetParamStr.nx3 << std::endl;
+
+         boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageValuesArray3DPtr = block->getKernel()->getDataSet()->getAverageDencity();
+         if (AverageValuesArray3DPtr&&(dataSetParamStr.nx[0][0]>0)&&(dataSetParamStr.nx[0][1]>0)&&(dataSetParamStr.nx[0][2]>0)&&(dataSetParamStr.nx[0][3]>0))
+            doubleValuesArray.insert(doubleValuesArray.end(), AverageValuesArray3DPtr->getDataVector().begin(), AverageValuesArray3DPtr->getDataVector().end());
+
+         boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
+         if (AverageVelocityArray3DPtr&&(dataSetParamStr.nx[1][0]>0)&&(dataSetParamStr.nx[1][1]>0)&&(dataSetParamStr.nx[1][2]>0)&&(dataSetParamStr.nx[1][3]>0))
+            doubleValuesArray.insert(doubleValuesArray.end(), AverageVelocityArray3DPtr->getDataVector().begin(), AverageVelocityArray3DPtr->getDataVector().end());
+
+         boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
+         if (AverageFluctArray3DPtr&&(dataSetParamStr.nx[2][0]>0)&&(dataSetParamStr.nx[2][1]>0)&&(dataSetParamStr.nx[2][2]>0)&&(dataSetParamStr.nx[2][3]>0))
+            doubleValuesArray.insert(doubleValuesArray.end(), AverageFluctArray3DPtr->getDataVector().begin(), AverageFluctArray3DPtr->getDataVector().end());
+
+         boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
+         if (AverageTripleArray3DPtr&&(dataSetParamStr.nx[3][0]>0)&&(dataSetParamStr.nx[3][1]>0)&&(dataSetParamStr.nx[3][2]>0)&&(dataSetParamStr.nx[3][3]>0))
+            doubleValuesArray.insert(doubleValuesArray.end(), AverageTripleArray3DPtr->getDataVector().begin(), AverageTripleArray3DPtr->getDataVector().end());
+
+         boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
+         if (ShearStressValArray3DPtr&&(dataSetParamStr.nx[4][0]>0)&&(dataSetParamStr.nx[4][1]>0)&&(dataSetParamStr.nx[4][2]>0)&&(dataSetParamStr.nx[4][3]>0))
+            doubleValuesArray.insert(doubleValuesArray.end(), ShearStressValArray3DPtr->getDataVector().begin(), ShearStressValArray3DPtr->getDataVector().end());
+
+         boost::shared_ptr< CbArray3D<LBMReal, IndexerX3X2X1> > RelaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
+         if (RelaxationFactor3DPtr&&(dataSetParamStr.nx[5][0]>0)&&(dataSetParamStr.nx[5][1]>0)&&(dataSetParamStr.nx[5][2]>0))
+            doubleValuesArray.insert(doubleValuesArray.end(), RelaxationFactor3DPtr->getDataVector().begin(), RelaxationFactor3DPtr->getDataVector().end());
+
+         boost::shared_ptr< D3Q27EsoTwist3DSplittedVector > D3Q27EsoTwist3DSplittedVectorPtr = boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(block->getKernel()->getDataSet()->getFdistributions());
+         CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getLocalDistributions();
+         if (localDistributions&&(dataSetParamStr.nx[6][0]>0)&&(dataSetParamStr.nx[6][1]>0)&&(dataSetParamStr.nx[6][2]>0)&&(dataSetParamStr.nx[6][3]>0))
+            doubleValuesArray.insert(doubleValuesArray.end(), localDistributions->getDataVector().begin(), localDistributions->getDataVector().end());
+
+         CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getNonLocalDistributions();
+         if (nonLocalDistributions&&(dataSetParamStr.nx[7][0]>0)&&(dataSetParamStr.nx[7][1]>0)&&(dataSetParamStr.nx[7][2]>0)&&(dataSetParamStr.nx[7][3]>0))
+            doubleValuesArray.insert(doubleValuesArray.end(), nonLocalDistributions->getDataVector().begin(), nonLocalDistributions->getDataVector().end());
+
+         CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getZeroDistributions();
+         if (zeroDistributions&&(dataSetParamStr.nx[8][0]>0)&&(dataSetParamStr.nx[8][1]>0)&&(dataSetParamStr.nx[8][2]>0))
+            doubleValuesArray.insert(doubleValuesArray.end(), zeroDistributions->getDataVector().begin(), zeroDistributions->getDataVector().end());
+
+         ic++;
+      }
+   }
+
+   // register new MPI-types depending on the block-specific information
+   MPI_Type_contiguous(dataSetParamStr.doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
+   MPI_Type_commit(&dataSetDoubleType);
+   mpiTypeFreeFlag = true;
+
+   //doubleValuesArrayGW.assign(doubleValuesArray.begin(), doubleValuesArray.end());
+   
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::writeDataSet start MPI IO rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   // write to the file
+   // each process computes its write offset (the total number of bytes written by all
+   // lower ranks) and passes the resulting next offset on to the process with rank + 1
+   MPI_Offset write_offset = (MPI_Offset)(size * sizeof(int));
+   size_t next_write_offset = 0;
+
+   if (size>1)
+   {
+      if (rank==0)
+      {
+         next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSet)+ dataSetParamStr.doubleCountInBlock*sizeof(double));
+         MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
+      }
+      else
+      {
+         MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank-1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+         next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSet)+ dataSetParamStr.doubleCountInBlock*sizeof(double));
+         if (rank<size-1)
+            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank+1, 5, MPI_COMM_WORLD);
+      }
+   }
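+
+   // The chain above serializes the offset computation: rank r waits for rank r-1.
+   // For illustration only, the same exclusive prefix sum of the per-rank byte
+   // counts could be computed collectively with MPI_Exscan (not used here):
+   //
+   //   MPI_Offset myBytes = (MPI_Offset)(sizeof(dataSetParam)
+   //      + blocksCount * (sizeof(DataSet) + dataSetParamStr.doubleCountInBlock*sizeof(double)));
+   //   MPI_Offset myOffset = 0;
+   //   MPI_Exscan(&myBytes, &myOffset, 1, MPI_OFFSET, MPI_SUM, MPI_COMM_WORLD);
+   //   if (rank == 0) myOffset = 0;                   // MPI_Exscan leaves rank 0 undefined
+   //   myOffset += (MPI_Offset)(size*sizeof(int));    // skip the per-rank block counters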
+
+   double start, finish;
+   if (comm->isRoot()) start = MPI_Wtime();
+
+   MPI_Info info = MPI_INFO_NULL;
+
+#ifdef HLRN
+   MPI_Info_create(&info);
+   MPI_Info_set(info, "striping_factor", "40");
+   MPI_Info_set(info, "striping_unit", "4M");
+#endif
+
+   MPI_File file_handler;
+   std::string filename = path+"/mpi_io_cp/mpi_io_cp_"+UbSystem::toString(step)+"/cpDataSet.bin";
+   int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE| MPI_MODE_WRONLY, info, &file_handler);
+   if (rc!=MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file "+filename);
+
+   //std::cout << "writeDataSet rank=" << rank << ",blocksCount=" << blocksCount;
+   //std::cout << ", rank*sizeof(int)=" << (MPI_Offset)(rank * sizeof(int)) << ", write_offset=" << write_offset << std::endl;
+
+   // each process writes the quantity of its blocks
+   MPI_File_write_at(file_handler, (MPI_Offset)(rank*sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
+   // each process writes common parameters of a dataSet
+   MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
+   // each process writes data identifying blocks
+   MPI_File_write_at(file_handler, (MPI_Offset)(write_offset+sizeof(dataSetParam)), dataSetArray, blocksCount, dataSetType, MPI_STATUS_IGNORE);
+   // each process writes the dataSet arrays
+   MPI_File_write_at(file_handler, (MPI_Offset)(write_offset+sizeof(dataSetParam)+blocksCount*sizeof(DataSet)), &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+   MPI_File_sync(file_handler);
+   
+   //int blockC;
+   //MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blockC, 1, MPI_INT, MPI_STATUS_IGNORE);
+   //std::cout << "readDataSet rank=" << rank << ", blockC=" << blockC << std::endl;
+   
+   MPI_File_close(&file_handler);
+
+   if (comm->isRoot())
+   {
+      finish = MPI_Wtime();
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::writeDataSet time: "<<finish-start<<" s");
+   }
+
+   delete[] dataSetArray;
+}
+
+void MPIIORestart11CoProcessor::writeBoundaryConds(int step)
+{
+   int rank, size;
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::writeBoundaryConds start collect data rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   int blocksCount = 0;          // quantity of blocks in the grid, max 2147483647 blocks!
+   size_t count_boundCond = 0;   // how many BoundaryConditions there are in all blocks
+   int count_indexContainer = 0; // how many indexContainer values there are in all blocks
+   size_t byteCount = 0;         // how many bytes this process writes to the file
+
+   std::vector<Block3DPtr> blocksVector[25];
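+   // one vector of blocks per refinement level (25 is presumably the maximum level count supported)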
+   int minInitLevel = this->grid->getCoarsestInitializedLevel();
+   int maxInitLevel = this->grid->getFinestInitializedLevel();
+   for (int level = minInitLevel; level<=maxInitLevel; level++)
+   {
+      grid->getBlocks(level, rank, blocksVector[level]);
+      blocksCount += static_cast<int>(blocksVector[level].size());
+   }
+
+   BCAdd* bcAddArray = new BCAdd[blocksCount];
+   std::vector<BoundaryCondition> bcVector;
+   std::vector<int> bcindexmatrixV;
+   std::vector<int> indexContainerV;
+   bool bcindexmatrixCountNotInit = true;
+
+   int ic = 0;
+   for (int level = minInitLevel; level<=maxInitLevel; level++)
+   {
+      BOOST_FOREACH(Block3DPtr block, blocksVector[level])  // all the blocks of the current level
+      {
+         BCArray3DPtr bcArr = block->getKernel()->getBCProcessor()->getBCArray();
+
+         bcAddArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
+         bcAddArray[ic].x2 = block->getX2();
+         bcAddArray[ic].x3 = block->getX3();
+         bcAddArray[ic].level = block->getLevel();
+         bcAddArray[ic].boundCond_count = 0; // how many BoundaryConditions in this block
+         bcAddArray[ic].indexContainer_count = 0;  // how many indexContainer-values in this block
+
+         for (int bc = 0; bc<bcArr->getBCVectorSize(); bc++)
+         {
+            BoundaryCondition* bouCond = new BoundaryCondition();
+            if (bcArr->bcvector[bc]==NULL)
+            {
+               memset(bouCond, 0, sizeof(BoundaryCondition));
+            }
+            else
+            {
+               bouCond->noslipBoundaryFlags = bcArr->bcvector[bc]->getNoSlipBoundary();
+               bouCond->slipBoundaryFlags = bcArr->bcvector[bc]->getSlipBoundary();
+               bouCond->velocityBoundaryFlags = bcArr->bcvector[bc]->getVelocityBoundary();
+               bouCond->densityBoundaryFlags = bcArr->bcvector[bc]->getDensityBoundary();
+               bouCond->wallModelBoundaryFlags = bcArr->bcvector[bc]->getWallModelBoundary();
+               bouCond->bcVelocityX1 = bcArr->bcvector[bc]->getBoundaryVelocityX1();
+               bouCond->bcVelocityX2 = bcArr->bcvector[bc]->getBoundaryVelocityX2();
+               bouCond->bcVelocityX3 = bcArr->bcvector[bc]->getBoundaryVelocityX3();
+               bouCond->bcDensity = bcArr->bcvector[bc]->getBoundaryDensity();
+               bouCond->bcLodiDensity = bcArr->bcvector[bc]->getDensityLodiDensity();
+               bouCond->bcLodiVelocityX1 = bcArr->bcvector[bc]->getDensityLodiVelocityX1();
+               bouCond->bcLodiVelocityX2 = bcArr->bcvector[bc]->getDensityLodiVelocityX2();
+               bouCond->bcLodiVelocityX3 = bcArr->bcvector[bc]->getDensityLodiVelocityX3();
+               bouCond->bcLodiLentgh = bcArr->bcvector[bc]->getDensityLodiLength();
+               bouCond->nx1 = bcArr->bcvector[bc]->nx1;
+               bouCond->nx2 = bcArr->bcvector[bc]->nx2;
+               bouCond->nx3 = bcArr->bcvector[bc]->nx3;
+               for (int iq = 0; iq<26; iq++)
+                  bouCond->q[iq] = bcArr->bcvector[bc]->getQ(iq);
+               bouCond->algorithmType = bcArr->bcvector[bc]->getBcAlgorithmType();
+            }
+            //std::cout << "writeBoundaryConds noslipBoundaryFlags="<< bouCond->noslipBoundaryFlags << std::endl;
+            bcVector.push_back(*bouCond);
+            delete bouCond; // bcVector stores a copy, so the temporary must be freed to avoid a leak
+            //bcVectorGW.push_back(*bouCond);
+            //if (bcVector[count_boundCond].noslipBoundaryFlags != bcVectorGW[count_boundCond].noslipBoundaryFlags)
+            //   std::cout << "bcVector[count_boundCond].noslipBoundaryFlags != bcVectorGW[count_boundCond].noslipBoundaryFlags!!!" << std::endl;
+            bcAddArray[ic].boundCond_count++;
+            count_boundCond++;
+         }
+
+         // the number of elements in the bcindexmatrix array (CbArray3D<int, IndexerX3X2X1>) of a BCArray3D is the same for every block;
+         // it defines the size of the "write/read unit" used by the MPI_File_write_.../MPI_File_read_... calls for the boundary conditions
+         if (bcindexmatrixCountNotInit)
+         {
+            boundCondParamStr.nx1 = static_cast<int>(bcArr->bcindexmatrix.getNX1());
+            boundCondParamStr.nx2 = static_cast<int>(bcArr->bcindexmatrix.getNX2());
+            boundCondParamStr.nx3 = static_cast<int>(bcArr->bcindexmatrix.getNX3());
+            boundCondParamStr.bcindexmatrixCount = static_cast<int>(bcArr->bcindexmatrix.getDataVector().size());
+            bcindexmatrixCountNotInit = false;
+         }
+         bcindexmatrixV.insert(bcindexmatrixV.end(), bcArr->bcindexmatrix.getDataVector().begin(), bcArr->bcindexmatrix.getDataVector().end());
+
+         indexContainerV.insert(indexContainerV.end(), bcArr->indexContainer.begin(), bcArr->indexContainer.end());
+         bcAddArray[ic].indexContainer_count = static_cast<int>(bcArr->indexContainer.size());
+         count_indexContainer += bcAddArray[ic].indexContainer_count;
+
+         ic++;
+      }
+   }
+
+   //bcindexmatrixVGW.assign(bcindexmatrixV.begin(), bcindexmatrixV.end());
+   //indexContainerVGW.assign(indexContainerV.begin(), indexContainerV.end());
+   
+   MPI_Type_contiguous(boundCondParamStr.bcindexmatrixCount, MPI_INT, &bcindexmatrixType);
+   MPI_Type_commit(&bcindexmatrixType);
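+   // one element of this type carries the complete bcindexmatrix of a single block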
+   mpiTypeFreeFlag = true;
+
+   // how many "big blocks" of BLOCK_SIZE size can be formed
+   int bcBlockCount = (int)(count_boundCond/BLOCK_SIZE);
+   if (bcBlockCount * BLOCK_SIZE<count_boundCond)
+      bcBlockCount += 1;
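+   // pad the vector with zero-filled BoundaryCondition structures up to a whole number of "big blocks",
+   // so that boundCondType1000 (BLOCK_SIZE structures per MPI element) can be used for writing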
+   for (int i = (int)count_boundCond; i<bcBlockCount * BLOCK_SIZE; i++)
+   {
+      BoundaryCondition bouCond; // stack allocation: the loop previously leaked one heap object per padding entry
+      memset(&bouCond, 0, sizeof(BoundaryCondition));
+      bcVector.push_back(bouCond);
+   }
+
+   byteCount = bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition) + blocksCount * sizeof(BCAdd) + sizeof(int) * (blocksCount * boundCondParamStr.bcindexmatrixCount + count_indexContainer);
+
+   // write to the file
+   // all processes calculate their offsets (quantity of bytes that the process is going to write) 
+   // and notify the next process (with the rank = rank + 1)
+   MPI_Offset write_offset = (MPI_Offset)(size * (3 * sizeof(int) + sizeof(boundCondParam)));
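+   // the file starts with a per-rank header of three ints (blocksCount, bcBlockCount, count_indexContainer)
+   // followed by the boundCondParam structure; the payload of all ranks begins behind these headers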
+   size_t next_write_offset = 0;
+
+   if (size>1)
+   {
+      if (rank==0)
+      {
+         next_write_offset = write_offset + byteCount;
+         MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
+      }
+      else
+      {
+         MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank-1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+         next_write_offset = write_offset + byteCount;
+         if (rank<size-1)
+            MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank+1, 5, MPI_COMM_WORLD);
+      }
+   }
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::writeBoundaryConds start MPI IO rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   double start, finish;
+   if (comm->isRoot()) start = MPI_Wtime();
+
+   MPI_Info info = MPI_INFO_NULL;
+
+#ifdef HLRN
+   MPI_Info_create(&info);
+   MPI_Info_set(info, "striping_factor", "40");
+   MPI_Info_set(info, "striping_unit", "4M");
+#endif
+
+   MPI_File file_handler;
+   std::string filename = path+"/mpi_io_cp/mpi_io_cp_"+UbSystem::toString(step)+"/cpBC.bin";
+   int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE|MPI_MODE_WRONLY, info, &file_handler);
+   if (rc!=MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file "+filename);
+
+   MPI_Offset write_offset1 = (MPI_Offset)(rank * (3 * sizeof(int) + sizeof(boundCondParam)));
+
+   // each process writes the quantity of its blocks
+   MPI_File_write_at(file_handler, write_offset1, &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
+   // each process writes the quantity of "big blocks" (BLOCK_SIZE BoundaryCondition structures each)
+   MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1+sizeof(int)), &bcBlockCount, 1, MPI_INT, MPI_STATUS_IGNORE);
+   // each process writes the quantity of indexContainer elements in all of its blocks
+   MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1+2*sizeof(int)), &count_indexContainer, 1, MPI_INT, MPI_STATUS_IGNORE);
+   // each process writes the common boundary-condition parameters (incl. the quantity of bcindexmatrix elements per block)
+   MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1+3*sizeof(int)), &boundCondParamStr, 1, boundCondParamType, MPI_STATUS_IGNORE);
+
+   //std::cout << "rank=" << rank << ",(rank*write_offset1)=" << rank*write_offset1<< ",blocksCount=" << blocksCount ;
+   //std::cout << ", " << rank*write_offset1 + sizeof(int) << ",bcBlockCount=" << bcBlockCount;
+   //std::cout << ", " << rank*write_offset1 + 2 * sizeof(int) << ",count_indexContainer=" << count_indexContainer;
+   //std::cout << ", " << rank*write_offset1 + 3 * sizeof(int) << ",boundCondParamStr=" << boundCondParamStr.bcindexmatrixCount << std::endl;
+
+   // each process writes data identifying the blocks
+   MPI_File_write_at(file_handler, write_offset, bcAddArray, blocksCount, boundCondTypeAdd, MPI_STATUS_IGNORE);
+   // each process writes boundary conditions
+   if (bcVector.size()>0)
+      MPI_File_write_at(file_handler, (MPI_Offset)(write_offset+blocksCount*sizeof(BCAdd)), &bcVector[0], bcBlockCount, boundCondType1000, MPI_STATUS_IGNORE);
+   // each process writes bcindexmatrix values
+   if (bcindexmatrixV.size()>0)
+      MPI_File_write_at(file_handler, (MPI_Offset)(write_offset+blocksCount*sizeof(BCAdd)+bcBlockCount*BLOCK_SIZE*sizeof(BoundaryCondition)), &bcindexmatrixV[0], blocksCount, bcindexmatrixType, MPI_STATUS_IGNORE);
+   // each process writes indexContainer values
+   if (indexContainerV.size()>0)
+      MPI_File_write_at(file_handler, (MPI_Offset)(write_offset+blocksCount*sizeof(BCAdd)+bcBlockCount*BLOCK_SIZE*sizeof(BoundaryCondition)+blocksCount*boundCondParamStr.bcindexmatrixCount*sizeof(int)), &indexContainerV[0], count_indexContainer, MPI_INT, MPI_STATUS_IGNORE);
+   MPI_File_sync(file_handler);
+
+   //std::cout <<"rank="<<rank<<",blocksCount="<< blocksCount<<", "<< bcBlockCount<<", "<< count_indexContainer<<", "<< bcindexmatrixCount << std::endl;
+   //std::cout <<"rank="<<rank<<",write_offset="<< write_offset <<", "<< write_offset + blocksCount * sizeof(BCAdd) <<", "<< write_offset + blocksCount * sizeof(BCAdd) + bcBlockCount*BLOCK_SIZE * sizeof(BoundaryCondition) <<", "<< write_offset + blocksCount * sizeof(BCAdd) + bcBlockCount*BLOCK_SIZE * sizeof(BoundaryCondition) + blocksCount*bcindexmatrixCount * sizeof(int)<< std::endl;
+
+   MPI_File_close(&file_handler);
+
+   if (comm->isRoot())
+   {
+      finish = MPI_Wtime();
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::writeBoundaryConds time: "<<finish-start<<" s");
+   }
+
+   delete[] bcAddArray;
+}
+
+//------------------------------------------- READ -----------------------------------------------
+void MPIIORestart11CoProcessor::restart(int step)
+{
+   if (comm->isRoot()) UBLOG(logINFO, "MPIIORestart11CoProcessor restart step: "<<step);
+   if (comm->isRoot()) UBLOG(logINFO, "Load check point - start");
+   readBlocks(step);
+   readDataSet(step);
+   readBoundaryConds(step);
+   if (comm->isRoot()) UBLOG(logINFO, "Load check point - end");
+   this->reconnect(grid);
+}
+
+void MPIIORestart11CoProcessor::readBlocks(int step)
+{
+   int rank, size;
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   //MPI_Comm_size(MPI_COMM_WORLD, &size);
+   size = 1;
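+   // the blocks file stores one global block counter (not one per rank), so the
+   // offsets below are computed as if a single rank had written the header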
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::readBlocks start MPI IO rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   double start, finish;
+   if (comm->isRoot()) start = MPI_Wtime();
+
+   MPI_File file_handler;
+   std::string filename = path+"/mpi_io_cp/mpi_io_cp_"+UbSystem::toString(step)+"/cpBlocks.bin";
+   int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+   if (rc!=MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file "+filename);
+
+   // read count of blocks
+   int blocksCount = 0;
+   //MPI_File_read_at(file_handler, rank*sizeof(int), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
+   MPI_File_read_at(file_handler, 0, &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
+   Block3d* block3dArray = new Block3d[blocksCount];
+
+   // calculate the read offset
+   MPI_Offset read_offset = (MPI_Offset)(size * sizeof(int));
+
+   GridParam* gridParameters = new GridParam;
+
+   // read parameters of the grid
+   MPI_File_read_at(file_handler, read_offset, gridParameters, 1, gridParamType, MPI_STATUS_IGNORE);
+   // read all the blocks
+   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset+sizeof(GridParam)), &block3dArray[0], blocksCount, block3dType, MPI_STATUS_IGNORE);
+
+   MPI_File_close(&file_handler);
+
+   if (comm->isRoot())
+   {
+      finish = MPI_Wtime();
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::readBlocks time: "<<finish-start<<" s");
+   }
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::readBlocks start of restore of data, rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   // clear the grid
+   std::vector<Block3DPtr> blocksVector;
+   grid->getBlocks(0, blocksVector);
+   int del = 0;
+   BOOST_FOREACH(Block3DPtr block, blocksVector)
+   {
+      grid->deleteBlock(block);
+      del++;
+   }
+
+   // restore the grid
+   CoordinateTransformation3DPtr trafo(new CoordinateTransformation3D());
+   trafo->Tx1 = gridParameters->trafoParams[0];
+   trafo->Tx2 = gridParameters->trafoParams[1];
+   trafo->Tx3 = gridParameters->trafoParams[2];
+   trafo->Sx1 = gridParameters->trafoParams[3];
+   trafo->Sx2 = gridParameters->trafoParams[4];
+   trafo->Sx3 = gridParameters->trafoParams[5];
+   trafo->alpha = gridParameters->trafoParams[6];
+   trafo->beta = gridParameters->trafoParams[7];
+   trafo->gamma = gridParameters->trafoParams[8];
+
+   trafo->toX1factorX1 = gridParameters->trafoParams[9];
+   trafo->toX1factorX2 = gridParameters->trafoParams[10];
+   trafo->toX1factorX3 = gridParameters->trafoParams[11];
+   trafo->toX1delta = gridParameters->trafoParams[12];
+   trafo->toX2factorX1 = gridParameters->trafoParams[13];
+   trafo->toX2factorX2 = gridParameters->trafoParams[14];
+   trafo->toX2factorX3 = gridParameters->trafoParams[15];
+   trafo->toX2delta = gridParameters->trafoParams[16];
+   trafo->toX3factorX1 = gridParameters->trafoParams[17];
+   trafo->toX3factorX2 = gridParameters->trafoParams[18];
+   trafo->toX3factorX3 = gridParameters->trafoParams[19];
+   trafo->toX3delta = gridParameters->trafoParams[20];
+
+   trafo->fromX1factorX1 = gridParameters->trafoParams[21];
+   trafo->fromX1factorX2 = gridParameters->trafoParams[22];
+   trafo->fromX1factorX3 = gridParameters->trafoParams[23];
+   trafo->fromX1delta = gridParameters->trafoParams[24];
+   trafo->fromX2factorX1 = gridParameters->trafoParams[25];
+   trafo->fromX2factorX2 = gridParameters->trafoParams[26];
+   trafo->fromX2factorX3 = gridParameters->trafoParams[27];
+   trafo->fromX2delta = gridParameters->trafoParams[28];
+   trafo->fromX3factorX1 = gridParameters->trafoParams[29];
+   trafo->fromX3factorX2 = gridParameters->trafoParams[30];
+   trafo->fromX3factorX3 = gridParameters->trafoParams[31];
+   trafo->fromX3delta = gridParameters->trafoParams[32];
+
+   trafo->active = gridParameters->active;
+   trafo->transformation = gridParameters->transformation;
+
+   grid->setCoordinateTransformator(trafo);
+
+   grid->setDeltaX(gridParameters->deltaX);
+   grid->setBlockNX(gridParameters->blockNx1, gridParameters->blockNx2, gridParameters->blockNx3);
+   grid->setNX1(gridParameters->nx1);
+   grid->setNX2(gridParameters->nx2);
+   grid->setNX3(gridParameters->nx3);
+   grid->setPeriodicX1(gridParameters->periodicX1);
+   grid->setPeriodicX2(gridParameters->periodicX2);
+   grid->setPeriodicX3(gridParameters->periodicX3);
+
+   // regenerate blocks
+   for (int n = 0; n<blocksCount; n++)
+   {
+      Block3DPtr block(new Block3D(block3dArray[n].x1, block3dArray[n].x2, block3dArray[n].x3, block3dArray[n].level));
+      block->setActive(block3dArray[n].active);
+      block->setBundle(block3dArray[n].bundle);
+      block->setRank(block3dArray[n].rank);
+      block->setLocalRank(block3dArray[n].lrank);
+      block->setGlobalID(block3dArray[n].globalID);
+      block->setLocalID(block3dArray[n].localID);
+      block->setPart(block3dArray[n].part);
+      block->setLevel(block3dArray[n].level);
+      block->interpolationFlagCF = block3dArray[n].interpolationFlagCF;
+      block->interpolationFlagFC = block3dArray[n].interpolationFlagFC;
+
+      grid->addBlock(block);
+   }
+
+   delete gridParameters;
+   delete[] block3dArray;
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::readBlocks end of restore of data, rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+}
+
+void MPIIORestart11CoProcessor::readDataSet(int step)
+{
+   int rank, size;
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::readDataSet start MPI IO rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+   double start, finish;
+   if (comm->isRoot()) start = MPI_Wtime();
+
+   MPI_File file_handler;
+   std::string filename = path+"/mpi_io_cp/mpi_io_cp_"+UbSystem::toString(step)+"/cpDataSet.bin";
+   int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+   if (rc!=MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file "+filename);
+
+   // read count of blocks
+   int blocksCount = 0;
+   MPI_File_read_at(file_handler, (MPI_Offset)(rank*sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
+   MPI_File_read_at(file_handler, (MPI_Offset)(size*sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
+   //std::cout <<"MPIIORestart11CoProcessor::readDataSet rank=" << rank <<", dataSetParamStr.doubleCountInBlock="<< dataSetParamStr.doubleCountInBlock << std::endl;
+   //std::cout << ",dataSetParamStr.nx[6][0]" << "=" << dataSetParamStr.nx[6][0] << "," << dataSetParamStr.nx[6][1] << "," << dataSetParamStr.nx[6][2] << "," << dataSetParamStr.nx[6][3];
+   //std::cout << ",doubleCountInBlock=" << dataSetParamStr.doubleCountInBlock << "," << dataSetParamStr.nx1 << "," << dataSetParamStr.nx2 << "," << dataSetParamStr.nx3 << std::endl;
+
+   DataSet* dataSetArray = new DataSet[blocksCount];
+   std::vector<double> doubleValuesArray(blocksCount * dataSetParamStr.doubleCountInBlock); // double-values in all blocks 
+   
+   // define MPI_types depending on the block-specific information
+   MPI_Type_contiguous(dataSetParamStr.doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
+   MPI_Type_commit(&dataSetDoubleType);
+   mpiTypeFreeFlag = true;
+   //std::cout << "MPIIORestart11CoProcessor::readDataSet rank=" << rank << " 123=" << dataSetParamStr.doubleCountInBlock << std::endl;
+
+   // calculate the read offset
+   MPI_Offset read_offset = (MPI_Offset)(size * sizeof(int));
+   size_t next_read_offset = 0;
+
+   if (size>1)
+   {
+      if (rank==0)
+      {
+         next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSet) + dataSetParamStr.doubleCountInBlock * sizeof(double));
+         MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
+      }
+      else
+      {
+         MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank-1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+         next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSet) + dataSetParamStr.doubleCountInBlock * sizeof(double));
+         if (rank<size-1)
+            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank+1, 5, MPI_COMM_WORLD);
+      }
+   }
+
+   /*int chunkFlag = 0;
+
+   if (rank == 0)
+   {
+      MPI_File_read_at(file_handler, read_offset, dataSetArray, blocksCount, dataSetType, MPI_STATUS_IGNORE);
+      MPI_File_read_at(file_handler, (MPI_Offset)(read_offset+blocksCount*sizeof(DataSet)), &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+       
+      for (int i=1; i<size; i+=chunk)
+      {
+         for (int j=i; j<i+chunk; j++)
+         {
+            if (j < size)
+            {
+               MPI_Send(&chunkFlag, 1, MPI_INT, j, 77, MPI_COMM_WORLD);
+               //UBLOG(logINFO, "j= "<<j);
+            }
+         }
+         for (int j=i; j<i+chunk; j++)
+         {
+            if (j < size)
+            {
+               MPI_Recv(&chunkFlag, 1, MPI_INT, j, 77, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            }
+         }
+      }
+   }
+   else
+   {
+      MPI_Recv(&chunkFlag, 1, MPI_INT, 0, 77, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+      MPI_File_read_at(file_handler, read_offset, dataSetArray, blocksCount, dataSetType, MPI_STATUS_IGNORE);
+      MPI_File_read_at(file_handler, (MPI_Offset)(read_offset+blocksCount*sizeof(DataSet)), &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+      MPI_Send(&chunkFlag, 1, MPI_INT, 0, 77, MPI_COMM_WORLD);
+      //UBLOG(logINFO, "read rank= "<<rank);
+   }*/
+
+   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset+sizeof(dataSetParam)), dataSetArray, blocksCount, dataSetType, MPI_STATUS_IGNORE);
+   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset+sizeof(dataSetParam)+blocksCount*sizeof(DataSet)), &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+   MPI_File_close(&file_handler);
+
+   //for (int ch = 0; ch < blocksCount; ch++)
+   //{
+   //   if ((dataSetArrayGW[ch].x1 != dataSetArray[ch].x1) ||
+   //      (dataSetArrayGW[ch].x2 != dataSetArray[ch].x2) ||
+   //      (dataSetArrayGW[ch].x3 != dataSetArray[ch].x3) ||
+   //      (dataSetArrayGW[ch].level != dataSetArray[ch].level) ||
+   //      (dataSetArrayGW[ch].ghostLayerWidth != dataSetArray[ch].ghostLayerWidth) ||
+   //      (dataSetArrayGW[ch].collFactor != dataSetArray[ch].collFactor) ||
+   //      (dataSetArrayGW[ch].deltaT != dataSetArray[ch].deltaT) ||
+   //      (dataSetArrayGW[ch].compressible != dataSetArray[ch].compressible) ||
+   //      (dataSetArrayGW[ch].withForcing != dataSetArray[ch].withForcing)) 
+   //      std::cout << "dataSetArrayGW != rank" << rank << ", !!!!!====="<< std::endl;
+   //}
+   //std::cout << "doubleValuesArrayGW.size" << doubleValuesArrayGW.size() << ", " << doubleValuesArray.size() << std::endl;
+   //for (int vl = 0; vl < doubleValuesArrayGW.size(); vl++)
+   //   if(doubleValuesArrayGW[vl] != doubleValuesArray[vl])
+   //      std::cout << "doubleValuesArrayGW != rank" << rank << ", !!!!!====="<< std::endl;
+
+   if (comm->isRoot())
+   {
+      finish = MPI_Wtime();
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::readDataSet time: "<<finish-start<<" s");
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::readDataSet start of restore of data, rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   size_t index = 0, nextVectorSize = 0;
+   std::vector<double> vectorsOfValues[9];
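+   // order of the 9 arrays (matching writeDataSet): average density, average velocity, average fluctuations,
+   // average triple correlations, shear stress values, relaxation factor, local/non-local/zero distributions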
+   for (int n = 0; n<blocksCount; n++)
+   {
+      for (int b = 0; b<9; b++) // assign the appropriate vector for each of the 9 dataSet arrays
+      {
+         nextVectorSize = dataSetParamStr.nx[b][0]* dataSetParamStr.nx[b][1]* dataSetParamStr.nx[b][2]* dataSetParamStr.nx[b][3];
+         vectorsOfValues[b].assign(doubleValuesArray.data()+index, doubleValuesArray.data()+index+nextVectorSize);
+         index += nextVectorSize;
+      }
+
+      // fill dataSet arrays
+      AverageValuesArray3DPtr mAverageDensity;
+      if ((dataSetParamStr.nx[0][0]==0)&&(dataSetParamStr.nx[0][1]==0)&&(dataSetParamStr.nx[0][2]==0)&&(dataSetParamStr.nx[0][3]==0))
+         mAverageDensity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr();
+      else
+         mAverageDensity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[0], dataSetParamStr.nx[0][0], dataSetParamStr.nx[0][1], dataSetParamStr.nx[0][2], dataSetParamStr.nx[0][3]));
+
+      AverageValuesArray3DPtr mAverageVelocity;
+      if ((dataSetParamStr.nx[1][0]==0)&&(dataSetParamStr.nx[1][1]==0)&&(dataSetParamStr.nx[1][2]==0)&&(dataSetParamStr.nx[1][3]==0))
+         mAverageVelocity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr();
+      else
+         mAverageVelocity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[1], dataSetParamStr.nx[1][0], dataSetParamStr.nx[1][1], dataSetParamStr.nx[1][2], dataSetParamStr.nx[1][3]));
+
+      AverageValuesArray3DPtr mAverageFluktuations;
+      if ((dataSetParamStr.nx[2][0]==0)&&(dataSetParamStr.nx[2][1]==0)&&(dataSetParamStr.nx[2][2]==0)&&(dataSetParamStr.nx[2][3]==0))
+         mAverageFluktuations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr();
+      else
+         mAverageFluktuations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[2], dataSetParamStr.nx[2][0], dataSetParamStr.nx[2][1], dataSetParamStr.nx[2][2], dataSetParamStr.nx[2][3]));
+
+      AverageValuesArray3DPtr mAverageTriplecorrelations;
+      if ((dataSetParamStr.nx[3][0]==0)&&(dataSetParamStr.nx[3][1]==0)&&(dataSetParamStr.nx[3][2]==0)&&(dataSetParamStr.nx[3][3]==0))
+         mAverageTriplecorrelations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr();
+      else
+         mAverageTriplecorrelations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[3], dataSetParamStr.nx[3][0], dataSetParamStr.nx[3][1], dataSetParamStr.nx[3][2], dataSetParamStr.nx[3][3]));
+
+      ShearStressValuesArray3DPtr mShearStressValues;
+      if ((dataSetParamStr.nx[4][0]==0)&&(dataSetParamStr.nx[4][1]==0)&&(dataSetParamStr.nx[4][2]==0)&&(dataSetParamStr.nx[4][3]==0))
+         mShearStressValues = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr();
+      else
+         mShearStressValues = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[4], dataSetParamStr.nx[4][0], dataSetParamStr.nx[4][1], dataSetParamStr.nx[4][2], dataSetParamStr.nx[4][3]));
+
+      RelaxationFactorArray3DPtr mRelaxationFactor;
+      if ((dataSetParamStr.nx[5][0]==0)&&(dataSetParamStr.nx[5][1]==0)&&(dataSetParamStr.nx[5][2]==0))
+         mRelaxationFactor = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr();
+      else
+         mRelaxationFactor = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(vectorsOfValues[5], dataSetParamStr.nx[5][0], dataSetParamStr.nx[5][1], dataSetParamStr.nx[5][2]));
+
+      //DistributionArray3DPtr mFdistributions(new D3Q27EsoTwist3DSplittedVector(dataSetParamStr.nx1, dataSetParamStr.nx2, dataSetParamStr.nx3, -999.0));
+      DistributionArray3DPtr mFdistributions(new D3Q27EsoTwist3DSplittedVector());
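+      // reassemble the D3Q27EsoTwist3DSplittedVector from the three restored distribution arrays (local, non-local, zero)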
+
+      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[6], dataSetParamStr.nx[6][0], dataSetParamStr.nx[6][1], dataSetParamStr.nx[6][2], dataSetParamStr.nx[6][3])));
+      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[7], dataSetParamStr.nx[7][0], dataSetParamStr.nx[7][1], dataSetParamStr.nx[7][2], dataSetParamStr.nx[7][3])));
+      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(vectorsOfValues[8], dataSetParamStr.nx[8][0], dataSetParamStr.nx[8][1], dataSetParamStr.nx[8][2])));
+
+      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX1(dataSetParamStr.nx1);
+      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX2(dataSetParamStr.nx2);
+      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX3(dataSetParamStr.nx3);
+
+      DataSet3DPtr dataSetPtr = DataSet3DPtr(new DataSet3D());
+      dataSetPtr->setAverageDencity(mAverageDensity);
+      dataSetPtr->setAverageVelocity(mAverageVelocity);
+      dataSetPtr->setAverageFluctuations(mAverageFluktuations);
+      dataSetPtr->setAverageTriplecorrelations(mAverageTriplecorrelations);
+      dataSetPtr->setShearStressValues(mShearStressValues);
+      dataSetPtr->setRelaxationFactor(mRelaxationFactor);
+      dataSetPtr->setFdistributions(mFdistributions);
+
+      // find the necessary block and fill it
+      Block3DPtr block = grid->getBlock(dataSetArray[n].x1, dataSetArray[n].x2, dataSetArray[n].x3, dataSetArray[n].level);
+      //LBMKernelPtr kernel(new CompressibleCumulantLBMKernel());
+      //LBMKernelPtr kernel(new IncompressibleCumulantLBMKernel());
+      LBMKernelPtr kernel = this->lbmKernel->clone();
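+      // restore the per-block kernel parameters that writeDataSet stored in the DataSet structure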
+      kernel->setGhostLayerWidth(dataSetArray[n].ghostLayerWidth);
+      kernel->setCollisionFactor(dataSetArray[n].collFactor);
+      kernel->setDeltaT(dataSetArray[n].deltaT);
+      kernel->setCompressible(dataSetArray[n].compressible);
+      kernel->setWithForcing(dataSetArray[n].withForcing);
+      kernel->setDataSet(dataSetPtr);
+      block->setKernel(kernel);
+      //block->getKernel()->setDataSet(dataSetPtr);
+   }
+
+   delete[] dataSetArray;
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::readDataSet end of restore of data, rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+}
+
+void MPIIORestart11CoProcessor::readBoundaryConds(int step)
+{
+   int rank, size;
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::readBoundaryConds start MPI IO rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+   double start, finish;
+   if (comm->isRoot()) start = MPI_Wtime();
+
+   MPI_File file_handler;
+   std::string filename = path+"/mpi_io_cp/mpi_io_cp_"+UbSystem::toString(step)+"/cpBC.bin";
+   int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+   if (rc!=MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file "+filename);
+
+   int blocksCount = 0;
+   int dataCount1000 = 0;
+   int dataCount2 = 0;
+   MPI_Offset read_offset1 = (MPI_Offset)(rank * (3 * sizeof(int) + sizeof(boundCondParam)));
+
+   // read count of blocks
+   MPI_File_read_at(file_handler, read_offset1, &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
+   // read count of big BoundaryCondition blocks
+   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset1+sizeof(int)), &dataCount1000, 1, MPI_INT, MPI_STATUS_IGNORE);
+   // read count of indexContainer values in all blocks
+   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset1+2*sizeof(int)), &dataCount2, 1, MPI_INT, MPI_STATUS_IGNORE);
+   // read the common boundary-condition parameters (incl. the count of bcindexmatrix values per block)
+   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset1+3*sizeof(int)), &boundCondParamStr, 1, boundCondParamType, MPI_STATUS_IGNORE);
+
+   //std::cout << "rank=" << rank << ",(rank*read_offset1)=" << rank*read_offset1 << ",blocksCount=" << blocksCount;
+   //std::cout << ", " << rank*read_offset1 + sizeof(int) << ",bcBlockCount=" << dataCount1000;
+   //std::cout << ", " << rank*read_offset1 + 2 * sizeof(int) << ",count_indexContainer=" << dataCount2;
+   //std::cout << ", " << rank*read_offset1 + 3 * sizeof(int) << ",boundCondParamStr=" << boundCondParamStr.bcindexmatrixCount << std::endl;
+   //std::cout << "readrank=" << rank << ",blocksCount=" << blocksCount << ", " << dataCount1000 << ", " << dataCount2 << ", " << boundCondParamStr.bcindexmatrixCount << std::endl;
+
+   MPI_Type_contiguous(boundCondParamStr.bcindexmatrixCount, MPI_INT, &bcindexmatrixType);
+   MPI_Type_commit(&bcindexmatrixType);
+   mpiTypeFreeFlag = true;
+
+   size_t dataCount = dataCount1000 * BLOCK_SIZE;
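+   // total number of BoundaryCondition records to read, including the zero-filled
+   // padding that writeBoundaryConds appended to complete the last "big block"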
+   BCAdd* bcAddArray = new BCAdd[blocksCount];
+   BoundaryCondition* bcArray = new BoundaryCondition[dataCount];
+   BoundaryCondition* nullBouCond = new BoundaryCondition();
+   memset(nullBouCond, 0, sizeof(BoundaryCondition));
+   int* intArray1 = new int[blocksCount * boundCondParamStr.bcindexmatrixCount];
+   int* intArray2 = new int[dataCount2];
+
+   MPI_Offset read_offset = (MPI_Offset)(size * (3 * sizeof(int) + sizeof(boundCondParam)));
+   size_t next_read_offset = 0;
+
+   if (size>1)
+   {
+      if (rank==0)
+      {
+         next_read_offset = read_offset+blocksCount*sizeof(BCAdd)+dataCount*sizeof(BoundaryCondition)+(blocksCount * boundCondParamStr.bcindexmatrixCount + dataCount2)*sizeof(int);
+         MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
+      }
+      else
+      {
+         MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank-1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+         next_read_offset = read_offset+blocksCount*sizeof(BCAdd)+dataCount*sizeof(BoundaryCondition)+(blocksCount * boundCondParamStr.bcindexmatrixCount + dataCount2)*sizeof(int);
+         if (rank<size-1)
+            MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank+1, 5, MPI_COMM_WORLD);
+      }
+   }
+   //std::cout << "readrank=" << rank << ",read_offset=" << read_offset << ", " << read_offset + blocksCount * sizeof(BCAdd) << ", " << read_offset + blocksCount * sizeof(BCAdd) + dataCount * sizeof(BoundaryCondition) << ", " << read_offset + blocksCount * sizeof(BCAdd) + dataCount * sizeof(BoundaryCondition) + blocksCount * bcindexmatrixCount * sizeof(int) << std::endl;
+
+   MPI_File_read_at(file_handler, read_offset, bcAddArray, blocksCount, boundCondTypeAdd, MPI_STATUS_IGNORE);
+   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset+blocksCount*sizeof(BCAdd)), &bcArray[0], dataCount1000, boundCondType1000, MPI_STATUS_IGNORE);
+   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset+blocksCount*sizeof(BCAdd)+dataCount*sizeof(BoundaryCondition)), &intArray1[0], blocksCount, bcindexmatrixType, MPI_STATUS_IGNORE);
+   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset+blocksCount*sizeof(BCAdd)+dataCount*sizeof(BoundaryCondition)+blocksCount * boundCondParamStr.bcindexmatrixCount*sizeof(int)), &intArray2[0], dataCount2, MPI_INT, MPI_STATUS_IGNORE);
+   //MPI_File_sync(file_handler);
+
+   MPI_File_close(&file_handler);
+
+   if (comm->isRoot())
+   {
+      finish = MPI_Wtime();
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::readBoundaryConds time: "<<finish-start<<" s");
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::readBoundaryConds start of restore of data, rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   int index = 0, index1 = 0, index2 = 0;
+   std::vector<BoundaryConditionsPtr> bcVector;
+   std::vector<int> bcindexmatrixV;
+   std::vector<int> indexContainerV;
+
+   for (int n = 0; n<blocksCount; n++)
+   {
+      bcVector.resize(0);
+      bcindexmatrixV.resize(0);
+      indexContainerV.resize(0);
+
+      for (int ibc = 0; ibc<bcAddArray[n].boundCond_count; ibc++)
+      {
+         BoundaryConditionsPtr bc;
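+         // a record consisting only of zero bytes marks an empty slot (see writeBoundaryConds);
+         // it is restored as a null pointer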
+         if (memcmp(&bcArray[index], nullBouCond, sizeof(BoundaryCondition))==0)
+            bc = BoundaryConditionsPtr();
+         else
+         {
+            bc = BoundaryConditionsPtr(new BoundaryConditions);
+            bc->noslipBoundaryFlags = bcArray[index].noslipBoundaryFlags;
+            bc->slipBoundaryFlags = bcArray[index].slipBoundaryFlags;
+            bc->densityBoundaryFlags = bcArray[index].densityBoundaryFlags;
+            bc->velocityBoundaryFlags = bcArray[index].velocityBoundaryFlags;
+            bc->wallModelBoundaryFlags = bcArray[index].wallModelBoundaryFlags;
+            bc->bcVelocityX1 = bcArray[index].bcVelocityX1;
+            bc->bcVelocityX2 = bcArray[index].bcVelocityX2;
+            bc->bcVelocityX3 = bcArray[index].bcVelocityX3;
+            bc->bcDensity = bcArray[index].bcDensity;
+            bc->bcLodiDensity = bcArray[index].bcLodiDensity;
+            bc->bcLodiVelocityX1 = bcArray[index].bcLodiVelocityX1;
+            bc->bcLodiVelocityX2 = bcArray[index].bcLodiVelocityX2;
+            bc->bcLodiVelocityX3 = bcArray[index].bcLodiVelocityX3;
+            bc->bcLodiLentgh = bcArray[index].bcLodiLentgh;
+
+            bc->nx1 = bcArray[index].nx1;
+            bc->nx2 = bcArray[index].nx2;
+            bc->nx3 = bcArray[index].nx3;
+            for (int iq = 0; iq<26; iq++)
+               bc->setQ(bcArray[index].q[iq], iq);
+            bc->setBcAlgorithmType(bcArray[index].algorithmType);
+
+            //if (bcVectorGW[index].nx1 != bc->nx1)
+            //   std::cout << "readBoundaryConds nx1 !!!!===" << bcVectorGW[index].nx1 << " ---- " << bc->nx1 << std::endl;
+            //if (bcVectorGW[index].nx2 != bc->nx2)
+            //   std::cout << "readBoundaryConds nx2 !!!!===" << bcVectorGW[index].nx2 << " ---- " << bc->nx2 << std::endl;
+            //if (bcVectorGW[index].nx3 != bc->nx3)
+            //   std::cout << "readBoundaryConds nx3 !!!!===" << bcVectorGW[index].nx3 << " ---- " << bc->nx3 << std::endl;
+            //if (bcVectorGW[index].algorithmType != bc->algorithmType)
+            //   std::cout << "readBoundaryConds algorithmType !!!!===" << bcVectorGW[index].algorithmType << " ---- " << bc->algorithmType << std::endl;
+            //for (int iq = 0; iq<26; iq++)
+            //   if (bcVectorGW[index].q[iq] != bc->q[iq])
+            //   std::cout << "readBoundaryConds q !!!!===" /*<< bcVectorGW[index].q << " ---- " << bc->q*/ << std::endl;
+            //std::cout << "readBoundaryConds BoundaryConditionsPtr !!!!===" <<std::endl;
+
+         }
+
+         bcVector.push_back(bc);
+         index++;
+      }
+
+      for (int b1 = 0; b1 < boundCondParamStr.bcindexmatrixCount; b1++)
+      {
+         //if (bcindexmatrixVGW[index1] != intArray1[index1])
+         //   std::cout << "readBoundaryConds bcindexmatrixVGW !!!!===" << std::endl;
+         bcindexmatrixV.push_back(intArray1[index1++]);
+      }
+      for (int b2 = 0; b2 < bcAddArray[n].indexContainer_count; b2++)
+      {
+         //if (indexContainerVGW[index2] != intArray2[index2])
+         //   std::cout << "readBoundaryConds indexContainerVGW !!!!===" << std::endl;
+         indexContainerV.push_back(intArray2[index2++]);
+      }
+
+      CbArray3D<int, IndexerX3X2X1> bcim(bcindexmatrixV, boundCondParamStr.nx1, boundCondParamStr.nx2, boundCondParamStr.nx3);
+
+      Block3DPtr block = grid->getBlock(bcAddArray[n].x1, bcAddArray[n].x2, bcAddArray[n].x3, bcAddArray[n].level);
+      //if(!block) std::cout << "readBoundaryConds can't find the block!!!" << std::endl;
+      BCProcessorPtr bcProc = bcProcessor->clone(block->getKernel());
+      //if(!bcProc) std::cout << "readBoundaryConds can't find the bcProc!!!" << std::endl;
+      BCArray3DPtr bcArr(new BCArray3D());
+      bcArr->bcindexmatrix = bcim;
+      bcArr->bcvector = bcVector;
+      bcArr->indexContainer = indexContainerV;
+      bcProc->setBCArray(bcArr);
+      
+      //if (!(block->getKernel())) 
+      //   std::cout << "readBoundaryConds kernel=" << block->getKernel() <<" "<< bcAddArray[n].x1 << " " << bcAddArray[n].x2 << " " << bcAddArray[n].x3 << std::endl;
+      block->getKernel()->setBCProcessor(bcProc);
+   }
+
+   delete nullBouCond;
+   delete[] bcArray;
+   delete[] bcAddArray;
+   delete[] intArray1;
+   delete[] intArray2;
+   
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart11CoProcessor::readBoundaryConds end of restore of data, rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+}
+//////////////////////////////////////////////////////////////////////////
+void MPIIORestart11CoProcessor::setChunk(int val)
+{
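+   // number of ranks that may read simultaneously; only used by the staged read
+   // variant that is currently commented out in readDataSet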
+   chunk = val;
+}
+//////////////////////////////////////////////////////////////////////////
+void MPIIORestart11CoProcessor::setLBMKernel(LBMKernelPtr kernel)
+{
+   this->lbmKernel = kernel;
+}
+//////////////////////////////////////////////////////////////////////////
+void MPIIORestart11CoProcessor::setBCProcessor(BCProcessorPtr bcProcessor)
+{
+   this->bcProcessor = bcProcessor;
+}
+
diff --git a/source/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h b/source/VirtualFluidsCore/CoProcessors/MPIIORestart11CoProcessor.h
similarity index 73%
rename from source/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h
rename to source/VirtualFluidsCore/CoProcessors/MPIIORestart11CoProcessor.h
index c9c9e28e9..56ea4d9a5 100644
--- a/source/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h
+++ b/source/VirtualFluidsCore/CoProcessors/MPIIORestart11CoProcessor.h
@@ -1,187 +1,193 @@
-#ifndef _MPIIORestartCoProcessor_H_
-#define _MPIIORestartCoProcessor_H_
-
-#include "mpi.h"
-
-#include "CoProcessor.h"
-#include "Communicator.h"
-#include "WbWriter.h"
-
-#include <boost/shared_ptr.hpp>
-
-class MPIIORestartCoProcessor;
-typedef boost::shared_ptr<MPIIORestartCoProcessor> MPIIORestartCoProcessorPtr;
-
-//! \class MPIWriteBlocksCoProcessor 
-//! \brief Writes the grid each timestep into the files and reads the grip from the files before regenerating  
-class MPIIORestartCoProcessor: public CoProcessor
-{
-   //! \struct GridParam
-   //! \brief Structure describes parameters of the grid
-   //! \details The structure is nessasary to restore the grid correctly
-   struct GridParam
-    {
-      double trafoParams[33];
-      double deltaX;
-      int blockNx1;
-      int blockNx2;
-      int blockNx3;
-      int nx1;
-      int nx2;
-      int nx3;
-      bool periodicX1;
-      bool periodicX2;
-      bool periodicX3;
-      bool active;
-      bool transformation;
-    };
-
-   //! \struct blockParam
-   //! \brief Structure describes parameters of the block that are equal in all blocks
-   //! \details The structure used to store some parameters needed to restore dataSet arrays
-   struct BlockParam
-    {
-       int nx1;   //	to find the right block
-       int nx2;
-       int nx3;
-       int nx[9][4]; // 9 arrays x (nx1, nx2, nx3, nx4)
-       int doubleCountInBlock;   // how many double-values are in all arrays dataSet in one (any) block
-       int bcindexmatrix_count;	// how many bcindexmatrix-values are in one (any) block 
-    };
-
-   //! \struct Block3d
-   //! \brief Structure contains information of the block
-   //! \details The structure is used to write the data describing the block in the grid when saving the grid 
-   //! and to read it when restoring the grid
-   struct Block3d
-	{
-		//double collFactor;
-		//double deltaT;
-		int x1;
-		int x2;
-		int x3;
-		int bundle;
-		int rank;
-		int lrank;
-		int part;
-		int globalID;
-		int localID;
-		int level;
-		int interpolationFlagCF;
-		int interpolationFlagFC;
-		int counter;
-		//int ghostLayerWidth;
-		bool active;
-		//bool compressible;
-		//bool withForcing;
-	};
-
-   //! \struct dataSet
-   //! \brief Structure containes information identifying the block 
-   //! \details The structure is used to find the needed block in the grid when restoring a dataSet
-   struct DataSet
-	{
-      double collFactor;
-      double deltaT;
-      int x1;  
-		int x2;  
-		int x3;  
-		int level;
-      int ghostLayerWidth;
-      bool compressible;
-      bool withForcing;
-   };
-   
-   //! \struct BoundaryCondition
-   //! \brief Structure containes information about boundary conditions of the block 
-   //! \details The structure is used to write data describing boundary conditions of the blocks when saving the grid 
-   //! and to read it when restoring the grid
-   struct BoundaryCondition
-	{
-		long long noslipBoundaryFlags;	//	MPI_LONG_LONG
-		long long slipBoundaryFlags;		
-		long long velocityBoundaryFlags;		
-		long long densityBoundaryFlags;		
-		long long wallModelBoundaryFlags;
-		
-		float  bcVelocityX1;
-		float  bcVelocityX2;
-		float  bcVelocityX3;
-		float  bcDensity;
-		
-		float  bcLodiDensity;
-		float  bcLodiVelocityX1;
-		float  bcLodiVelocityX2;
-		float  bcLodiVelocityX3;
-		float  bcLodiLentgh;
-		
-		float  nx1,nx2,nx3;
-		float q[26];					//	MPI_FLOAT
-
-      char algorithmType;
-   };
-
-   //! \struct BCAdd
-   //! \brief Structure containes information identifying the block 
-   //! and some parameters of the arrays of boundary conditions that are equal in all blocks
-   //! \details The structure is used to find the needed block in the grid when restoring a dataSet
-   //! and to set common parameters
-   struct BCAdd
-	{
-		int x1;		//	to find the right block
-		int x2;		
-		int x3;		
-		int level;	
-      int boundCond_count;		//	how many BoundaryCondition-structures are in this block
-      int indexContainer_count;	// how many indexContainer-values are in this block
-   };
-
-public:
-   MPIIORestartCoProcessor(Grid3DPtr grid, UbSchedulerPtr s, const std::string& path, CommunicatorPtr comm);
-   virtual ~MPIIORestartCoProcessor();
-   //! Each timestep writes the grid into the files
-   void process(double step);
-   //! Reads the grid from the files before grid reconstruction
-   void restart(int step);
-   //! Writes the blocks of the grid into the file outputBlocks.bin
-   void writeBlocks(int step);
-   //! Writes the datasets of the blocks into the file outputDataSet.bin
-   void writeDataSet(int step);
-   //! Writes the boundary conditions of the blocks into the file outputBoundCond.bin
-   void writeBoundaryConds(int step);
-   //! Reads the blocks of the grid from the file outputBlocks.bin
-   void readBlocks(int step);
-   //! Reads the datasets of the blocks from the file outputDataSet.bin
-   void readDataSet(int step);
-   //! Reads the boundary conditions of the blocks from the file outputBoundCond.bin
-   void readBoundaryConds(int step);
-   //! The function sets number of ranks that read simultaneously 
-   void setChunk(int val);
-   //! The function sets LBMKernel
-   void setLBMKernel(LBMKernelPtr kernel);
-   //!The function sets BCProcessor
-   void setBCProcessor(BCProcessorPtr bcProcessor);
-   //!The function truncates the data files
-   void clearAllFiles(int step);
-
-protected:
-   std::string path;
-   CommunicatorPtr comm;
-   bool mpiTypeFreeFlag;
-
-private:
-	MPI_Datatype gridParamType, blockParamType, block3dType, dataSetType, dataSetDoubleType, boundCondType, boundCondType1000, boundCondTypeAdd, bcindexmatrixType;
-   BlockParam blockParamStr;
-   int chunk;
-   LBMKernelPtr lbmKernel;
-   BCProcessorPtr bcProcessor;
-
-   //DataSet dataSetArrayGW[32];
-   //std::vector<double> doubleValuesArrayGW;
-   //std::vector<BoundaryCondition> bcVectorGW;
-   //std::vector<int> bcindexmatrixVGW;
-   //std::vector<int> indexContainerVGW;
-
-};
-
-#endif 
+#ifndef _MPIIORestart11CoProcessor_H_
+#define _MPIIORestart11CoProcessor_H_
+
+#include "mpi.h"
+
+#include "CoProcessor.h"
+#include "Communicator.h"
+#include "WbWriter.h"
+
+#include <boost/shared_ptr.hpp>
+
+class MPIIORestart11CoProcessor;
+typedef boost::shared_ptr<MPIIORestart11CoProcessor> MPIIORestart11CoProcessorPtr;
+
+//! \class MPIIORestart11CoProcessor 
+//! \brief Writes the grid to files each timestep and reads the grid back from the files before regenerating  
+class MPIIORestart11CoProcessor: public CoProcessor
+{
+   //! \struct GridParam
+   //! \brief Structure describes parameters of the grid
+   //! \details The structure is necessary to restore the grid correctly
+   struct GridParam
+    {
+      double trafoParams[33];
+      double deltaX;
+      int blockNx1;
+      int blockNx2;
+      int blockNx3;
+      int nx1;
+      int nx2;
+      int nx3;
+      bool periodicX1;
+      bool periodicX2;
+      bool periodicX3;
+      bool active;
+      bool transformation;
+    };
+
+   //! \struct Block3d
+   //! \brief Structure contains information of the block
+   //! \details The structure is used to write the data describing the block in the grid when saving the grid 
+   //! and to read it when restoring the grid
+   struct Block3d
+	{
+		int x1;
+		int x2;
+		int x3;
+		int bundle;
+		int rank;
+		int lrank;
+		int part;
+		int globalID;
+		int localID;
+		int level;
+		int interpolationFlagCF;
+		int interpolationFlagFC;
+		int counter;
+		bool active;
+	};
+
+   //! \struct dataSetParam
+   //! \brief Structure describes parameters of the dataSet that are equal in all blocks
+   //! \details The structure is used to store some parameters needed to restore the dataSet arrays
+   struct dataSetParam
+   {
+      int nx1;   // overall dimensions of the distribution arrays
+      int nx2;
+      int nx3;
+      int nx[9][4]; // 9 arrays x (nx1, nx2, nx3, nx4)
+      int doubleCountInBlock;   // how many double-values are in all arrays dataSet in one (any) block
+   };
+
+   //! \struct dataSet
+   //! \brief Structure contains information identifying the block 
+   //! \details The structure is used to find the needed block in the grid when restoring a dataSet
+   struct DataSet
+   {
+      double collFactor;
+      double deltaT;
+      int x1;
+      int x2;
+      int x3;
+      int level;
+      int ghostLayerWidth;
+      bool compressible;
+      bool withForcing;
+   };
+   
+   //! \struct BoundaryCondition
+   //! \brief Structure contains information about boundary conditions of the block 
+   //! \details The structure is used to write data describing boundary conditions of the blocks when saving the grid 
+   //! and to read it when restoring the grid
+   struct BoundaryCondition
+	{
+		long long noslipBoundaryFlags;	//	MPI_LONG_LONG
+		long long slipBoundaryFlags;		
+		long long velocityBoundaryFlags;		
+		long long densityBoundaryFlags;		
+		long long wallModelBoundaryFlags;
+		
+		float  bcVelocityX1;
+		float  bcVelocityX2;
+		float  bcVelocityX3;
+		float  bcDensity;
+		
+		float  bcLodiDensity;
+		float  bcLodiVelocityX1;
+		float  bcLodiVelocityX2;
+		float  bcLodiVelocityX3;
+		float  bcLodiLentgh;
+		
+		float  nx1,nx2,nx3;
+		float q[26];					//	MPI_FLOAT
+
+      char algorithmType;
+   };
+
+   //! \struct boundCondParam
+   //! \brief Structure describes parameters of the boundaryConditions that are equal in all blocks
+   //! \details The structure is used to store some parameters needed to restore the boundaryConditions arrays
+   struct boundCondParam
+   {
+      int nx1;
+      int nx2;
+      int nx3;
+      int bcindexmatrixCount;   // how many bcindexmatrix-values are in one (any) block 
+   };
+   
+   //! \struct BCAdd
+   //! \brief Structure contains information identifying the block 
+   //! and some parameters of the arrays of boundary conditions that are equal in all blocks
+   //! \details The structure is used to find the needed block in the grid when restoring a dataSet
+   //! and to set common parameters
+   struct BCAdd
+   {
+      int x1;       // to find the right block
+      int x2;
+      int x3;
+      int level;
+      int boundCond_count;      // how many BoundaryCondition structures are in this block
+      int indexContainer_count; // how many indexContainer values are in this block
+   };
+
+public:
+   MPIIORestart11CoProcessor(Grid3DPtr grid, UbSchedulerPtr s, const std::string& path, CommunicatorPtr comm);
+   virtual ~MPIIORestart11CoProcessor();
+   //! Each timestep writes the grid into the files
+   void process(double step);
+   //! Reads the grid from the files before grid reconstruction
+   void restart(int step);
+   //! Writes the blocks of the grid into the file outputBlocks.bin
+   void writeBlocks(int step);
+   //! Writes the datasets of the blocks into the file outputDataSet.bin
+   void writeDataSet(int step);
+   //! Writes the boundary conditions of the blocks into the file outputBoundCond.bin
+   void writeBoundaryConds(int step);
+   //! Reads the blocks of the grid from the file outputBlocks.bin
+   void readBlocks(int step);
+   //! Reads the datasets of the blocks from the file outputDataSet.bin
+   void readDataSet(int step);
+   //! Reads the boundary conditions of the blocks from the file outputBoundCond.bin
+   void readBoundaryConds(int step);
+   //! The function sets number of ranks that read simultaneously 
+   void setChunk(int val);
+   //! The function sets LBMKernel
+   void setLBMKernel(LBMKernelPtr kernel);
+   //!The function sets BCProcessor
+   void setBCProcessor(BCProcessorPtr bcProcessor);
+   //!The function truncates the data files
+   void clearAllFiles(int step);
+
+protected:
+   std::string path;
+   CommunicatorPtr comm;
+   bool mpiTypeFreeFlag;
+
+private:
+   MPI_Datatype gridParamType, block3dType, dataSetParamType, dataSetType, dataSetDoubleType, boundCondParamType, boundCondType, boundCondType1000, boundCondTypeAdd, bcindexmatrixType;
+   dataSetParam dataSetParamStr;
+   boundCondParam boundCondParamStr;
+   int chunk;
+   LBMKernelPtr lbmKernel;
+   BCProcessorPtr bcProcessor;
+
+   /*DataSet dataSetArrayGW[128];
+   std::vector<double> doubleValuesArrayGW;
+   std::vector<BoundaryCondition> bcVectorGW;
+   std::vector<int> bcindexmatrixVGW;
+   std::vector<int> indexContainerVGW;*/
+
+};
+
+#endif 
diff --git a/source/VirtualFluidsCore/CoProcessors/MPIIORestart3CoProcessor.cpp b/source/VirtualFluidsCore/CoProcessors/MPIIORestart1CoProcessor.cpp
similarity index 97%
rename from source/VirtualFluidsCore/CoProcessors/MPIIORestart3CoProcessor.cpp
rename to source/VirtualFluidsCore/CoProcessors/MPIIORestart1CoProcessor.cpp
index 3c71c83fe..dfdfeaa68 100644
--- a/source/VirtualFluidsCore/CoProcessors/MPIIORestart3CoProcessor.cpp
+++ b/source/VirtualFluidsCore/CoProcessors/MPIIORestart1CoProcessor.cpp
@@ -1,4 +1,4 @@
-#include "MPIIORestart3CoProcessor.h"
+#include "MPIIORestart1CoProcessor.h"
 #include <boost/foreach.hpp>
 #include "D3Q27System.h"
 //#include "LBMKernel.h"
@@ -14,7 +14,7 @@
 //! BoundaryCondition structures are written in chunks of BLOCK_SIZE structures each
 #define BLOCK_SIZE 1024
 
-MPIIORestart3CoProcessor::MPIIORestart3CoProcessor(Grid3DPtr grid, UbSchedulerPtr s,
+MPIIORestart1CoProcessor::MPIIORestart1CoProcessor(Grid3DPtr grid, UbSchedulerPtr s,
    const std::string& path,
    CommunicatorPtr comm) :
    CoProcessor(grid, s),
@@ -103,7 +103,7 @@ MPIIORestart3CoProcessor::MPIIORestart3CoProcessor(Grid3DPtr grid, UbSchedulerPt
 
 }
 //////////////////////////////////////////////////////////////////////////
-MPIIORestart3CoProcessor::~MPIIORestart3CoProcessor()
+MPIIORestart1CoProcessor::~MPIIORestart1CoProcessor()
 {
    MPI_Type_free(&gridParamType);
    MPI_Type_free(&blockParamType);
@@ -121,7 +121,7 @@ MPIIORestart3CoProcessor::~MPIIORestart3CoProcessor()
 }
 
 //////////////////////////////////////////////////////////////////////////
-void MPIIORestart3CoProcessor::process(double step)
+void MPIIORestart1CoProcessor::process(double step)
 {
    if (scheduler->isDue(step))
    {
@@ -137,7 +137,7 @@ void MPIIORestart3CoProcessor::process(double step)
    }
 }
 //////////////////////////////////////////////////////////////////////////
-void MPIIORestart3CoProcessor::clearAllFiles(int step)
+void MPIIORestart1CoProcessor::clearAllFiles(int step)
 {
    MPI_File file_handler1, file_handler2, file_handler3;
    MPI_Info info = MPI_INFO_NULL;
@@ -169,7 +169,7 @@ void MPIIORestart3CoProcessor::clearAllFiles(int step)
    MPI_File_close(&file_handler3);
 }
 //////////////////////////////////////////////////////////////////////////
-void MPIIORestart3CoProcessor::writeBlocks(int step)
+void MPIIORestart1CoProcessor::writeBlocks(int step)
 {
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -446,7 +446,7 @@ void MPIIORestart3CoProcessor::writeBlocks(int step)
    delete gridParameters;
 }
 
-void MPIIORestart3CoProcessor::writeDataSet(int step)
+void MPIIORestart1CoProcessor::writeDataSet(int step)
 {
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -624,7 +624,7 @@ void MPIIORestart3CoProcessor::writeDataSet(int step)
    delete[] dataSetArray;
 }
 
-void MPIIORestart3CoProcessor::writeBoundaryConds(int step)
+void MPIIORestart1CoProcessor::writeBoundaryConds(int step)
 {
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -811,7 +811,7 @@ void MPIIORestart3CoProcessor::writeBoundaryConds(int step)
 }
 
 //------------------------------------------- READ -----------------------------------------------
-void MPIIORestart3CoProcessor::restart(int step)
+void MPIIORestart1CoProcessor::restart(int step)
 {
    if (comm->isRoot()) UBLOG(logINFO, "MPIIORestart3CoProcessor restart step: "<<step);
    if (comm->isRoot()) UBLOG(logINFO, "Load check point - start");
@@ -822,7 +822,7 @@ void MPIIORestart3CoProcessor::restart(int step)
    this->reconnect(grid);
 }
 
-void MPIIORestart3CoProcessor::readBlocks(int step)
+void MPIIORestart1CoProcessor::readBlocks(int step)
 {
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -974,7 +974,7 @@ void MPIIORestart3CoProcessor::readBlocks(int step)
    }
 }
 
-void MPIIORestart3CoProcessor::readDataSet(int step)
+void MPIIORestart1CoProcessor::readDataSet(int step)
 {
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -1132,7 +1132,8 @@ void MPIIORestart3CoProcessor::readDataSet(int step)
       else
          mRelaxationFactor = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(vectorsOfValues[5], blockParamStr.nx[5][0], blockParamStr.nx[5][1], blockParamStr.nx[5][2]));
 
-      DistributionArray3DPtr mFdistributions(new D3Q27EsoTwist3DSplittedVector(blockParamStr.nx1, blockParamStr.nx2, blockParamStr.nx3, -999.0));
+      //DistributionArray3DPtr mFdistributions(new D3Q27EsoTwist3DSplittedVector(blockParamStr.nx1, blockParamStr.nx2, blockParamStr.nx3, -999.0));
+      DistributionArray3DPtr mFdistributions(new D3Q27EsoTwist3DSplittedVector());
 
       boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[6], blockParamStr.nx[6][0], blockParamStr.nx[6][1], blockParamStr.nx[6][2], blockParamStr.nx[6][3])));
       boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[7], blockParamStr.nx[7][0], blockParamStr.nx[7][1], blockParamStr.nx[7][2], blockParamStr.nx[7][3])));
@@ -1175,7 +1176,7 @@ void MPIIORestart3CoProcessor::readDataSet(int step)
    }
 }
 
-void MPIIORestart3CoProcessor::readBoundaryConds(int step)
+void MPIIORestart1CoProcessor::readBoundaryConds(int step)
 {
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -1309,14 +1310,26 @@ void MPIIORestart3CoProcessor::readBoundaryConds(int step)
       CbArray3D<int, IndexerX3X2X1> bcim(bcindexmatrixV, blockParamStr.nx1, blockParamStr.nx2, blockParamStr.nx3);
 
       Block3DPtr block = grid->getBlock(bcAddArray[n].x1, bcAddArray[n].x2, bcAddArray[n].x3, bcAddArray[n].level);
-      BCProcessorPtr bcProc = bcProcessor->clone(block->getKernel());
+      
+      LBMKernelPtr kernel = block->getKernel();
+
+      if (!kernel)
+      {
+         throw UbException(UB_EXARGS, "LBMKernel in " + block->toString() + "is not exist!");
+      }
+      
+      //BCProcessorPtr bcProc = bcProcessor->clone(block->getKernel());
+      BCProcessorPtr bcProc = bcProcessor->clone(kernel);
+      
       BCArray3DPtr bcArr(new BCArray3D());
       bcArr->bcindexmatrix = bcim;
       bcArr->bcvector = bcVector;
       bcArr->indexContainer = indexContainerV;
       bcProc->setBCArray(bcArr);
       
-      block->getKernel()->setBCProcessor(bcProc);
+      
+      //block->getKernel()->setBCProcessor(bcProc);
+      kernel->setBCProcessor(bcProc);
    }
 
 /*   for (int b1 = 0; b1 < bcindexmatrixVGW.size(); b1++)
@@ -1343,17 +1356,17 @@ void MPIIORestart3CoProcessor::readBoundaryConds(int step)
    }
 }
 //////////////////////////////////////////////////////////////////////////
-void MPIIORestart3CoProcessor::setChunk(int val)
+void MPIIORestart1CoProcessor::setChunk(int val)
 {
    chunk = val;
 }
 //////////////////////////////////////////////////////////////////////////
-void MPIIORestart3CoProcessor::setLBMKernel(LBMKernelPtr kernel)
+void MPIIORestart1CoProcessor::setLBMKernel(LBMKernelPtr kernel)
 {
    this->lbmKernel = kernel;
 }
 //////////////////////////////////////////////////////////////////////////
-void MPIIORestart3CoProcessor::setBCProcessor(BCProcessorPtr bcProcessor)
+void MPIIORestart1CoProcessor::setBCProcessor(BCProcessorPtr bcProcessor)
 {
    this->bcProcessor = bcProcessor;
 }
diff --git a/source/VirtualFluidsCore/CoProcessors/MPIIORestart3CoProcessor.h b/source/VirtualFluidsCore/CoProcessors/MPIIORestart1CoProcessor.h
similarity index 95%
rename from source/VirtualFluidsCore/CoProcessors/MPIIORestart3CoProcessor.h
rename to source/VirtualFluidsCore/CoProcessors/MPIIORestart1CoProcessor.h
index 7653fc0c8..5dcd74214 100644
--- a/source/VirtualFluidsCore/CoProcessors/MPIIORestart3CoProcessor.h
+++ b/source/VirtualFluidsCore/CoProcessors/MPIIORestart1CoProcessor.h
@@ -9,12 +9,12 @@
 
 #include <boost/shared_ptr.hpp>
 
-class MPIIORestart3CoProcessor;
-typedef boost::shared_ptr<MPIIORestart3CoProcessor> MPIIORestart3CoProcessorPtr;
+class MPIIORestart1CoProcessor;
+typedef boost::shared_ptr<MPIIORestart1CoProcessor> MPIIORestart1CoProcessorPtr;
 
 //! \class MPIIORestart1CoProcessor 
 //! \brief Writes the grid each timestep into the files and reads the grid from the files before regenerating  
-class MPIIORestart3CoProcessor: public CoProcessor
+class MPIIORestart1CoProcessor: public CoProcessor
 {
    //! \struct GridParam
    //! \brief Structure describes parameters of the grid
@@ -137,8 +137,8 @@ class MPIIORestart3CoProcessor: public CoProcessor
    };
 
 public:
-   MPIIORestart3CoProcessor(Grid3DPtr grid, UbSchedulerPtr s, const std::string& path, CommunicatorPtr comm);
-   virtual ~MPIIORestart3CoProcessor();
+   MPIIORestart1CoProcessor(Grid3DPtr grid, UbSchedulerPtr s, const std::string& path, CommunicatorPtr comm);
+   virtual ~MPIIORestart1CoProcessor();
    //! Writes the grid into the files at each scheduled timestep
    void process(double step);
    //! Reads the grid from the files before grid reconstruction
diff --git a/source/VirtualFluidsCore/CoProcessors/MPIIORestart21CoProcessor.cpp b/source/VirtualFluidsCore/CoProcessors/MPIIORestart21CoProcessor.cpp
new file mode 100644
index 000000000..4ecd193eb
--- /dev/null
+++ b/source/VirtualFluidsCore/CoProcessors/MPIIORestart21CoProcessor.cpp
@@ -0,0 +1,1225 @@
+#include "MPIIORestart21CoProcessor.h"
+#include <boost/foreach.hpp>
+#include "D3Q27System.h"
+#include "CompressibleCumulantLBMKernel.h"
+#include "D3Q27EsoTwist3DSplittedVector.h"
+#include <UbSystem.h>
+#include <MemoryUtil.h>
+#include "RenumberBlockVisitor.h"
+
+MPIIORestart21CoProcessor::MPIIORestart21CoProcessor(Grid3DPtr grid, UbSchedulerPtr s,
+   const std::string& path,
+   CommunicatorPtr comm) :
+   CoProcessor(grid, s),
+   path(path),
+   comm(comm),
+   mpiTypeFreeFlag(false)
+{
+   UbSystem::makeDirectory(path+"/mpi_io_cp");
+
+   memset(&dataSetParamStr, 0, sizeof(dataSetParamStr));
+   memset(&boundCondParamStr, 0, sizeof(boundCondParamStr));
+   
+   //-------------------------   define MPI types  ---------------------------------
+
+   MPI_Datatype typesGP[3] = { MPI_DOUBLE, MPI_INT, MPI_CHAR };
+   int blocksGP[3] = { 34, 6, 5 };
+   MPI_Aint offsetsGP[3], lbGP, extentGP;
+
+   offsetsGP[0] = 0;
+   MPI_Type_get_extent(MPI_DOUBLE, &lbGP, &extentGP);
+   offsetsGP[1] = blocksGP[0]*extentGP;
+
+   MPI_Type_get_extent(MPI_INT, &lbGP, &extentGP);
+   offsetsGP[2] = offsetsGP[1]+blocksGP[1]*extentGP;
+
+   MPI_Type_create_struct(3, blocksGP, offsetsGP, typesGP, &gridParamType);
+   MPI_Type_commit(&gridParamType);
+
+   //-----------------------------------------------------------------------
+
+   MPI_Type_contiguous(40, MPI_INT, &dataSetParamType);
+   MPI_Type_commit(&dataSetParamType);
+
+   //-----------------------------------------------------------------------
+
+   MPI_Datatype typesBlock[2] = { MPI_INT, MPI_CHAR };
+   int blocksBlock[2] = { 13, 1 };
+   MPI_Aint offsetsBlock[2], lbBlock, extentBlock;
+
+   offsetsBlock[0] = 0;
+   MPI_Type_get_extent(MPI_INT, &lbBlock, &extentBlock);
+   offsetsBlock[1] = blocksBlock[0]*extentBlock;
+
+   MPI_Type_create_struct(2, blocksBlock, offsetsBlock, typesBlock, &block3dType);
+   MPI_Type_commit(&block3dType);
+
+   //-----------------------------------------------------------------------
+
+   MPI_Datatype typesDataSet[3] = { MPI_DOUBLE, MPI_INT, MPI_CHAR };
+   int blocksDataSet[3] = { 2, 2, 2 };
+   MPI_Aint offsetsDatatSet[3], lbDataSet, extentDataSet;
+
+   offsetsDatatSet[0] = 0;
+   MPI_Type_get_extent(MPI_DOUBLE, &lbDataSet, &extentDataSet);
+   offsetsDatatSet[1] = blocksDataSet[0]*extentDataSet;
+
+   MPI_Type_get_extent(MPI_INT, &lbDataSet, &extentDataSet);
+   offsetsDatatSet[2] = offsetsDatatSet[1]+blocksDataSet[1]*extentDataSet;
+
+   MPI_Type_create_struct(3, blocksDataSet, offsetsDatatSet, typesDataSet, &dataSetType);
+   MPI_Type_commit(&dataSetType);
+
+   //-----------------------------------------------------------------------
+
+   MPI_Type_contiguous(4, MPI_INT, &boundCondParamType);
+   MPI_Type_commit(&boundCondParamType);
+
+   //-----------------------------------------------------------------------
+
+   MPI_Datatype typesBC[3] = { MPI_LONG_LONG_INT, MPI_FLOAT, MPI_CHAR };
+   int blocksBC[3] = { 5, 39, 1 };
+   MPI_Aint offsetsBC[3], lbBC, extentBC;
+
+   offsetsBC[0] = 0;
+   MPI_Type_get_extent(MPI_LONG_LONG_INT, &lbBC, &extentBC);
+   offsetsBC[1] = blocksBC[0]*extentBC;
+
+   MPI_Type_get_extent(MPI_FLOAT, &lbBC, &extentBC);
+   offsetsBC[2] = offsetsBC[1]+blocksBC[1]*extentBC;
+
+   MPI_Type_create_struct(3, blocksBC, offsetsBC, typesBC, &boundCondType);
+   MPI_Type_commit(&boundCondType);
+
+   //---------------------------------------
+
+   MPI_Type_contiguous(3, MPI_INT, &boundCondTypeAdd);
+   MPI_Type_commit(&boundCondTypeAdd);
+
+}
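+
+/* A note on the struct datatypes above (a sketch, assuming GridParam lays out its
+   members in declaration order as 34 doubles, then 6 ints, then 5 chars):
+   MPI_Type_create_struct mirrors that layout with three blocks,
+
+      blocksGP  = { 34, 6, 5 };                        // element count per section
+      typesGP   = { MPI_DOUBLE, MPI_INT, MPI_CHAR };   // element type per section
+      offsetsGP = { 0, 34*sizeof(double), 34*sizeof(double) + 6*sizeof(int) };
+
+   so a single MPI_File_write_at(..., gridParameters, 1, gridParamType, ...) transfers
+   the whole struct. Should the compiler insert padding between the sections, the
+   offsets would have to come from offsetof(GridParam, member) instead of the extent
+   arithmetic used here. */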
+//////////////////////////////////////////////////////////////////////////
+MPIIORestart21CoProcessor::~MPIIORestart21CoProcessor()
+{
+   MPI_Type_free(&gridParamType);
+   MPI_Type_free(&dataSetParamType);
+   MPI_Type_free(&block3dType);
+   MPI_Type_free(&dataSetType);
+   MPI_Type_free(&boundCondParamType);
+   MPI_Type_free(&boundCondType);
+   MPI_Type_free(&boundCondTypeAdd);
+
+   if (mpiTypeFreeFlag)
+   {
+      MPI_Type_free(&dataSetDoubleType);
+      MPI_Type_free(&bcindexmatrixType);
+   }
+}
+
+//////////////////////////////////////////////////////////////////////////
+void MPIIORestart21CoProcessor::process(double step)
+{
+   if (scheduler->isDue(step))
+   {
+      if (comm->isRoot()) UBLOG(logINFO, "MPIIORestart2CoProcessor save step: "<<step);
+      if (comm->isRoot()) UBLOG(logINFO, "Save check point - start");
+      /*if (comm->isRoot())*/ clearAllFiles((int)step);
+      writeBlocks((int)step);
+      writeDataSet((int)step);
+      writeBoundaryConds((int)step);
+      if (comm->isRoot()) UBLOG(logINFO, "Save check point - end");
+   }
+}
+//////////////////////////////////////////////////////////////////////////
+void MPIIORestart21CoProcessor::clearAllFiles(int step)
+{
+   MPI_File file_handler1, file_handler2, file_handler3;
+   MPI_Info info = MPI_INFO_NULL;
+   MPI_Offset new_size = 0;
+
+   UbSystem::makeDirectory(path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step));
+   std::string filename1 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpBlocks.bin";
+   //MPI_File_delete(filename1.c_str(), info);
+   int rc1 = MPI_File_open(MPI_COMM_WORLD, filename1.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &file_handler1);
+   if (rc1 != MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file " + filename1);
+   MPI_File_set_size(file_handler1, new_size);
+   //MPI_File_sync(file_handler1);
+   MPI_File_close(&file_handler1);
+
+   std::string filename2 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSet.bin";
+   //MPI_File_delete(filename2.c_str(), info);
+   int rc2 = MPI_File_open(MPI_COMM_WORLD, filename2.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler2);
+   if (rc2 != MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file " + filename2);
+   MPI_File_set_size(file_handler2, new_size);
+   //MPI_File_sync(file_handler2);
+   MPI_File_close(&file_handler2);
+
+   std::string filename3 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpBC.bin";
+   //MPI_File_delete(filename3.c_str(), info);
+   int rc3 = MPI_File_open(MPI_COMM_WORLD, filename3.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler3);
+   if (rc3 != MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file " + filename3);
+   MPI_File_set_size(file_handler3, new_size);
+   //MPI_File_sync(file_handler3);
+   MPI_File_close(&file_handler3);
+}
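+// Note: MPI_File_open and MPI_File_set_size are collective over MPI_COMM_WORLD, so all
+// ranks must enter clearAllFiles() together; this is why the "if (comm->isRoot())"
+// guard around the call in process() is commented out.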
+//////////////////////////////////////////////////////////////////////////
+void MPIIORestart21CoProcessor::writeBlocks(int step)
+{
+   int rank, size;
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   //MPI_Comm_size(MPI_COMM_WORLD, &size);
+   size = 1; // only one block-count slot is stored in the file header: the root process writes for all ranks
+
+   grid->deleteBlockIDs();
+   RenumberBlockVisitor renumber;
+   grid->accept(renumber);
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::writeBlocks start collect data rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   int blocksCount = 0; // total number of blocks in the grid, max 2147483647 blocks!
+   int minInitLevel = this->grid->getCoarsestInitializedLevel();
+   int maxInitLevel = this->grid->getFinestInitializedLevel();
+
+   std::vector<Block3DPtr> blocksVector[25]; // max 25 levels
+   for (int level = minInitLevel; level<=maxInitLevel; level++)
+   {
+      //grid->getBlocks(level, rank, blockVector[level]);
+      grid->getBlocks(level, blocksVector[level]);
+      blocksCount += static_cast<int>(blocksVector[level].size());
+   }
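+
+   // Unlike writeDataSet() and writeBoundaryConds(), every rank collects ALL blocks here
+   // (getBlocks without a rank filter): the block table is global, and only the root
+   // process writes it to the file further below.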
+
+   GridParam* gridParameters = new GridParam;
+   gridParameters->trafoParams[0] = grid->getCoordinateTransformator()->Tx1;
+   gridParameters->trafoParams[1] = grid->getCoordinateTransformator()->Tx2;
+   gridParameters->trafoParams[2] = grid->getCoordinateTransformator()->Tx3;
+   gridParameters->trafoParams[3] = grid->getCoordinateTransformator()->Sx1;
+   gridParameters->trafoParams[4] = grid->getCoordinateTransformator()->Sx2;
+   gridParameters->trafoParams[5] = grid->getCoordinateTransformator()->Sx3;
+   gridParameters->trafoParams[6] = grid->getCoordinateTransformator()->alpha;
+   gridParameters->trafoParams[7] = grid->getCoordinateTransformator()->beta;
+   gridParameters->trafoParams[8] = grid->getCoordinateTransformator()->gamma;
+
+   gridParameters->trafoParams[9] = grid->getCoordinateTransformator()->toX1factorX1;
+   gridParameters->trafoParams[10] = grid->getCoordinateTransformator()->toX1factorX2;
+   gridParameters->trafoParams[11] = grid->getCoordinateTransformator()->toX1factorX3;
+   gridParameters->trafoParams[12] = grid->getCoordinateTransformator()->toX1delta;
+   gridParameters->trafoParams[13] = grid->getCoordinateTransformator()->toX2factorX1;
+   gridParameters->trafoParams[14] = grid->getCoordinateTransformator()->toX2factorX2;
+   gridParameters->trafoParams[15] = grid->getCoordinateTransformator()->toX2factorX3;
+   gridParameters->trafoParams[16] = grid->getCoordinateTransformator()->toX2delta;
+   gridParameters->trafoParams[17] = grid->getCoordinateTransformator()->toX3factorX1;
+   gridParameters->trafoParams[18] = grid->getCoordinateTransformator()->toX3factorX2;
+   gridParameters->trafoParams[19] = grid->getCoordinateTransformator()->toX3factorX3;
+   gridParameters->trafoParams[20] = grid->getCoordinateTransformator()->toX3delta;
+
+   gridParameters->trafoParams[21] = grid->getCoordinateTransformator()->fromX1factorX1;
+   gridParameters->trafoParams[22] = grid->getCoordinateTransformator()->fromX1factorX2;
+   gridParameters->trafoParams[23] = grid->getCoordinateTransformator()->fromX1factorX3;
+   gridParameters->trafoParams[24] = grid->getCoordinateTransformator()->fromX1delta;
+   gridParameters->trafoParams[25] = grid->getCoordinateTransformator()->fromX2factorX1;
+   gridParameters->trafoParams[26] = grid->getCoordinateTransformator()->fromX2factorX2;
+   gridParameters->trafoParams[27] = grid->getCoordinateTransformator()->fromX2factorX3;
+   gridParameters->trafoParams[28] = grid->getCoordinateTransformator()->fromX2delta;
+   gridParameters->trafoParams[29] = grid->getCoordinateTransformator()->fromX3factorX1;
+   gridParameters->trafoParams[30] = grid->getCoordinateTransformator()->fromX3factorX2;
+   gridParameters->trafoParams[31] = grid->getCoordinateTransformator()->fromX3factorX3;
+   gridParameters->trafoParams[32] = grid->getCoordinateTransformator()->fromX3delta;
+
+   gridParameters->active = grid->getCoordinateTransformator()->active;
+   gridParameters->transformation = grid->getCoordinateTransformator()->transformation;
+
+   gridParameters->deltaX = grid->getDeltaX(minInitLevel);
+   UbTupleInt3 blocknx = grid->getBlockNX();
+   gridParameters->blockNx1 = val<1>(blocknx);
+   gridParameters->blockNx2 = val<2>(blocknx);
+   gridParameters->blockNx3 = val<3>(blocknx);
+   gridParameters->nx1 = grid->getNX1();
+   gridParameters->nx2 = grid->getNX2();
+   gridParameters->nx3 = grid->getNX3();
+   gridParameters->periodicX1 = grid->isPeriodicX1();
+   gridParameters->periodicX2 = grid->isPeriodicX2();
+   gridParameters->periodicX3 = grid->isPeriodicX3();
+
+   //----------------------------------------------------------------------
+
+   Block3d* block3dArray = new Block3d[blocksCount];
+   int ic = 0;
+   for (int level = minInitLevel; level<=maxInitLevel; level++)
+   {
+      BOOST_FOREACH(Block3DPtr block, blocksVector[level])  //	all the blocks of the current level
+      {
+         // save data describing the block
+         block3dArray[ic].x1 = block->getX1();
+         block3dArray[ic].x2 = block->getX2();
+         block3dArray[ic].x3 = block->getX3();
+         block3dArray[ic].bundle = block->getBundle();
+         block3dArray[ic].rank = block->getRank();
+         block3dArray[ic].lrank = block->getLocalRank();
+         block3dArray[ic].part = block->getPart();
+         block3dArray[ic].globalID = block->getGlobalID();
+         block3dArray[ic].localID = block->getLocalID();
+         block3dArray[ic].level = block->getLevel();
+         block3dArray[ic].interpolationFlagCF = block->getInterpolationFlagCF();
+         block3dArray[ic].interpolationFlagFC = block->getInterpolationFlagFC();
+         block3dArray[ic].counter = block->getMaxGlobalID();
+         block3dArray[ic].active = block->isActive();
+
+         ic++;
+      }
+   }
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::writeBlocks start MPI IO rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   // write to the file
+   MPI_File file_handler;
+   MPI_Info info = MPI_INFO_NULL;
+   //MPI_Info_create (&info);
+   //MPI_Info_set(info,"romio_cb_write","enable");
+   //MPI_Info_set(info,"cb_buffer_size","4194304");
+   //MPI_Info_set(info,"striping_unit","4194304");
+
+   // if (comm->isRoot())
+   // {
+      UbSystem::makeDirectory(path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step));
+      std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpBlocks.bin";
+      int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &file_handler);
+      if (rc != MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file " + filename);
+   // }
+
+   double start, finish;
+   MPI_Offset write_offset = (MPI_Offset)(size*sizeof(int));
+      
+   if (comm->isRoot())
+   {
+      start = MPI_Wtime();
+      
+      // the root process writes the total number of blocks
+      MPI_File_write_at(file_handler, 0/*rank*sizeof(int)*/, &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
+      // the root process writes the parameters of the grid
+      MPI_File_write_at(file_handler, write_offset, gridParameters, 1, gridParamType, MPI_STATUS_IGNORE);
+      // the root process writes all the blocks
+      MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(GridParam)), &block3dArray[0], blocksCount, block3dType, MPI_STATUS_IGNORE);
+      //MPI_File_sync(file_handler);
+      //MPI_File_sync(file_handler);
+   }
+   MPI_File_close(&file_handler);
+ 
+   if (comm->isRoot())
+   {
+      finish = MPI_Wtime();
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::writeBlocks time: "<<finish-start<<" s");
+   }
+
+   delete[] block3dArray;
+   delete gridParameters;
+}
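+
+/* Resulting layout of cpBlocks.bin (a sketch derived from the writes above):
+
+      offset 0                                : int       blocksCount
+      offset sizeof(int)                      : GridParam
+      offset sizeof(int) + sizeof(GridParam)  : Block3d   block3dArray[blocksCount]
+
+   readBlocks() mirrors exactly these offsets when restoring the grid. */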
+
+void MPIIORestart21CoProcessor::writeDataSet(int step)
+{
+   int rank, size;
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+   int blocksCount = 0; // number of blocks that belong to this process
+
+   std::vector<Block3DPtr> blocksVector[25];
+   int minInitLevel = this->grid->getCoarsestInitializedLevel();
+   int maxInitLevel = this->grid->getFinestInitializedLevel();
+   for (int level = minInitLevel; level<=maxInitLevel; level++)
+   {
+      grid->getBlocks(level, rank, blocksVector[level]);
+      blocksCount += static_cast<int>(blocksVector[level].size());
+   }
+
+   DataSet* dataSetArray = new DataSet[blocksCount];
+   std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::writeDataSet start collect data rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   bool firstBlock = true;
+   int ic = 0;
+   for (int level = minInitLevel; level<=maxInitLevel; level++)
+   {
+      BOOST_FOREACH(Block3DPtr block, blocksVector[level])  //	blocks of the current level
+      {
+         dataSetArray[ic].globalID = block->getGlobalID();     // id of the block needed to find it while regenerating the grid
+         dataSetArray[ic].ghostLayerWidth = block->getKernel()->getGhostLayerWidth();
+         dataSetArray[ic].collFactor = block->getKernel()->getCollisionFactor();
+         dataSetArray[ic].deltaT = block->getKernel()->getDeltaT();
+         dataSetArray[ic].compressible = block->getKernel()->getCompressible();
+         dataSetArray[ic].withForcing = block->getKernel()->getWithForcing();
+
+         if (firstBlock) // && block->getKernel()) // when the first valid block is found...
+         {
+            boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageValuesArray3DPtr = block->getKernel()->getDataSet()->getAverageValues();
+            if (AverageValuesArray3DPtr)
+            {
+               dataSetParamStr.nx[0][0] = static_cast<int>(AverageValuesArray3DPtr->getNX1());
+               dataSetParamStr.nx[0][1] = static_cast<int>(AverageValuesArray3DPtr->getNX2());
+               dataSetParamStr.nx[0][2] = static_cast<int>(AverageValuesArray3DPtr->getNX3());
+               dataSetParamStr.nx[0][3] = static_cast<int>(AverageValuesArray3DPtr->getNX4());
+            }
+
+            boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
+            if (AverageVelocityArray3DPtr)
+            {
+               dataSetParamStr.nx[1][0] = static_cast<int>(AverageVelocityArray3DPtr->getNX1());
+               dataSetParamStr.nx[1][1] = static_cast<int>(AverageVelocityArray3DPtr->getNX2());
+               dataSetParamStr.nx[1][2] = static_cast<int>(AverageVelocityArray3DPtr->getNX3());
+               dataSetParamStr.nx[1][3] = static_cast<int>(AverageVelocityArray3DPtr->getNX4());
+            }
+
+            boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
+            if (AverageFluctArray3DPtr)
+            {
+               dataSetParamStr.nx[2][0] = static_cast<int>(AverageFluctArray3DPtr->getNX1());
+               dataSetParamStr.nx[2][1] = static_cast<int>(AverageFluctArray3DPtr->getNX2());
+               dataSetParamStr.nx[2][2] = static_cast<int>(AverageFluctArray3DPtr->getNX3());
+               dataSetParamStr.nx[2][3] = static_cast<int>(AverageFluctArray3DPtr->getNX4());
+            }
+
+            boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
+            if (AverageTripleArray3DPtr)
+            {
+               dataSetParamStr.nx[3][0] = static_cast<int>(AverageTripleArray3DPtr->getNX1());
+               dataSetParamStr.nx[3][1] = static_cast<int>(AverageTripleArray3DPtr->getNX2());
+               dataSetParamStr.nx[3][2] = static_cast<int>(AverageTripleArray3DPtr->getNX3());
+               dataSetParamStr.nx[3][3] = static_cast<int>(AverageTripleArray3DPtr->getNX4());
+            }
+
+            boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
+            if (ShearStressValArray3DPtr)
+            {
+               dataSetParamStr.nx[4][0] = static_cast<int>(ShearStressValArray3DPtr->getNX1());
+               dataSetParamStr.nx[4][1] = static_cast<int>(ShearStressValArray3DPtr->getNX2());
+               dataSetParamStr.nx[4][2] = static_cast<int>(ShearStressValArray3DPtr->getNX3());
+               dataSetParamStr.nx[4][3] = static_cast<int>(ShearStressValArray3DPtr->getNX4());
+            }
+
+            boost::shared_ptr< CbArray3D<LBMReal, IndexerX3X2X1> > relaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
+            if (relaxationFactor3DPtr)
+            {
+               dataSetParamStr.nx[5][0] = static_cast<int>(relaxationFactor3DPtr->getNX1());
+               dataSetParamStr.nx[5][1] = static_cast<int>(relaxationFactor3DPtr->getNX2());
+               dataSetParamStr.nx[5][2] = static_cast<int>(relaxationFactor3DPtr->getNX3());
+               dataSetParamStr.nx[5][3] = 1;
+            }
+
+            boost::shared_ptr< D3Q27EsoTwist3DSplittedVector > D3Q27EsoTwist3DSplittedVectorPtr = boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(block->getKernel()->getDataSet()->getFdistributions());
+            CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getLocalDistributions();
+            if (localDistributions)
+            {
+               dataSetParamStr.nx[6][0] = static_cast<int>(localDistributions->getNX1());
+               dataSetParamStr.nx[6][1] = static_cast<int>(localDistributions->getNX2());
+               dataSetParamStr.nx[6][2] = static_cast<int>(localDistributions->getNX3());
+               dataSetParamStr.nx[6][3] = static_cast<int>(localDistributions->getNX4());
+            }
+
+            CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getNonLocalDistributions();
+            if (nonLocalDistributions)
+            {
+               dataSetParamStr.nx[7][0] = static_cast<int>(nonLocalDistributions->getNX1());
+               dataSetParamStr.nx[7][1] = static_cast<int>(nonLocalDistributions->getNX2());
+               dataSetParamStr.nx[7][2] = static_cast<int>(nonLocalDistributions->getNX3());
+               dataSetParamStr.nx[7][3] = static_cast<int>(nonLocalDistributions->getNX4());
+            }
+
+            CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getZeroDistributions();
+            if (zeroDistributions)
+            {
+               dataSetParamStr.nx[8][0] = static_cast<int>(zeroDistributions->getNX1());
+               dataSetParamStr.nx[8][1] = static_cast<int>(zeroDistributions->getNX2());
+               dataSetParamStr.nx[8][2] = static_cast<int>(zeroDistributions->getNX3());
+               dataSetParamStr.nx[8][3] = 1;
+            }
+
+            // ... then save some parameters that are equal in all blocks
+            dataSetParamStr.nx1 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX1());
+            dataSetParamStr.nx2 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX2());
+            dataSetParamStr.nx3 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX3());
+
+            firstBlock = false;
+
+            // how many elements are in all arrays of DataSet (equal in all blocks)
+            int doubleCount = 0, temp;
+            for (int i = 0; i < 9; i++)      // 9 arrays (averageValues, averageVelocity, averageFluctuations,
+            {                                // averageTriplecorrelations, shearStressValues, relaxationFactor, 3 x fdistributions)
+               temp = 1;
+               for (int ii = 0; ii < 4; ii++)   // 4 dimensions (nx1, nx2, nx3, nx4)
+                  temp *= dataSetParamStr.nx[i][ii];
+               doubleCount += temp;
+            }
+            dataSetParamStr.doubleCountInBlock = doubleCount;
+         }
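+
+         // With identical array dimensions in every block, each block contributes exactly
+         // doubleCountInBlock = sum over i of nx[i][0]*nx[i][1]*nx[i][2]*nx[i][3] doubles,
+         // which is what makes the fixed-stride file layout further below possible.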
+
+         boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageValuesArray3DPtr = block->getKernel()->getDataSet()->getAverageValues();
+         if (AverageValuesArray3DPtr&&(dataSetParamStr.nx[0][0]>0)&&(dataSetParamStr.nx[0][1]>0)&&(dataSetParamStr.nx[0][2]>0)&&(dataSetParamStr.nx[0][3]>0))
+           doubleValuesArray.insert(doubleValuesArray.end(), AverageValuesArray3DPtr->getDataVector().begin(), AverageValuesArray3DPtr->getDataVector().end());
+
+         boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
+         if (AverageVelocityArray3DPtr&&(dataSetParamStr.nx[1][0]>0)&&(dataSetParamStr.nx[1][1]>0)&&(dataSetParamStr.nx[1][2]>0)&&(dataSetParamStr.nx[1][3]>0))
+            doubleValuesArray.insert(doubleValuesArray.end(), AverageVelocityArray3DPtr->getDataVector().begin(), AverageVelocityArray3DPtr->getDataVector().end());
+
+         boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
+         if (AverageFluctArray3DPtr&&(dataSetParamStr.nx[2][0]>0)&&(dataSetParamStr.nx[2][1]>0)&&(dataSetParamStr.nx[2][2]>0)&&(dataSetParamStr.nx[2][3]>0))
+            doubleValuesArray.insert(doubleValuesArray.end(), AverageFluctArray3DPtr->getDataVector().begin(), AverageFluctArray3DPtr->getDataVector().end());
+
+         boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
+         if (AverageTripleArray3DPtr&&(dataSetParamStr.nx[3][0]>0)&&(dataSetParamStr.nx[3][1]>0)&&(dataSetParamStr.nx[3][2]>0)&&(dataSetParamStr.nx[3][3]>0))
+            doubleValuesArray.insert(doubleValuesArray.end(), AverageTripleArray3DPtr->getDataVector().begin(), AverageTripleArray3DPtr->getDataVector().end());
+
+         boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
+         if (ShearStressValArray3DPtr&&(dataSetParamStr.nx[4][0]>0)&&(dataSetParamStr.nx[4][1]>0)&&(dataSetParamStr.nx[4][2]>0)&&(dataSetParamStr.nx[4][3]>0))
+            doubleValuesArray.insert(doubleValuesArray.end(), ShearStressValArray3DPtr->getDataVector().begin(), ShearStressValArray3DPtr->getDataVector().end());
+
+         boost::shared_ptr< CbArray3D<LBMReal, IndexerX3X2X1> > RelaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
+         if (RelaxationFactor3DPtr&&(dataSetParamStr.nx[5][0]>0)&&(dataSetParamStr.nx[5][1]>0)&&(dataSetParamStr.nx[5][2]>0))
+            doubleValuesArray.insert(doubleValuesArray.end(), RelaxationFactor3DPtr->getDataVector().begin(), RelaxationFactor3DPtr->getDataVector().end());
+
+         boost::shared_ptr< D3Q27EsoTwist3DSplittedVector > D3Q27EsoTwist3DSplittedVectorPtr = boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(block->getKernel()->getDataSet()->getFdistributions());
+         CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getLocalDistributions();
+         if (localDistributions&&(dataSetParamStr.nx[6][0]>0)&&(dataSetParamStr.nx[6][1]>0)&&(dataSetParamStr.nx[6][2]>0)&&(dataSetParamStr.nx[6][3]>0))
+            doubleValuesArray.insert(doubleValuesArray.end(), localDistributions->getDataVector().begin(), localDistributions->getDataVector().end());
+
+         CbArray4D <LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getNonLocalDistributions();
+         if (nonLocalDistributions&&(dataSetParamStr.nx[7][0]>0)&&(dataSetParamStr.nx[7][1]>0)&&(dataSetParamStr.nx[7][2]>0)&&(dataSetParamStr.nx[7][3]>0))
+            doubleValuesArray.insert(doubleValuesArray.end(), nonLocalDistributions->getDataVector().begin(), nonLocalDistributions->getDataVector().end());
+
+         CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributions = D3Q27EsoTwist3DSplittedVectorPtr->getZeroDistributions();
+         if (zeroDistributions&&(dataSetParamStr.nx[8][0]>0)&&(dataSetParamStr.nx[8][1]>0)&&(dataSetParamStr.nx[8][2]>0))
+            doubleValuesArray.insert(doubleValuesArray.end(), zeroDistributions->getDataVector().begin(), zeroDistributions->getDataVector().end());
+
+         ic++;
+      }
+   }
+
+   // register new MPI-type depending on the block-specific information
+   MPI_Type_contiguous(dataSetParamStr.doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
+   MPI_Type_commit(&dataSetDoubleType);
+   mpiTypeFreeFlag = true;
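+   // dataSetDoubleType depends on the runtime value of doubleCountInBlock and therefore
+   // cannot be committed in the constructor; mpiTypeFreeFlag tells the destructor that
+   // this type (and later bcindexmatrixType) has to be freed as well.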
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::writeDataSet start MPI IO rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   double start, finish;
+   if (comm->isRoot()) start = MPI_Wtime();
+
+   MPI_Info info = MPI_INFO_NULL;
+   //MPI_Info_create (&info);
+   //MPI_Info_set(info,"romio_cb_write","enable");
+   //MPI_Info_set(info,"cb_buffer_size","4194304");
+   //MPI_Info_set(info,"striping_unit","4194304");
+
+   // write to the file
+   MPI_File file_handler;
+   std::string filename = path+"/mpi_io_cp/mpi_io_cp_"+UbSystem::toString(step)+"/cpDataSet.bin";
+   int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE|MPI_MODE_WRONLY, info, &file_handler);
+   if (rc!=MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file "+filename);
+   
+   size_t sizeofOneDataSet = sizeof(DataSet) + dataSetParamStr.doubleCountInBlock * sizeof(double);
+   MPI_Offset write_offset = 0;
+
+   MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
+
+   for (int nb = 0; nb < blocksCount; nb++)
+   {
+      write_offset = (MPI_Offset)(sizeof(dataSetParam) + dataSetArray[nb].globalID * sizeofOneDataSet);
+      MPI_File_write_at(file_handler, write_offset, &dataSetArray[nb], 1, dataSetType, MPI_STATUS_IGNORE);
+      MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(DataSet)), &doubleValuesArray[nb * dataSetParamStr.doubleCountInBlock], 1, dataSetDoubleType, MPI_STATUS_IGNORE);
+   }
+
+   MPI_File_sync(file_handler);
+   MPI_File_close(&file_handler);
+
+   if (comm->isRoot())
+   {
+      finish = MPI_Wtime();
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::writeDataSet time: "<<finish-start<<" s");
+   }
+
+   delete[] dataSetArray;
+}
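+
+/* Resulting layout of cpDataSet.bin (a sketch derived from the writes above); records
+   are addressed by globalID, so the writing order does not matter:
+
+      offset 0            : dataSetParam (40 ints)
+      record of block g   : at sizeof(dataSetParam) + g * (sizeof(DataSet) + doubleCountInBlock * sizeof(double))
+                            -> DataSet header followed by doubleCountInBlock doubles   */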
+
+void MPIIORestart21CoProcessor::writeBoundaryConds(int step)
+{
+   int rank, size;
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::writeBoundaryConds start collect data rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   int blocksCount = 0;          // number of blocks that belong to this process
+   size_t allBytesCount = 0;     // number of bytes this process writes to the file
+   size_t count_boundCond = 0;   // total number of BoundaryConditions in all blocks
+   int count_indexContainer = 0; // total number of indexContainer values in all blocks
+
+   std::vector<Block3DPtr> blocksVector[25];
+   int minInitLevel = this->grid->getCoarsestInitializedLevel();
+   int maxInitLevel = this->grid->getFinestInitializedLevel();
+   for (int level = minInitLevel; level<=maxInitLevel; level++)
+   {
+      grid->getBlocks(level, rank, blocksVector[level]);
+      blocksCount += static_cast<int>(blocksVector[level].size());
+   }
+
+   BCAdd* bcAddArray = new BCAdd[blocksCount];
+   size_t* bytesCount = new size_t[blocksCount];  // quantity of bytes, that each block writes to the file
+   std::vector<BoundaryCondition>* bcVector = new std::vector<BoundaryCondition>[blocksCount];
+   std::vector<int>* bcindexmatrixVector = new std::vector<int>[blocksCount];
+   std::vector<int>* indexContainerVector = new std::vector<int>[blocksCount];
+
+   bool bcindexmatrixCountNotInit = true;
+   int ic = 0;
+   for (int level = minInitLevel; level<=maxInitLevel; level++)
+   {
+      BOOST_FOREACH(Block3DPtr block, blocksVector[level])  // all the blocks of the current level
+      {
+         BCArray3DPtr bcArr = block->getKernel()->getBCProcessor()->getBCArray();
+
+         bcAddArray[ic].globalID = block->getGlobalID(); // id of the block needed to find it while regenerating the grid
+         bcAddArray[ic].boundCond_count = 0;             // how many BoundaryConditions in this block
+         bcAddArray[ic].indexContainer_count = 0;        // how many indexContainer-values in this block
+         bytesCount[ic] = sizeof(BCAdd);
+         bcVector[ic].resize(0);
+         bcindexmatrixVector[ic].resize(0);
+         indexContainerVector[ic].resize(0);
+
+         for (int bc = 0; bc<bcArr->getBCVectorSize(); bc++)
+         {
+            BoundaryCondition* bouCond = new BoundaryCondition();
+            if (bcArr->bcvector[bc]==NULL)
+            {
+               memset(bouCond, 0, sizeof(BoundaryCondition));
+            }
+            else
+            {
+               bouCond->noslipBoundaryFlags = bcArr->bcvector[bc]->getNoSlipBoundary();
+               bouCond->slipBoundaryFlags = bcArr->bcvector[bc]->getSlipBoundary();
+               bouCond->velocityBoundaryFlags = bcArr->bcvector[bc]->getVelocityBoundary();
+               bouCond->densityBoundaryFlags = bcArr->bcvector[bc]->getDensityBoundary();
+               bouCond->wallModelBoundaryFlags = bcArr->bcvector[bc]->getWallModelBoundary();
+               bouCond->bcVelocityX1 = bcArr->bcvector[bc]->getBoundaryVelocityX1();
+               bouCond->bcVelocityX2 = bcArr->bcvector[bc]->getBoundaryVelocityX2();
+               bouCond->bcVelocityX3 = bcArr->bcvector[bc]->getBoundaryVelocityX3();
+               bouCond->bcDensity = bcArr->bcvector[bc]->getBoundaryDensity();
+               bouCond->bcLodiDensity = bcArr->bcvector[bc]->getDensityLodiDensity();
+               bouCond->bcLodiVelocityX1 = bcArr->bcvector[bc]->getDensityLodiVelocityX1();
+               bouCond->bcLodiVelocityX2 = bcArr->bcvector[bc]->getDensityLodiVelocityX2();
+               bouCond->bcLodiVelocityX3 = bcArr->bcvector[bc]->getDensityLodiVelocityX3();
+               bouCond->bcLodiLentgh = bcArr->bcvector[bc]->getDensityLodiLength();
+               bouCond->nx1 = bcArr->bcvector[bc]->nx1;
+               bouCond->nx2 = bcArr->bcvector[bc]->nx2;
+               bouCond->nx3 = bcArr->bcvector[bc]->nx3;
+               for (int iq = 0; iq<26; iq++)
+                  bouCond->q[iq] = bcArr->bcvector[bc]->getQ(iq);
+               bouCond->algorithmType = bcArr->bcvector[bc]->getBcAlgorithmType();
+            }
+
+            bcVector[ic].push_back(*bouCond);
+            bcAddArray[ic].boundCond_count++;
+            count_boundCond++;
+            bytesCount[ic] += sizeof(BoundaryCondition);
+         }
+         
+         if (bcindexmatrixCountNotInit)
+         {
+            boundCondParamStr.nx1 = static_cast<int>(bcArr->bcindexmatrix.getNX1());
+            boundCondParamStr.nx2 = static_cast<int>(bcArr->bcindexmatrix.getNX2());
+            boundCondParamStr.nx3 = static_cast<int>(bcArr->bcindexmatrix.getNX3());
+            boundCondParamStr.bcindexmatrixCount = static_cast<int>(bcArr->bcindexmatrix.getDataVector().size());
+            bcindexmatrixCountNotInit = false;
+         }
+         bcindexmatrixVector[ic].insert(bcindexmatrixVector[ic].begin(), bcArr->bcindexmatrix.getDataVector().begin(), bcArr->bcindexmatrix.getDataVector().end());
+         bytesCount[ic] += boundCondParamStr.bcindexmatrixCount * sizeof(int);
+
+         indexContainerVector[ic].insert(indexContainerVector[ic].begin(), bcArr->indexContainer.begin(), bcArr->indexContainer.end());
+         bcAddArray[ic].indexContainer_count = static_cast<int>(bcArr->indexContainer.size());
+         count_indexContainer += bcAddArray[ic].indexContainer_count;
+         bytesCount[ic] += bcAddArray[ic].indexContainer_count * sizeof(int);
+
+         allBytesCount += bytesCount[ic];
+
+         ic++;
+      }
+   }
+
+   MPI_Type_contiguous(boundCondParamStr.bcindexmatrixCount, MPI_INT, &bcindexmatrixType);
+   MPI_Type_commit(&bcindexmatrixType);
+   mpiTypeFreeFlag = true;
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::writeBoundaryConds start MPI IO rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   double start, finish;
+   if (comm->isRoot()) start = MPI_Wtime();
+
+   MPI_Info info = MPI_INFO_NULL;
+   //MPI_Info_create (&info);
+   //MPI_Info_set(info,"romio_cb_write","enable");
+   //MPI_Info_set(info,"cb_buffer_size","4194304");
+   //MPI_Info_set(info,"striping_unit","4194304");
+
+   MPI_File file_handler;
+   std::string filename = path+"/mpi_io_cp/mpi_io_cp_"+UbSystem::toString(step)+"/cpBC.bin";
+   int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE|MPI_MODE_WRONLY, info, &file_handler);
+   if (rc!=MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file "+filename);
+
+   MPI_Offset write_offset = (MPI_Offset)(sizeof(boundCondParam) + grid->getNumberOfBlocks() * sizeof(size_t));
+   size_t next_file_offset = 0;
+   if (size > 1)
+   {
+      if (rank == 0)
+      {
+         next_file_offset = write_offset + allBytesCount;
+         MPI_Send(&next_file_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
+      }
+      else
+      {
+         MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+         next_file_offset = write_offset + allBytesCount;
+         if (rank < size - 1)
+            MPI_Send(&next_file_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
+      }
+   }
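+
+   // The ranks serialize their file regions with a simple relay: rank 0 starts directly
+   // behind the header and the index table, sends its end offset to rank 1, which takes
+   // it as its own start offset, and so on. This yields non-overlapping, densely packed
+   // per-rank regions without a collective call (MPI_Exscan over allBytesCount would be
+   // the collective alternative).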
+
+   MPI_File_write_at(file_handler, 0, &boundCondParamStr, 1, boundCondParamType, MPI_STATUS_IGNORE);
+
+   MPI_Offset write_offsetIndex;
+
+   for (int nb = 0; nb < blocksCount; nb++)
+   {
+      write_offsetIndex = (MPI_Offset)(sizeof(boundCondParam) + bcAddArray[nb].globalID * sizeof(size_t));
+      MPI_File_write_at(file_handler, write_offsetIndex, &write_offset, 1, MPI_LONG_LONG_INT, MPI_STATUS_IGNORE);
+      
+      MPI_File_write_at(file_handler, write_offset, &bcAddArray[nb], 1, boundCondTypeAdd, MPI_STATUS_IGNORE);
+      if (bcVector[nb].size() > 0)
+         MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(BCAdd)), &bcVector[nb][0], bcAddArray[nb].boundCond_count, boundCondType, MPI_STATUS_IGNORE);
+
+      if (bcindexmatrixVector[nb].size() > 0)
+         MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(BCAdd) + bcAddArray[nb].boundCond_count * sizeof(BoundaryCondition)),
+            &bcindexmatrixVector[nb][0], 1, bcindexmatrixType, MPI_STATUS_IGNORE);
+     
+      if (indexContainerVector[nb].size() > 0)
+         MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(BCAdd) + bcAddArray[nb].boundCond_count * sizeof(BoundaryCondition) + boundCondParamStr.bcindexmatrixCount * sizeof(int)),
+            &indexContainerVector[nb][0], bcAddArray[nb].indexContainer_count, MPI_INT, MPI_STATUS_IGNORE);
+      
+      write_offset += bytesCount[nb];
+   }
+
+   MPI_File_sync(file_handler);
+   MPI_File_close(&file_handler);
+
+   if (comm->isRoot())
+   {
+      finish = MPI_Wtime();
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::writeBoundaryConds time: "<<finish-start<<" s");
+   }
+
+   delete[] bcAddArray;
+   delete[] bytesCount;
+   delete[] bcVector;
+   delete[] bcindexmatrixVector;
+   delete[] indexContainerVector;
+}
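+
+/* Resulting layout of cpBC.bin (a sketch derived from the writes above):
+
+      offset 0  : boundCondParam
+      then      : an index table of grid->getNumberOfBlocks() offset slots
+                  (sizeof(size_t) each), indexed by globalID
+      record    : BCAdd, then boundCond_count BoundaryConditions, then
+                  bcindexmatrixCount ints, then indexContainer_count ints
+
+   The records are variable-length, hence the index table - in contrast to the
+   fixed-stride cpDataSet.bin. */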
+
+//------------------------------------------- READ -----------------------------------------------
+void MPIIORestart21CoProcessor::restart(int step)
+{
+   if (comm->isRoot()) UBLOG(logINFO, "MPIIORestart21CoProcessor restart step: "<<step);
+   if (comm->isRoot()) UBLOG(logINFO, "Load check point - start");
+   readBlocks(step);
+   readDataSet(step);
+   readBoundaryConds(step);
+   if (comm->isRoot()) UBLOG(logINFO, "Load check point - end");
+   this->reconnect(grid);
+}
+
+void MPIIORestart21CoProcessor::readBlocks(int step)
+{
+   int rank, size;
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   //MPI_Comm_size(MPI_COMM_WORLD, &size);
+   size = 1; // the file header holds a single block count, written by the root process
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::readBlocks start MPI IO rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   double start, finish;
+   if (comm->isRoot()) start = MPI_Wtime();
+
+   MPI_File file_handler;
+   std::string filename = path+"/mpi_io_cp/mpi_io_cp_"+UbSystem::toString(step)+"/cpBlocks.bin";
+   int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+   if (rc!=MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file "+filename);
+
+   // read count of blocks
+   int blocksCount = 0;
+   //MPI_File_read_at(file_handler, rank*sizeof(int), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
+   MPI_File_read_at(file_handler, 0, &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
+   Block3d* block3dArray = new Block3d[blocksCount];
+   
+   GridParam* gridParameters = new GridParam;
+
+   // calculate the read offset
+   MPI_Offset read_offset = (MPI_Offset)(size*sizeof(int));
+
+   // read parameters of the grid
+   MPI_File_read_at(file_handler, read_offset, gridParameters, 1, gridParamType, MPI_STATUS_IGNORE);
+   // read all the blocks
+   MPI_File_read_at(file_handler, (MPI_Offset)(read_offset+sizeof(GridParam)), &block3dArray[0], blocksCount, block3dType, MPI_STATUS_IGNORE);
+
+   MPI_File_close(&file_handler);
+
+   if (comm->isRoot())
+   {
+      finish = MPI_Wtime();
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::readBlocks time: "<<finish-start<<" s");
+   }
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::readBlocks start of restore of data, rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   // clear the grid
+   std::vector<Block3DPtr> blocksVector;
+   grid->getBlocks(0, blocksVector);
+   int del = 0;
+   BOOST_FOREACH(Block3DPtr block, blocksVector)
+   {
+      grid->deleteBlock(block);
+      del++;
+   }
+
+   // restore the grid
+   CoordinateTransformation3DPtr trafo(new CoordinateTransformation3D());
+   trafo->Tx1 = gridParameters->trafoParams[0];
+   trafo->Tx2 = gridParameters->trafoParams[1];
+   trafo->Tx3 = gridParameters->trafoParams[2];
+   trafo->Sx1 = gridParameters->trafoParams[3];
+   trafo->Sx2 = gridParameters->trafoParams[4];
+   trafo->Sx3 = gridParameters->trafoParams[5];
+   trafo->alpha = gridParameters->trafoParams[6];
+   trafo->beta = gridParameters->trafoParams[7];
+   trafo->gamma = gridParameters->trafoParams[8];
+
+   trafo->toX1factorX1 = gridParameters->trafoParams[9];
+   trafo->toX1factorX2 = gridParameters->trafoParams[10];
+   trafo->toX1factorX3 = gridParameters->trafoParams[11];
+   trafo->toX1delta = gridParameters->trafoParams[12];
+   trafo->toX2factorX1 = gridParameters->trafoParams[13];
+   trafo->toX2factorX2 = gridParameters->trafoParams[14];
+   trafo->toX2factorX3 = gridParameters->trafoParams[15];
+   trafo->toX2delta = gridParameters->trafoParams[16];
+   trafo->toX3factorX1 = gridParameters->trafoParams[17];
+   trafo->toX3factorX2 = gridParameters->trafoParams[18];
+   trafo->toX3factorX3 = gridParameters->trafoParams[19];
+   trafo->toX3delta = gridParameters->trafoParams[20];
+
+   trafo->fromX1factorX1 = gridParameters->trafoParams[21];
+   trafo->fromX1factorX2 = gridParameters->trafoParams[22];
+   trafo->fromX1factorX3 = gridParameters->trafoParams[23];
+   trafo->fromX1delta = gridParameters->trafoParams[24];
+   trafo->fromX2factorX1 = gridParameters->trafoParams[25];
+   trafo->fromX2factorX2 = gridParameters->trafoParams[26];
+   trafo->fromX2factorX3 = gridParameters->trafoParams[27];
+   trafo->fromX2delta = gridParameters->trafoParams[28];
+   trafo->fromX3factorX1 = gridParameters->trafoParams[29];
+   trafo->fromX3factorX2 = gridParameters->trafoParams[30];
+   trafo->fromX3factorX3 = gridParameters->trafoParams[31];
+   trafo->fromX3delta = gridParameters->trafoParams[32];
+
+   trafo->active = gridParameters->active;
+   trafo->transformation = gridParameters->transformation;
+
+   grid->setCoordinateTransformator(trafo);
+
+   grid->setDeltaX(gridParameters->deltaX);
+   grid->setBlockNX(gridParameters->blockNx1, gridParameters->blockNx2, gridParameters->blockNx3);
+   grid->setNX1(gridParameters->nx1);
+   grid->setNX2(gridParameters->nx2);
+   grid->setNX3(gridParameters->nx3);
+   grid->setPeriodicX1(gridParameters->periodicX1);
+   grid->setPeriodicX2(gridParameters->periodicX2);
+   grid->setPeriodicX3(gridParameters->periodicX3);
+
+   // regenerate blocks
+   for (int n = 0; n<blocksCount; n++)
+   {
+      Block3DPtr block(new Block3D(block3dArray[n].x1, block3dArray[n].x2, block3dArray[n].x3, block3dArray[n].level));
+      block->setActive(block3dArray[n].active);
+      block->setBundle(block3dArray[n].bundle);
+      block->setRank(block3dArray[n].rank);
+      block->setLocalRank(block3dArray[n].lrank);
+      block->setGlobalID(block3dArray[n].globalID);
+      block->setLocalID(block3dArray[n].localID);
+      block->setPart(block3dArray[n].part);
+      block->setLevel(block3dArray[n].level);
+      block->interpolationFlagCF = block3dArray[n].interpolationFlagCF;
+      block->interpolationFlagFC = block3dArray[n].interpolationFlagFC;
+
+      grid->addBlock(block);
+   }
+
+   delete gridParameters;
+   delete[] block3dArray;
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::readBlocks end of restore of data, rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+}
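+
+// Note: the blocks are restored with the rank fields saved at checkpoint time, and
+// readDataSet()/readBoundaryConds() select blocks by that rank, so the restarted run
+// appears to assume the same number of MPI processes as the run that wrote the checkpoint.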
+
+void MPIIORestart21CoProcessor::readDataSet(int step)
+{
+   int rank, size;
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::readDataSet start MPI IO rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+   double start, finish;
+   if (comm->isRoot()) start = MPI_Wtime();
+
+   int blocksCount = 0; // number of blocks that belong to this process
+
+   // collect from the grid the blocks that belong to this process
+   std::vector<Block3DPtr> blocksVector[25];
+   int minInitLevel = this->grid->getCoarsestInitializedLevel();
+   int maxInitLevel = this->grid->getFinestInitializedLevel();
+   for (int level = minInitLevel; level <= maxInitLevel; level++)
+   {
+      grid->getBlocks(level, rank, blocksVector[level]);
+      blocksCount += static_cast<int>(blocksVector[level].size());
+   }
+
+   DataSet* dataSetArray = new DataSet[blocksCount];
+
+   MPI_File file_handler;
+   std::string filename = path+"/mpi_io_cp/mpi_io_cp_"+UbSystem::toString(step)+"/cpDataSet.bin";
+   int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+   if (rc!=MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file "+filename);
+ 
+   MPI_File_read_at(file_handler, (MPI_Offset)0, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
+   
+   std::vector<double> doubleValuesArray(blocksCount * dataSetParamStr.doubleCountInBlock); // double-values in all blocks 
+
+   // define MPI_types depending on the block-specific information
+   MPI_Type_contiguous(dataSetParamStr.doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
+   MPI_Type_commit(&dataSetDoubleType);
+   mpiTypeFreeFlag = true;
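+   // The type is rebuilt from the dataSetParam header just read, so a restarted run does
+   // not need to know doubleCountInBlock in advance.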
+
+   int ic = 0;
+   MPI_Offset read_offset;
+   size_t sizeofOneDataSet = size_t(sizeof(DataSet) + dataSetParamStr.doubleCountInBlock * sizeof(double));
+   for (int level = minInitLevel; level <= maxInitLevel; level++)
+   {
+      BOOST_FOREACH(Block3DPtr block, blocksVector[level])  //	blocks of the current level
+      {
+         read_offset = (MPI_Offset)(sizeof(dataSetParam) + block->getGlobalID() * sizeofOneDataSet);
+         MPI_File_read_at(file_handler, read_offset, &dataSetArray[ic], 1, dataSetType, MPI_STATUS_IGNORE);
+         MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(DataSet)), &doubleValuesArray[ic * dataSetParamStr.doubleCountInBlock], 1, dataSetDoubleType, MPI_STATUS_IGNORE);
+         ic++;
+      }
+   }
+
+   MPI_File_close(&file_handler);
+
+   if (comm->isRoot())
+   {
+      finish = MPI_Wtime();
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::readDataSet time: "<<finish-start<<" s");
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::readDataSet start of restore of data, rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+   
+   size_t index = 0, nextVectorSize = 0;
+   std::vector<double> vectorsOfValues[9];
+   for (int n = 0; n<blocksCount; n++)
+   {
+      for (int b = 0; b<9; b++) // assign the appropriate slice of values to each of the 9 dataSet arrays
+      {
+         nextVectorSize = dataSetParamStr.nx[b][0]* dataSetParamStr.nx[b][1]* dataSetParamStr.nx[b][2]* dataSetParamStr.nx[b][3];
+         vectorsOfValues[b].assign(doubleValuesArray.data()+index, doubleValuesArray.data()+index+nextVectorSize);
+         index += nextVectorSize;
+      }
+
+      // fill dataSet arrays
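+      // an array whose extents nx[b][0..3] are all zero was not written to the checkpoint;
+      // its pointer below is left empty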
+      AverageValuesArray3DPtr mAverageValues;
+      if ((dataSetParamStr.nx[0][0]==0)&&(dataSetParamStr.nx[0][1]==0)&&(dataSetParamStr.nx[0][2]==0)&&(dataSetParamStr.nx[0][3]==0))
+         mAverageValues = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr();
+      else
+         mAverageValues = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[0], dataSetParamStr.nx[0][0], dataSetParamStr.nx[0][1], dataSetParamStr.nx[0][2], dataSetParamStr.nx[0][3]));
+
+      AverageValuesArray3DPtr mAverageVelocity;
+      if ((dataSetParamStr.nx[1][0]==0)&&(dataSetParamStr.nx[1][1]==0)&&(dataSetParamStr.nx[1][2]==0)&&(dataSetParamStr.nx[1][3]==0))
+         mAverageVelocity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr();
+      else
+         mAverageVelocity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[1], dataSetParamStr.nx[1][0], dataSetParamStr.nx[1][1], dataSetParamStr.nx[1][2], dataSetParamStr.nx[1][3]));
+
+      AverageValuesArray3DPtr mAverageFluktuations;
+      if ((dataSetParamStr.nx[2][0]==0)&&(dataSetParamStr.nx[2][1]==0)&&(dataSetParamStr.nx[2][2]==0)&&(dataSetParamStr.nx[2][3]==0))
+         mAverageFluktuations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr();
+      else
+         mAverageFluktuations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[2], dataSetParamStr.nx[2][0], dataSetParamStr.nx[2][1], dataSetParamStr.nx[2][2], dataSetParamStr.nx[2][3]));
+
+      AverageValuesArray3DPtr mAverageTriplecorrelations;
+      if ((dataSetParamStr.nx[3][0]==0)&&(dataSetParamStr.nx[3][1]==0)&&(dataSetParamStr.nx[3][2]==0)&&(dataSetParamStr.nx[3][3]==0))
+         mAverageTriplecorrelations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr();
+      else
+         mAverageTriplecorrelations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[3], dataSetParamStr.nx[3][0], dataSetParamStr.nx[3][1], dataSetParamStr.nx[3][2], dataSetParamStr.nx[3][3]));
+
+      ShearStressValuesArray3DPtr mShearStressValues;
+      if ((dataSetParamStr.nx[4][0]==0)&&(dataSetParamStr.nx[4][1]==0)&&(dataSetParamStr.nx[4][2]==0)&&(dataSetParamStr.nx[4][3]==0))
+         mShearStressValues = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr();
+      else
+         mShearStressValues = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[4], dataSetParamStr.nx[4][0], dataSetParamStr.nx[4][1], dataSetParamStr.nx[4][2], dataSetParamStr.nx[4][3]));
+
+      RelaxationFactorArray3DPtr mRelaxationFactor;
+      if ((dataSetParamStr.nx[5][0]==0)&&(dataSetParamStr.nx[5][1]==0)&&(dataSetParamStr.nx[5][2]==0))
+         mRelaxationFactor = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr();
+      else
+         mRelaxationFactor = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(vectorsOfValues[5], dataSetParamStr.nx[5][0], dataSetParamStr.nx[5][1], dataSetParamStr.nx[5][2]));
+
+      DistributionArray3DPtr mFdistributions(new D3Q27EsoTwist3DSplittedVector());
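+      // the f-distributions are restored from their three EsoTwist parts:
+      // local, non-local, and zero (rest) distributions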
+
+      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[6], dataSetParamStr.nx[6][0], dataSetParamStr.nx[6][1], dataSetParamStr.nx[6][2], dataSetParamStr.nx[6][3])));
+      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues[7], dataSetParamStr.nx[7][0], dataSetParamStr.nx[7][1], dataSetParamStr.nx[7][2], dataSetParamStr.nx[7][3])));
+      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(vectorsOfValues[8], dataSetParamStr.nx[8][0], dataSetParamStr.nx[8][1], dataSetParamStr.nx[8][2])));
+ 
+      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX1(dataSetParamStr.nx1);
+      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX2(dataSetParamStr.nx2);
+      boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX3(dataSetParamStr.nx3);
+
+      DataSet3DPtr dataSetPtr = DataSet3DPtr(new DataSet3D());
+      dataSetPtr->setAverageValues(mAverageValues);
+      dataSetPtr->setAverageVelocity(mAverageVelocity);
+      dataSetPtr->setAverageFluctuations(mAverageFluktuations);
+      dataSetPtr->setAverageTriplecorrelations(mAverageTriplecorrelations);
+      dataSetPtr->setShearStressValues(mShearStressValues);
+      dataSetPtr->setRelaxationFactor(mRelaxationFactor);
+      dataSetPtr->setFdistributions(mFdistributions);
+
+      // find the corresponding block and fill it
+      Block3DPtr block = grid->getBlock(dataSetArray[n].globalID);
+      //LBMKernelPtr kernel(new CompressibleCumulantLBMKernel(0, 0, 0, CompressibleCumulantLBMKernel::NORMAL));
+      LBMKernelPtr kernel = this->lbmKernel->clone();
+      kernel->setGhostLayerWidth(dataSetArray[n].ghostLayerWidth);
+      kernel->setCollisionFactor(dataSetArray[n].collFactor);
+      kernel->setDeltaT(dataSetArray[n].deltaT);
+      kernel->setCompressible(dataSetArray[n].compressible);
+      kernel->setWithForcing(dataSetArray[n].withForcing);
+      kernel->setDataSet(dataSetPtr);
+      block->setKernel(kernel);
+   }
+
+   delete[] dataSetArray;
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::readDataSet end of restore of data, rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+}
+
+void MPIIORestart21CoProcessor::readBoundaryConds(int step)
+{
+   int rank, size;
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::readBoundaryConds start MPI IO rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+
+   double start, finish;
+   if (comm->isRoot()) start = MPI_Wtime();
+
+   MPI_File file_handler;
+   std::string filename = path+"/mpi_io_cp/mpi_io_cp_"+UbSystem::toString(step)+"/cpBC.bin";
+   int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+   if (rc!=MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file "+filename);
+
+   int blocksCount = 0; // number of blocks that belong to this process
+   
+   // collect from the grid the blocks that belong to this process
+   std::vector<Block3DPtr> blocksVector[25];
+   int minInitLevel = this->grid->getCoarsestInitializedLevel();
+   int maxInitLevel = this->grid->getFinestInitializedLevel();
+   for (int level = minInitLevel; level <= maxInitLevel; level++)
+   {
+      grid->getBlocks(level, rank, blocksVector[level]);
+      blocksCount += static_cast<int>(blocksVector[level].size());
+   }
+
+   BCAdd* bcAddArray = new BCAdd[blocksCount];
+   BoundaryCondition* nullBouCond = new BoundaryCondition();
+   memset(nullBouCond, 0, sizeof(BoundaryCondition));
+   BoundaryCondition* bcArray;
+   int* intArray1;
+   int* intArray2;
+   std::vector<BoundaryConditionsPtr> bcVector;
+   std::vector<int> bcindexmatrixV;
+   std::vector<int> indexContainerV;
+
+   if (comm->isRoot())
+   {
+      finish = MPI_Wtime();
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::readBoundaryConds time: " << finish - start << " s");
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::readBoundaryConds start of restore of data, rank = " << rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+   }
+
+   MPI_File_read_at(file_handler, (MPI_Offset)0, &boundCondParamStr, 1, boundCondParamType, MPI_STATUS_IGNORE);
+   MPI_Type_contiguous(boundCondParamStr.bcindexmatrixCount, MPI_INT, &bcindexmatrixType);
+   MPI_Type_commit(&bcindexmatrixType);
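+   // bcindexmatrixCount is the same in every block, so one contiguous type serves all reads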
+   mpiTypeFreeFlag = true;
+
+   int ic = 0;
+   MPI_Offset read_offset1, read_offset2;
+   for (int level = minInitLevel; level <= maxInitLevel; level++)
+   {
+      BOOST_FOREACH(Block3DPtr block, blocksVector[level])  //	blocks of the current level
+      {
+         read_offset1 = (MPI_Offset)(sizeof(boundCondParam) + block->getGlobalID() * sizeof(size_t));
+
+         MPI_File_read_at(file_handler, read_offset1, &read_offset2, 1, MPI_LONG_LONG_INT, MPI_STATUS_IGNORE);
+         MPI_File_read_at(file_handler, read_offset2, &bcAddArray[ic], 1, boundCondTypeAdd, MPI_STATUS_IGNORE);
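+         // two-step addressing: the offset table that follows the header is indexed by the
+         // global block ID; its entry points at this block's variable-length BC record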
+
+         bcArray = new BoundaryCondition[bcAddArray[ic].boundCond_count];
+         intArray1 = new int[boundCondParamStr.bcindexmatrixCount];
+         intArray2 = new int[bcAddArray[ic].indexContainer_count];
+
+         if (bcAddArray[ic].boundCond_count > 0)
+         {
+            MPI_File_read_at(file_handler, (MPI_Offset)(read_offset2 + sizeof(BCAdd)), &bcArray[0], bcAddArray[ic].boundCond_count, boundCondType, MPI_STATUS_IGNORE);
+         }
+         MPI_File_read_at(file_handler, (MPI_Offset)(read_offset2 + sizeof(BCAdd) + bcAddArray[ic].boundCond_count * sizeof(BoundaryCondition)),
+            &intArray1[0], 1, bcindexmatrixType, MPI_STATUS_IGNORE);
+         if (bcAddArray[ic].indexContainer_count > 0)
+         {
+            MPI_File_read_at(file_handler, (MPI_Offset)(read_offset2 + sizeof(BCAdd) + bcAddArray[ic].boundCond_count * sizeof(BoundaryCondition) + boundCondParamStr.bcindexmatrixCount * sizeof(int)),
+               &intArray2[0], bcAddArray[ic].indexContainer_count, MPI_INT, MPI_STATUS_IGNORE);
+         }
+
+         bcindexmatrixV.resize(0);
+         indexContainerV.resize(0);
+         bcVector.resize(0);
+
+         for (size_t ibc = 0; ibc<bcAddArray[ic].boundCond_count; ibc++)
+         {
+            BoundaryConditionsPtr bc;
+            if (memcmp(&bcArray[ibc], nullBouCond, sizeof(BoundaryCondition))==0)
+               bc = BoundaryConditionsPtr();
+            else
+            {
+               bc = BoundaryConditionsPtr(new BoundaryConditions);
+               bc->noslipBoundaryFlags = bcArray[ibc].noslipBoundaryFlags;
+               bc->slipBoundaryFlags = bcArray[ibc].slipBoundaryFlags;
+               bc->densityBoundaryFlags = bcArray[ibc].densityBoundaryFlags;
+               bc->velocityBoundaryFlags = bcArray[ibc].velocityBoundaryFlags;
+               bc->wallModelBoundaryFlags = bcArray[ibc].wallModelBoundaryFlags;
+               bc->bcVelocityX1 = bcArray[ibc].bcVelocityX1;
+               bc->bcVelocityX2 = bcArray[ibc].bcVelocityX2;
+               bc->bcVelocityX3 = bcArray[ibc].bcVelocityX3;
+               bc->bcDensity = bcArray[ibc].bcDensity;
+               bc->bcLodiDensity = bcArray[ibc].bcLodiDensity;
+               bc->bcLodiVelocityX1 = bcArray[ibc].bcLodiVelocityX1;
+               bc->bcLodiVelocityX2 = bcArray[ibc].bcLodiVelocityX2;
+               bc->bcLodiVelocityX3 = bcArray[ibc].bcLodiVelocityX3;
+               bc->bcLodiLentgh = bcArray[ibc].bcLodiLentgh;
+
+               bc->nx1 = bcArray[ibc].nx1;
+               bc->nx2 = bcArray[ibc].nx2;
+               bc->nx3 = bcArray[ibc].nx3;
+               for (int iq = 0; iq<26; iq++)
+                  bc->setQ(bcArray[ibc].q[iq], iq);
+               bc->setBcAlgorithmType(bcArray[ibc].algorithmType);
+            }
+
+            bcVector.push_back(bc);
+         }
+
+         for (int b1 = 0; b1 < boundCondParamStr.bcindexmatrixCount; b1++)
+            bcindexmatrixV.push_back(intArray1[b1]);
+
+         for (int b2 = 0; b2 < bcAddArray[ic].indexContainer_count; b2++)
+            indexContainerV.push_back(intArray2[b2]);
+         
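+         // rebuild the 3D index matrix from the flat vector, using the extents common to all blocks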
+         CbArray3D<int, IndexerX3X2X1> bcim(bcindexmatrixV, boundCondParamStr.nx1, boundCondParamStr.nx2, boundCondParamStr.nx3);
+         Block3DPtr block1 = grid->getBlock(bcAddArray[ic].globalID);
+
+         //BCProcessorPtr bcProc(new BCProcessor());
+         BCProcessorPtr bcProc = bcProcessor->clone(block1->getKernel());
+         //BCArray3DPtr bcArr = bcProc->getBCArray();
+         BCArray3DPtr bcArr(new BCArray3D());
+         bcArr->bcindexmatrix = bcim;
+         bcArr->bcvector = bcVector;
+         bcArr->indexContainer = indexContainerV;
+         bcProc->setBCArray(bcArr);
+
+         block1->getKernel()->setBCProcessor(bcProc);
+
+        delete[] bcArray;
+        delete[] intArray1;
+        delete[] intArray2;
+
+        ic++;
+      }
+   }
+   MPI_File_close(&file_handler);
+
+   delete nullBouCond;
+
+   if (comm->isRoot())
+   {
+      UBLOG(logINFO, "MPIIORestart21CoProcessor::readBoundaryConds end of restore of data, rank = "<<rank);
+      UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
+   }
+}
+
+void MPIIORestart21CoProcessor::setChunk(int val)
+{
+   chunk = val;
+}
+//////////////////////////////////////////////////////////////////////////
+void MPIIORestart21CoProcessor::setLBMKernel(LBMKernelPtr kernel)
+{
+   this->lbmKernel = kernel;
+}
+//////////////////////////////////////////////////////////////////////////
+void MPIIORestart21CoProcessor::setBCProcessor(BCProcessorPtr bcProcessor)
+{
+   this->bcProcessor = bcProcessor;
+}
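+
+//////////////////////////////////////////////////////////////////////////
+// Minimal usage sketch (hypothetical setup; grid, scheduler, path, communicator and
+// the kernel/BC-processor prototypes are placeholders, not part of this file):
+//
+//   MPIIORestart21CoProcessorPtr rcp(
+//      new MPIIORestart21CoProcessor(grid, restartScheduler, checkpointPath, comm));
+//   rcp->setLBMKernel(kernel);     // prototype, cloned for every block on restart
+//   rcp->setBCProcessor(bcProc);   // prototype, cloned for every block on restart
+//   if (restartStep > 0) rcp->restart(restartStep);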
+
diff --git a/source/VirtualFluidsCore/CoProcessors/MPIIORestart21CoProcessor.h b/source/VirtualFluidsCore/CoProcessors/MPIIORestart21CoProcessor.h
new file mode 100644
index 000000000..038292d33
--- /dev/null
+++ b/source/VirtualFluidsCore/CoProcessors/MPIIORestart21CoProcessor.h
@@ -0,0 +1,185 @@
+#ifndef _MPIIORestart21CoProcessor_H_
+#define _MPIIORestart21CoProcessor_H_
+
+#include "mpi.h"
+
+#include "CoProcessor.h"
+#include "Communicator.h"
+#include "WbWriter.h"
+
+#include <boost/shared_ptr.hpp>
+
+class MPIIORestart21CoProcessor;
+typedef boost::shared_ptr<MPIIORestart21CoProcessor> MPIIORestart21CoProcessorPtr;
+
+//! \class MPIIORestart21CoProcessor
+//! \brief Writes the grid into files each timestep and reads the grid back from the files before regenerating
+class MPIIORestart21CoProcessor: public CoProcessor
+{
+   //! \struct GridParam
+   //! \brief Structure describes the parameters of the grid
+   //! \details The structure is necessary to restore the grid correctly
+   struct GridParam
+   {
+      double trafoParams[33];
+      double deltaX;
+      int blockNx1;
+      int blockNx2;
+      int blockNx3;
+      int nx1;
+      int nx2;
+      int nx3;
+      bool periodicX1;
+      bool periodicX2;
+      bool periodicX3;
+      bool active;
+      bool transformation;
+   };
+
+   //! \struct Block3d
+   //! \brief Structure contains information about the block
+   //! \details The structure is used to write the data describing the block in the grid when saving the grid
+   //! and to read it when restoring the grid
+   struct Block3d
+   {
+      int x1;
+      int x2;
+      int x3;
+      int bundle;
+      int rank;
+      int lrank;
+      int part;
+      int globalID;
+      int localID;
+      int level;
+      int interpolationFlagCF;
+      int interpolationFlagFC;
+      int counter;
+      bool active;
+   };
+
+   //! \struct dataSetParam
+   //! \brief Structure describes the dataSet parameters that are equal in all blocks
+   //! \details The structure is used to store the parameters needed to restore the dataSet arrays
+   struct dataSetParam
+   {
+      int nx1;
+      int nx2;
+      int nx3;
+      int nx[9][4]; // 9 arrays x (nx1, nx2, nx3, nx4)
+      int doubleCountInBlock; // how many double values all dataSet arrays of one (any) block contain
+   };
+
+   //! \struct DataSet
+   //! \brief Structure contains information identifying the block
+   //! \details The structure is used to find the needed block in the grid when restoring a dataSet
+   struct DataSet
+   {
+      double collFactor;
+      double deltaT;
+      int globalID;
+      int ghostLayerWidth;
+      bool compressible;
+      bool withForcing;
+   };
+   
+   //! \struct BoundaryCondition
+   //! \brief Structure contains information about the boundary conditions of the block
+   //! \details The structure is used to write data describing the boundary conditions of the blocks when saving the grid
+   //! and to read it when restoring the grid
+   struct BoundaryCondition
+   {
+      long long noslipBoundaryFlags;    // MPI_LONG_LONG
+      long long slipBoundaryFlags;
+      long long velocityBoundaryFlags;
+      long long densityBoundaryFlags;
+      long long wallModelBoundaryFlags;
+
+      float bcVelocityX1;
+      float bcVelocityX2;
+      float bcVelocityX3;
+      float bcDensity;
+
+      float bcLodiDensity;
+      float bcLodiVelocityX1;
+      float bcLodiVelocityX2;
+      float bcLodiVelocityX3;
+      float bcLodiLentgh;
+
+      float nx1, nx2, nx3;
+      float q[26];                      // MPI_FLOAT
+
+      char algorithmType;
+   };
+
+   //! \struct boundCondParam
+   //! \brief Structure describes the boundary-condition parameters that are equal in all blocks
+   //! \details The structure is used to store the parameters needed to restore the boundaryConditions arrays
+   struct boundCondParam
+   {
+      int nx1;
+      int nx2;
+      int nx3;
+      int bcindexmatrixCount; // how many bcindexmatrix values are in one (any) block
+   };
+
+   //! \struct BCAdd
+   //! \brief Structure contains information identifying the block
+   //! and some parameters of the boundary-condition arrays that are equal in all blocks
+   //! \details The structure is used to find the needed block in the grid when restoring the boundary conditions
+   //! and to set common parameters
+   struct BCAdd
+   {
+      int globalID;
+      //int x1;     // to find the right block
+      //int x2;
+      //int x3;
+      //int level;
+      int boundCond_count;      // how many BoundaryCondition structures are in this block
+      int indexContainer_count; // how many indexContainer values are in this block
+   };
+
+public:
+   MPIIORestart21CoProcessor(Grid3DPtr grid, UbSchedulerPtr s, const std::string& path, CommunicatorPtr comm);
+   virtual ~MPIIORestart21CoProcessor();
+   //! Writes the grid into the files each timestep
+   void process(double step);
+   //! Reads the grid from the files before the grid is rebuilt
+   void restart(int step);
+   //! Writes the blocks of the grid into the blocks file
+   void writeBlocks(int step);
+   //! Writes the datasets of the blocks into the file cpDataSet.bin
+   void writeDataSet(int step);
+   //! Writes the boundary conditions of the blocks into the file cpBC.bin
+   void writeBoundaryConds(int step);
+   //! Reads the blocks of the grid from the blocks file
+   void readBlocks(int step);
+   //! Reads the datasets of the blocks from the file cpDataSet.bin
+   void readDataSet(int step);
+   //! Reads the boundary conditions of the blocks from the file cpBC.bin
+   void readBoundaryConds(int step);
+   //! Sets the number of ranks that read simultaneously
+   void setChunk(int val);
+   //! Sets the LBMKernel prototype that is cloned for every block on restart
+   void setLBMKernel(LBMKernelPtr kernel);
+   //! Sets the BCProcessor prototype that is cloned for every block on restart
+   void setBCProcessor(BCProcessorPtr bcProcessor);
+   //! Truncates the data files
+   void clearAllFiles(int step);
+
+protected:
+   std::string path;
+   CommunicatorPtr comm;
+   bool mpiTypeFreeFlag;
+
+private:
+   MPI_Datatype gridParamType, block3dType, dataSetParamType, dataSetType, dataSetDoubleType;
+   MPI_Datatype boundCondParamType, boundCondType, boundCondTypeAdd, bcindexmatrixType;
+   dataSetParam dataSetParamStr;
+   boundCondParam boundCondParamStr;
+   int chunk;
+   LBMKernelPtr lbmKernel;
+   BCProcessorPtr bcProcessor;
+
+};
+
+#endif 
diff --git a/source/VirtualFluidsCore/CoProcessors/MPIIORestart2CoProcessor.cpp b/source/VirtualFluidsCore/CoProcessors/MPIIORestart2CoProcessor.cpp
index 778a06060..29776c614 100644
--- a/source/VirtualFluidsCore/CoProcessors/MPIIORestart2CoProcessor.cpp
+++ b/source/VirtualFluidsCore/CoProcessors/MPIIORestart2CoProcessor.cpp
@@ -5,7 +5,7 @@
 #include "D3Q27EsoTwist3DSplittedVector.h"
 #include <UbSystem.h>
 #include <MemoryUtil.h>
-#include "RenumberBlockVisitor.h"
+#include "MetisPartitioningGridVisitor.h"
 
 MPIIORestart2CoProcessor::MPIIORestart2CoProcessor(Grid3DPtr grid, UbSchedulerPtr s,
    const std::string& path,
@@ -119,7 +119,15 @@ void MPIIORestart2CoProcessor::process(double step)
       writeBlocks((int)step);
       writeDataSet((int)step);
       writeBoundaryConds((int)step);
+
+      
+      //MPI_Barrier(MPI_COMM_WORLD);
+      
       if (comm->isRoot()) UBLOG(logINFO, "Save check point - end");
+      
+      
+      //readDataSet((int)step);
+
    }
 }
 //////////////////////////////////////////////////////////////////////////
@@ -162,13 +170,17 @@ void MPIIORestart2CoProcessor::writeBlocks(int step)
    //MPI_Comm_size(MPI_COMM_WORLD, &size);
    size=1;
 
-   if (comm->isRoot())
+   
+   //if (comm->isRoot())
    {
-      grid->deleteBlockIDs();
-      RenumberBlockVisitor renumber;
-      grid->accept(renumber);
+      //grid->deleteBlockIDs();
+      //RenumberBlockVisitor renumber;
+      //grid->accept(renumber);
+      grid->renumberBlockIDs();
    }
-   grid->updateDistributedBlocks(comm);
+   //grid->updateDistributedBlocks(comm);
+
+//UBLOG(logINFO, "MPIIORestart2CoProcessor::writeBlocks BlockIDs size =  "<<grid->getBlockIDs().size()<<" rank = "<<rank);
 
    if (comm->isRoot())
    {
@@ -458,6 +470,8 @@ void MPIIORestart2CoProcessor::writeDataSet(int step)
 
    DataSet* dataSetArray = new DataSet[blocksCount];
    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
+   
+   //dataSetArrayTest = new DataSet[blocksCount];
 
    if (comm->isRoot())
    {
@@ -465,17 +479,27 @@ void MPIIORestart2CoProcessor::writeDataSet(int step)
       UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
    }
 
+//UBLOG(logINFO, "MPIIORestart2CoProcessor::writeDataSet minInitLevel="<<minInitLevel<<" maxInitLevel="<<maxInitLevel<<" blocksCount="<<blocksCount<<" rank = "<<rank);
+   
    int ic = 0;
    for (int level = minInitLevel; level<=maxInitLevel; level++)
    {
       BOOST_FOREACH(Block3DPtr block, blocksVector[level])  //	blocks of the current level
       {
          dataSetArray[ic].globalID = block->getGlobalID();     // id of the block needed to find it while regenerating the grid
+         //UBLOG(logINFO, "MPIIORestart2CoProcessor::writeDataSet dataSetArray[n].globalID: "<<dataSetArray[ic].globalID<< " rank = "<<rank<<" ic = "<<ic);
          dataSetArray[ic].ghostLayerWidth = block->getKernel()->getGhostLayerWidth();
          dataSetArray[ic].collFactor = block->getKernel()->getCollisionFactor();
          dataSetArray[ic].deltaT = block->getKernel()->getDeltaT();
          dataSetArray[ic].compressible = block->getKernel()->getCompressible();
          dataSetArray[ic].withForcing = block->getKernel()->getWithForcing();
+         
+         //dataSetArrayTest[ic].globalID = block->getGlobalID();     // id of the block needed to find it while regenerating the grid
+         //dataSetArrayTest[ic].ghostLayerWidth = block->getKernel()->getGhostLayerWidth();
+         //dataSetArrayTest[ic].collFactor = block->getKernel()->getCollisionFactor();
+         //dataSetArrayTest[ic].deltaT = block->getKernel()->getDeltaT();
+         //dataSetArrayTest[ic].compressible = block->getKernel()->getCompressible();
+         //dataSetArrayTest[ic].withForcing = block->getKernel()->getWithForcing();
 
          boost::shared_ptr< CbArray4D<LBMReal, IndexerX4X3X2X1> > AverageValuesArray3DPtr = block->getKernel()->getDataSet()->getAverageValues();
          if (AverageValuesArray3DPtr&&(blockParamStr.nx[0][0]>0)&&(blockParamStr.nx[0][1]>0)&&(blockParamStr.nx[0][2]>0)&&(blockParamStr.nx[0][3]>0))
@@ -746,6 +770,10 @@ void MPIIORestart2CoProcessor::restart(int step)
    if (comm->isRoot()) UBLOG(logINFO, "MPIIORestart2CoProcessor restart step: "<<step);
    if (comm->isRoot()) UBLOG(logINFO, "Load check point - start");
    readBlocks(step);
+   
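+   // re-distribute the freshly read blocks across the ranks; the rank assignment stored
+   // in the checkpoint need not match the current run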
+   Grid3DVisitorPtr metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::BSW, MetisPartitioner::KWAY));
+   grid->accept(metisVisitor);
+   
    readDataSet(step);
    readBoundaryConds(step);
    if (comm->isRoot()) UBLOG(logINFO, "Load check point - end");
@@ -933,7 +961,7 @@ void MPIIORestart2CoProcessor::readDataSet(int step)
 
    DataSet* dataSetArray = new DataSet[blocksCount];
    std::vector<double> doubleValuesArray(blocksCount * blockParamStr.doubleCountInBlock); // double-values in all blocks 
-
+   
    MPI_File file_handler;
    std::string filename = path+"/mpi_io_cp/mpi_io_cp_"+UbSystem::toString(step)+"/cpDataSet.bin";
    int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
@@ -947,6 +975,7 @@ void MPIIORestart2CoProcessor::readDataSet(int step)
       BOOST_FOREACH(Block3DPtr block, blocksVector[level])  //	blocks of the current level
       {
          read_offset = (MPI_Offset)(block->getGlobalID() * sizeofOneDataSet);
+         //UBLOG(logINFO, "MPIIORestart2CoProcessor::readDataSet read_offset="<<read_offset<<" sizeofOneDataSet="<<sizeofOneDataSet<<" GlobalID="<<block->getGlobalID()<< " rank="<<rank);
          MPI_File_read_at(file_handler, read_offset, &dataSetArray[ic], 1, dataSetType, MPI_STATUS_IGNORE);
          MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(DataSet)), &doubleValuesArray[ic*blockParamStr.doubleCountInBlock], 1, dataSetDoubleType, MPI_STATUS_IGNORE);
          ic++;
@@ -955,6 +984,7 @@ void MPIIORestart2CoProcessor::readDataSet(int step)
 
    MPI_File_close(&file_handler);
 
+
    if (comm->isRoot())
    {
       finish = MPI_Wtime();
@@ -962,6 +992,19 @@ void MPIIORestart2CoProcessor::readDataSet(int step)
       UBLOG(logINFO, "MPIIORestart2CoProcessor::readDataSet start of restore of data, rank = "<<rank);
       UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
    }
+
+   //for (int n = 0; n<blocksCount; n++)
+   //{
+      //if(dataSetArray[n].globalID != dataSetArrayTest[n].globalID)
+      //{
+         //UBLOG(logINFO, "MPIIORestart2CoProcessor::readDataSet dataSetArray[n].globalID: "<<dataSetArray[n].globalID<< " rank = "<<rank<<" n = "<<n);
+         ////UBLOG(logINFO, "MPIIORestart2CoProcessor::readDataSet dataSetArray[n].globalID: "<<dataSetArray[n].globalID);
+         ////UBLOG(logINFO, "MPIIORestart2CoProcessor::readDataSet SetArrayTest[n].globalID: "<<dataSetArrayTest[n].globalID);
+      //}
+   //}
+   
+   //UBLOG(logINFO, "MPIIORestart2CoProcessor::readDataSet grid size 1: "<<grid->getNumberOfBlocks()<<" rank = "<<rank);
+   //UBLOG(logINFO, "MPIIORestart2CoProcessor::readDataSet BlockIDs size 1: "<<grid->getBlockIDs().size()<<" rank = "<<rank);
    
    size_t index = 0, nextVectorSize = 0;
    std::vector<double> vectorsOfValues[9];
@@ -1033,7 +1076,13 @@ void MPIIORestart2CoProcessor::readDataSet(int step)
 
       // find the nesessary block and fill it
       Block3DPtr block = grid->getBlock(dataSetArray[n].globalID);
-      //LBMKernelPtr kernel(new CompressibleCumulantLBMKernel(0, 0, 0, CompressibleCumulantLBMKernel::NORMAL));
+      //Block3DPtr block = grid->getBlock(2);
+      if (!block)
+      {
+         UBLOG(logINFO, "MPIIORestart2CoProcessor::readDataSet don't find block dataSetArray[n].globalID: "<<dataSetArray[n].globalID<< " rank = "<<rank<<" n = "<<n);
+         UBLOG(logINFO, "MPIIORestart2CoProcessor::readDataSet grid size 2: "<<grid->getNumberOfBlocks()<<" rank = "<<rank);
+      }
+      ////LBMKernelPtr kernel(new CompressibleCumulantLBMKernel(0, 0, 0, CompressibleCumulantLBMKernel::NORMAL));
       LBMKernelPtr kernel = this->lbmKernel->clone();
       kernel->setGhostLayerWidth(dataSetArray[n].ghostLayerWidth);
       kernel->setCollisionFactor(dataSetArray[n].collFactor);
@@ -1097,8 +1146,8 @@ void MPIIORestart2CoProcessor::readBoundaryConds(int step)
 
    if (comm->isRoot())
    {
-      finish = MPI_Wtime();
-      UBLOG(logINFO, "MPIIORestart2CoProcessor::readBoundaryConds time: " << finish - start << " s");
+      //finish = MPI_Wtime();
+      //UBLOG(logINFO, "MPIIORestart2CoProcessor::readBoundaryConds time: " << finish - start << " s");
       UBLOG(logINFO, "MPIIORestart2CoProcessor::readBoundaryConds start of restore of data, rank = " << rank);
       UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
    }
@@ -1203,6 +1252,8 @@ void MPIIORestart2CoProcessor::readBoundaryConds(int step)
    if (comm->isRoot())
    {
       UBLOG(logINFO, "MPIIORestart2CoProcessor::readBoundaryConds end of restore of data, rank = "<<rank);
+      finish = MPI_Wtime();
+      UBLOG(logINFO, "MPIIORestart2CoProcessor::readBoundaryConds time: " << finish - start << " s");
       UBLOG(logINFO, "Physical Memory currently used by current process: "<<Utilities::getPhysMemUsedByMe()/1073741824.0<<" GB");
    }
 }
diff --git a/source/VirtualFluidsCore/CoProcessors/MPIIORestart2CoProcessor.h b/source/VirtualFluidsCore/CoProcessors/MPIIORestart2CoProcessor.h
index 197e3b05b..9500ac7cb 100644
--- a/source/VirtualFluidsCore/CoProcessors/MPIIORestart2CoProcessor.h
+++ b/source/VirtualFluidsCore/CoProcessors/MPIIORestart2CoProcessor.h
@@ -168,6 +168,8 @@ private:
    int chunk;
    LBMKernelPtr lbmKernel;
    BCProcessorPtr bcProcessor;
+   
+   DataSet* dataSetArrayTest;
 
 };
 
diff --git a/source/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.cpp b/source/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.cpp
index e398a0e82..47774d6cc 100644
--- a/source/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.cpp
+++ b/source/VirtualFluidsCore/CoProcessors/TimeAveragedValuesCoProcessor.cpp
@@ -210,6 +210,8 @@ void TimeAveragedValuesCoProcessor::addData(const Block3DPtr block)
    datanames.resize(0);
 
    datanames.push_back("level");
+   
+   datanames.push_back("Rho");
 
    if ((options&Density) == Density)
    {
@@ -281,6 +283,9 @@ void TimeAveragedValuesCoProcessor::addData(const Block3DPtr block)
    maxX2 -= 2;
    maxX3 -= 2;
 
+   LBMReal f[D3Q27System::ENDF+1];
+   LBMReal vx1,vx2,vx3,rho;
+
    //D3Q27BoundaryConditionPtr bcPtr;
 
    int nr = (int)nodes.size();
@@ -301,6 +306,11 @@ void TimeAveragedValuesCoProcessor::addData(const Block3DPtr block)
 
                data[index++].push_back(level);
 
+               distributions->getDistribution(f, ix1, ix2, ix3);
+               calcMacros(f, rho, vx1, vx2, vx3);
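+               // "Rho": instantaneous density computed from the current distributions,
+               // written alongside the time-averaged quantities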
+
+               data[index++].push_back(rho);
+
                if ((options&Density) == Density)
                {
                   data[index++].push_back((*ar)(0, ix1, ix2, ix3));
diff --git a/source/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.h b/source/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.h
index 7a6a85da2..767c85a23 100644
--- a/source/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.h
+++ b/source/VirtualFluidsCore/Data/D3Q27EsoTwist3DSplittedVector.h
@@ -63,9 +63,10 @@ protected:
    CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
    size_t NX1, NX2, NX3;
 
-   friend class MPIIORestartCoProcessor;
+   friend class MPIIORestart1CoProcessor;
    friend class MPIIORestart2CoProcessor;
-   friend class MPIIORestart3CoProcessor;
+   friend class MPIIORestart11CoProcessor;
+   friend class MPIIORestart21CoProcessor;
 
    friend class boost::serialization::access;
    template<class Archive>
diff --git a/source/VirtualFluidsCore/Grid/Block3D.h b/source/VirtualFluidsCore/Grid/Block3D.h
index a2e6d0e91..aaba47197 100644
--- a/source/VirtualFluidsCore/Grid/Block3D.h
+++ b/source/VirtualFluidsCore/Grid/Block3D.h
@@ -108,6 +108,7 @@ public:
    std::string toString() ;
 
    static int getMaxGlobalID() { return counter; }
+   static void setMaxGlobalID(int c) { counter = c; }
 private:
   int   x1;
   int   x2;
@@ -131,9 +132,10 @@ private:
   int level;
   static int counter;
 
-  friend class MPIIORestartCoProcessor;
+  friend class MPIIORestart1CoProcessor;
   friend class MPIIORestart2CoProcessor;
-  friend class MPIIORestart3CoProcessor;
+  friend class MPIIORestart11CoProcessor;
+  friend class MPIIORestart21CoProcessor;
 
   friend class boost::serialization::access;
   template<class Archive>
diff --git a/source/VirtualFluidsCore/Grid/Grid3D.cpp b/source/VirtualFluidsCore/Grid/Grid3D.cpp
index e7a01c447..7943635ee 100644
--- a/source/VirtualFluidsCore/Grid/Grid3D.cpp
+++ b/source/VirtualFluidsCore/Grid/Grid3D.cpp
@@ -1943,6 +1943,29 @@ void Grid3D::deleteBlockIDs()
 {
    this->blockIdMap.clear();
 }
+//////////////////////////////////////////////////////////////////////////
+void Grid3D::renumberBlockIDs()
+{
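+   // assign contiguous global IDs 0..N-1, level by level, so a block's ID can be used
+   // directly as its record index in the checkpoint files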
+   deleteBlockIDs();
+
+   int startLevel = getCoarsestInitializedLevel();
+   int stopLevel = getFinestInitializedLevel();
+   int counter = 0;
+   
+   for (int l = startLevel; l <= stopLevel; l++)
+   {
+      std::vector<Block3DPtr> blockVector;
+      getBlocks(l, blockVector);
+      BOOST_FOREACH(Block3DPtr block, blockVector)
+      {
+         block->setGlobalID(counter);
+         blockIdMap.insert(std::make_pair(counter, block));
+         Block3D::setMaxGlobalID(counter);
+         counter++;
+      }
+   }
+}
+
 //////////////////////////////////////////////////////////////////////////
 void Grid3D::updateDistributedBlocks(CommunicatorPtr comm)
 {
@@ -1965,6 +1988,7 @@ void Grid3D::updateDistributedBlocks(CommunicatorPtr comm)
             blocks.push_back(block->getX3());
             blocks.push_back(l);
             blocks.push_back(block->getGlobalID());
+            blocks.push_back(block->getRank());
          }
       }
    }
@@ -1986,10 +2010,11 @@ void Grid3D::updateDistributedBlocks(CommunicatorPtr comm)
       levelSet.resize(Grid3DSystem::MAXLEVEL+1);
 
       int rsize = blocks.size();
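+      // each received block is a record of 6 ints: x1, x2, x3, level, globalID, rank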
-      for (int i = 0; i < rsize; i+=5)
+      for (int i = 0; i < rsize; i+=6)
       {
          Block3DPtr block(new Block3D(blocks[i], blocks[i+1], blocks[i+2], blocks[i+3]));
          block->setGlobalID(blocks[i+4]);
+         block->setRank(blocks[i+5]);
          this->addBlock(block);
       }
 
diff --git a/source/VirtualFluidsCore/Grid/Grid3D.h b/source/VirtualFluidsCore/Grid/Grid3D.h
index e737a8d59..5662b51cb 100644
--- a/source/VirtualFluidsCore/Grid/Grid3D.h
+++ b/source/VirtualFluidsCore/Grid/Grid3D.h
@@ -75,6 +75,7 @@ public:
    //const Block3DMap& getBlocks(int level);
    const BlockIDMap& getBlockIDs();
    void deleteBlockIDs();
+   void renumberBlockIDs();
    void updateDistributedBlocks(CommunicatorPtr comm);
    Block3DPtr getSuperBlock(Block3DPtr block);
    Block3DPtr getSuperBlock(int ix1, int ix2, int ix3, int level);
diff --git a/source/VirtualFluidsCore/LBM/CompressibleCumulant2LBMKernel.cpp b/source/VirtualFluidsCore/LBM/CompressibleCumulant2LBMKernel.cpp
new file mode 100644
index 000000000..4a19222bf
--- /dev/null
+++ b/source/VirtualFluidsCore/LBM/CompressibleCumulant2LBMKernel.cpp
@@ -0,0 +1,1081 @@
+#include "CompressibleCumulant2LBMKernel.h"
+#include "D3Q27System.h"
+#include "InterpolationProcessor.h"
+#include "D3Q27EsoTwist3DSplittedVector.h"
+#include <math.h>
+#include <omp.h>
+
+#define PROOF_CORRECTNESS
+
+//////////////////////////////////////////////////////////////////////////
+CompressibleCumulant2LBMKernel::CompressibleCumulant2LBMKernel()
+{
+   this->nx1 = 0;
+   this->nx2 = 0;
+   this->nx3 = 0;
+   this->parameter = NORMAL;
+   this->OxyyMxzz = 1.0;
+   this->compressible = true;
+   this->bulkOmegaToOmega = false;
+   this->OxxPyyPzz = 1.0;
+}
+//////////////////////////////////////////////////////////////////////////
+CompressibleCumulant2LBMKernel::CompressibleCumulant2LBMKernel(int nx1, int nx2, int nx3, Parameter p) 
+{
+   this->nx1 = nx1;
+   this->nx2 = nx2;
+   this->nx3 = nx3;
+   this->parameter = p;
+   this->OxyyMxzz = 1.0;
+   this->compressible = true;
+   this->bulkOmegaToOmega = false;
+   this->OxxPyyPzz = 1.0;
+}
+//////////////////////////////////////////////////////////////////////////
+CompressibleCumulant2LBMKernel::~CompressibleCumulant2LBMKernel(void)
+{
+
+}
+//////////////////////////////////////////////////////////////////////////
+void CompressibleCumulant2LBMKernel::init()
+{
+   DistributionArray3DPtr d(new D3Q27EsoTwist3DSplittedVector(nx1+2, nx2+2, nx3+2, -999.0));
+   dataSet->setFdistributions(d);
+}
+//////////////////////////////////////////////////////////////////////////
+LBMKernelPtr CompressibleCumulant2LBMKernel::clone()
+{
+   LBMKernelPtr kernel(new CompressibleCumulant2LBMKernel(nx1, nx2, nx3, parameter));
+   boost::dynamic_pointer_cast<CompressibleCumulant2LBMKernel>(kernel)->init();
+   kernel->setCollisionFactor(this->collFactor);
+   kernel->setBCProcessor(bcProcessor->clone(kernel));
+   kernel->setWithForcing(withForcing);
+   kernel->setForcingX1(muForcingX1);
+   kernel->setForcingX2(muForcingX2);
+   kernel->setForcingX3(muForcingX3);
+   kernel->setIndex(ix1, ix2, ix3);
+   kernel->setDeltaT(deltaT);
+   switch (parameter)
+   {
+   case NORMAL:
+      boost::dynamic_pointer_cast<CompressibleCumulant2LBMKernel>(kernel)->OxyyMxzz = 1.0;
+      break;
+   case MAGIC:
+      boost::dynamic_pointer_cast<CompressibleCumulant2LBMKernel>(kernel)->OxyyMxzz = 2.0 - collFactor;
+      break;
+   }
+
+   if (bulkOmegaToOmega)
+   {
+      boost::dynamic_pointer_cast<CompressibleCumulant2LBMKernel>(kernel)->OxxPyyPzz = collFactor;
+   }
+   else
+   {
+      boost::dynamic_pointer_cast<CompressibleCumulant2LBMKernel>(kernel)->OxxPyyPzz = one;
+   }
+   return kernel;
+}
+//////////////////////////////////////////////////////////////////////////
+void CompressibleCumulant2LBMKernel::calculate()
+{
+   timer.resetAndStart();
+   collideAll();
+   timer.stop();
+}
+//////////////////////////////////////////////////////////////////////////
+void CompressibleCumulant2LBMKernel::collideAll()
+{
+   using namespace D3Q27System;
+
+   // initialize the forcing terms
+   if (withForcing)
+   {
+      muForcingX1.DefineVar("x1", &muX1); muForcingX1.DefineVar("x2", &muX2); muForcingX1.DefineVar("x3", &muX3);
+      muForcingX2.DefineVar("x1", &muX1); muForcingX2.DefineVar("x2", &muX2); muForcingX2.DefineVar("x3", &muX3);
+      muForcingX3.DefineVar("x1", &muX1); muForcingX3.DefineVar("x2", &muX2); muForcingX3.DefineVar("x3", &muX3);
+
+      muDeltaT = deltaT;
+
+      muForcingX1.DefineVar("dt", &muDeltaT);
+      muForcingX2.DefineVar("dt", &muDeltaT);
+      muForcingX3.DefineVar("dt", &muDeltaT);
+
+      muNu = (1.0/3.0)*(1.0/collFactor - 1.0/2.0);
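+      // kinematic viscosity from the BGK relation: nu = (1/omega - 1/2) / 3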
+
+      muForcingX1.DefineVar("nu", &muNu);
+      muForcingX2.DefineVar("nu", &muNu);
+      muForcingX3.DefineVar("nu", &muNu);
+
+      // forcingX1..X3 are kernel members; reset them here rather than shadowing them
+      // with locals, which would hide the values assigned inside the node loop below
+      forcingX1 = 0;
+      forcingX2 = 0;
+      forcingX3 = 0;
+   }
+   /////////////////////////////////////
+
+   localDistributions = boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
+   nonLocalDistributions = boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getNonLocalDistributions();
+   zeroDistributions = boost::dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getZeroDistributions();
+
+   BCArray3DPtr bcArray = this->getBCProcessor()->getBCArray();
+
+   const int bcArrayMaxX1 = (int)bcArray->getNX1();
+   const int bcArrayMaxX2 = (int)bcArray->getNX2();
+   const int bcArrayMaxX3 = (int)bcArray->getNX3();
+
+   int minX1 = ghostLayerWidth;
+   int minX2 = ghostLayerWidth;
+   int minX3 = ghostLayerWidth;
+   int maxX1 = bcArrayMaxX1-ghostLayerWidth;
+   int maxX2 = bcArrayMaxX2-ghostLayerWidth;
+   int maxX3 = bcArrayMaxX3-ghostLayerWidth;
+
+   LBMReal omega = collFactor;
+
+
+   //#pragma omp parallel num_threads(8)
+   {
+      //   int i = omp_get_thread_num();
+      //   printf_s("Hello from thread %d\n", i);
+      //}
+   //#pragma omp for 
+      for (int x3 = minX3; x3 < maxX3; x3++)
+      {
+         for (int x2 = minX2; x2 < maxX2; x2++)
+         {
+            for (int x1 = minX1; x1 < maxX1; x1++)
+            {
+               if (!bcArray->isSolid(x1, x2, x3) && !bcArray->isUndefined(x1, x2, x3))
+               {
+                  int x1p = x1 + 1;
+                  int x2p = x2 + 1;
+                  int x3p = x3 + 1;
+                  //////////////////////////////////////////////////////////////////////////
+                  //read distribution
+                  ////////////////////////////////////////////////////////////////////////////
+                  //////////////////////////////////////////////////////////////////////////
+
+                  //E   N  T
+                  //c   c  c
+                  //////////
+                  //W   S  B
+                  //a   a  a
+
+                  // the rest is b
+
+                  //mfxyz
+                  //a - negative
+                  //b - zero
+                  //c - positive
+
+                  // a b c
+                  //-1 0 1
+
+                  LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+                  LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+                  LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+                  LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+                  LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
+                  LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+                  LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
+                  LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+                  LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
+                  LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+                  LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
+                  LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
+                  LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+                  LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
+                  LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
+                  LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
+                  LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
+                  LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
+                  LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
+                  LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
+                  LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
+                  LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
+                  LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                  LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                  LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                  LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                  LBMReal mfbbb = (*this->zeroDistributions)(x1, x2, x3);
+
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  LBMReal drho = ((((mfccc+mfaaa)+(mfaca+mfcac))+((mfacc+mfcaa)+(mfaac+mfcca)))+
+                     (((mfbac+mfbca)+(mfbaa+mfbcc))+((mfabc+mfcba)+(mfaba+mfcbc))+((mfacb+mfcab)+(mfaab+mfccb)))+
+                     ((mfabb+mfcbb)+(mfbab+mfbcb))+(mfbba+mfbbc))+mfbbb;
+
+                  LBMReal rho = one+drho;
+                  ////////////////////////////////////////////////////////////////////////////////////
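+                  // the velocities are first moments of the distributions, normalized by
+                  // the local density rho (compressible formulation)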
+                  LBMReal vvx = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfcaa-mfacc)+(mfcca-mfaac)))+
+                     (((mfcba-mfabc)+(mfcbc-mfaba))+((mfcab-mfacb)+(mfccb-mfaab)))+
+                     (mfcbb-mfabb))/rho;
+                  LBMReal vvy = ((((mfccc-mfaaa)+(mfaca-mfcac))+((mfacc-mfcaa)+(mfcca-mfaac)))+
+                     (((mfbca-mfbac)+(mfbcc-mfbaa))+((mfacb-mfcab)+(mfccb-mfaab)))+
+                     (mfbcb-mfbab))/rho;
+                  LBMReal vvz = ((((mfccc-mfaaa)+(mfcac-mfaca))+((mfacc-mfcaa)+(mfaac-mfcca)))+
+                     (((mfbac-mfbca)+(mfbcc-mfbaa))+((mfabc-mfcba)+(mfcbc-mfaba)))+
+                     (mfbbc-mfbba))/rho;
+                  ////////////////////////////////////////////////////////////////////////////////////
+
+                  //forcing 
+                  ///////////////////////////////////////////////////////////////////////////////////////////
+                  if (withForcing)
+                  {
+                     muX1 = static_cast<double>(x1-1+ix1*maxX1);
+                     muX2 = static_cast<double>(x2-1+ix2*maxX2);
+                     muX3 = static_cast<double>(x3-1+ix3*maxX3);
+
+                     forcingX1 = muForcingX1.Eval();
+                     forcingX2 = muForcingX2.Eval();
+                     forcingX3 = muForcingX3.Eval();
+
+                     vvx += forcingX1*deltaT*0.5; // X
+                     vvy += forcingX2*deltaT*0.5; // Y
+                     vvz += forcingX3*deltaT*0.5; // Z
+                  }
+                  ///////////////////////////////////////////////////////////////////////////////////////////               
+            ////////////////////////////////////////////////////////////////////////////////////
+                  LBMReal oMdrho = one; // comp special
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  LBMReal m0, m1, m2;
+                  LBMReal vx2;
+                  LBMReal vy2;
+                  LBMReal vz2;
+                  vx2 = vvx*vvx;
+                  vy2 = vvy*vvy;
+                  vz2 = vvz*vvz;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  LBMReal wadjust;
+                  LBMReal qudricLimitP = 0.001;// * 0.0001f;
+                  LBMReal qudricLimitM = 0.001;// * 0.0001f;
+                  LBMReal qudricLimitD = 0.001;// * 0.001f;
+                  //LBMReal s9 = minusomega;
+                  //test
+                  //s9 = 0.;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  // Hin: forward transformation from distributions to central moments
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  // conditioned with 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  // Z - Dir
+                  m2 = mfaaa+mfaac;
+                  m1 = mfaac-mfaaa;
+                  m0 = m2+mfaab;
+                  mfaaa = m0;
+                  m0 += c1o36 * oMdrho;
+                  mfaab = m1-m0 * vvz;
+                  mfaac = m2-two*	m1 * vvz+vz2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfaba+mfabc;
+                  m1 = mfabc-mfaba;
+                  m0 = m2+mfabb;
+                  mfaba = m0;
+                  m0 += c1o9 * oMdrho;
+                  mfabb = m1-m0 * vvz;
+                  mfabc = m2-two*	m1 * vvz+vz2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfaca+mfacc;
+                  m1 = mfacc-mfaca;
+                  m0 = m2+mfacb;
+                  mfaca = m0;
+                  m0 += c1o36 * oMdrho;
+                  mfacb = m1-m0 * vvz;
+                  mfacc = m2-two*	m1 * vvz+vz2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfbaa+mfbac;
+                  m1 = mfbac-mfbaa;
+                  m0 = m2+mfbab;
+                  mfbaa = m0;
+                  m0 += c1o9 * oMdrho;
+                  mfbab = m1-m0 * vvz;
+                  mfbac = m2-two*	m1 * vvz+vz2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfbba+mfbbc;
+                  m1 = mfbbc-mfbba;
+                  m0 = m2+mfbbb;
+                  mfbba = m0;
+                  m0 += c4o9 * oMdrho;
+                  mfbbb = m1-m0 * vvz;
+                  mfbbc = m2-two*	m1 * vvz+vz2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfbca+mfbcc;
+                  m1 = mfbcc-mfbca;
+                  m0 = m2+mfbcb;
+                  mfbca = m0;
+                  m0 += c1o9 * oMdrho;
+                  mfbcb = m1-m0 * vvz;
+                  mfbcc = m2-two*	m1 * vvz+vz2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfcaa+mfcac;
+                  m1 = mfcac-mfcaa;
+                  m0 = m2+mfcab;
+                  mfcaa = m0;
+                  m0 += c1o36 * oMdrho;
+                  mfcab = m1-m0 * vvz;
+                  mfcac = m2-two*	m1 * vvz+vz2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfcba+mfcbc;
+                  m1 = mfcbc-mfcba;
+                  m0 = m2+mfcbb;
+                  mfcba = m0;
+                  m0 += c1o9 * oMdrho;
+                  mfcbb = m1-m0 * vvz;
+                  mfcbc = m2-two*	m1 * vvz+vz2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfcca+mfccc;
+                  m1 = mfccc-mfcca;
+                  m0 = m2+mfccb;
+                  mfcca = m0;
+                  m0 += c1o36 * oMdrho;
+                  mfccb = m1-m0 * vvz;
+                  mfccc = m2-two*	m1 * vvz+vz2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  // conditioned with 1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  // Y - Dir
+                  m2 = mfaaa+mfaca;
+                  m1 = mfaca-mfaaa;
+                  m0 = m2+mfaba;
+                  mfaaa = m0;
+                  m0 += c1o6 * oMdrho;
+                  mfaba = m1-m0 * vvy;
+                  mfaca = m2-two*	m1 * vvy+vy2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfaab+mfacb;
+                  m1 = mfacb-mfaab;
+                  m0 = m2+mfabb;
+                  mfaab = m0;
+                  mfabb = m1-m0 * vvy;
+                  mfacb = m2-two*	m1 * vvy+vy2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfaac+mfacc;
+                  m1 = mfacc-mfaac;
+                  m0 = m2+mfabc;
+                  mfaac = m0;
+                  m0 += c1o18 * oMdrho;
+                  mfabc = m1-m0 * vvy;
+                  mfacc = m2-two*	m1 * vvy+vy2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfbaa+mfbca;
+                  m1 = mfbca-mfbaa;
+                  m0 = m2+mfbba;
+                  mfbaa = m0;
+                  m0 += c2o3 * oMdrho;
+                  mfbba = m1-m0 * vvy;
+                  mfbca = m2-two*	m1 * vvy+vy2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfbab+mfbcb;
+                  m1 = mfbcb-mfbab;
+                  m0 = m2+mfbbb;
+                  mfbab = m0;
+                  mfbbb = m1-m0 * vvy;
+                  mfbcb = m2-two*	m1 * vvy+vy2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfbac+mfbcc;
+                  m1 = mfbcc-mfbac;
+                  m0 = m2+mfbbc;
+                  mfbac = m0;
+                  m0 += c2o9 * oMdrho;
+                  mfbbc = m1-m0 * vvy;
+                  mfbcc = m2-two*	m1 * vvy+vy2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfcaa+mfcca;
+                  m1 = mfcca-mfcaa;
+                  m0 = m2+mfcba;
+                  mfcaa = m0;
+                  m0 += c1o6 * oMdrho;
+                  mfcba = m1-m0 * vvy;
+                  mfcca = m2-two*	m1 * vvy+vy2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfcab+mfccb;
+                  m1 = mfccb-mfcab;
+                  m0 = m2+mfcbb;
+                  mfcab = m0;
+                  mfcbb = m1-m0 * vvy;
+                  mfccb = m2-two*	m1 * vvy+vy2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfcac+mfccc;
+                  m1 = mfccc-mfcac;
+                  m0 = m2+mfcbc;
+                  mfcac = m0;
+                  m0 += c1o18 * oMdrho;
+                  mfcbc = m1-m0 * vvy;
+                  mfccc = m2-two*	m1 * vvy+vy2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  // conditioned with 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  // X - Dir
+                  m2 = mfaaa+mfcaa;
+                  m1 = mfcaa-mfaaa;
+                  m0 = m2+mfbaa;
+                  mfaaa = m0;
+                  m0 += one* oMdrho;
+                  mfbaa = m1-m0 * vvx;
+                  mfcaa = m2-two*	m1 * vvx+vx2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfaba+mfcba;
+                  m1 = mfcba-mfaba;
+                  m0 = m2+mfbba;
+                  mfaba = m0;
+                  mfbba = m1-m0 * vvx;
+                  mfcba = m2-two*	m1 * vvx+vx2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfaca+mfcca;
+                  m1 = mfcca-mfaca;
+                  m0 = m2+mfbca;
+                  mfaca = m0;
+                  m0 += c1o3 * oMdrho;
+                  mfbca = m1-m0 * vvx;
+                  mfcca = m2-two*	m1 * vvx+vx2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfaab+mfcab;
+                  m1 = mfcab-mfaab;
+                  m0 = m2+mfbab;
+                  mfaab = m0;
+                  mfbab = m1-m0 * vvx;
+                  mfcab = m2-two*	m1 * vvx+vx2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfabb+mfcbb;
+                  m1 = mfcbb-mfabb;
+                  m0 = m2+mfbbb;
+                  mfabb = m0;
+                  mfbbb = m1-m0 * vvx;
+                  mfcbb = m2-two*	m1 * vvx+vx2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfacb+mfccb;
+                  m1 = mfccb-mfacb;
+                  m0 = m2+mfbcb;
+                  mfacb = m0;
+                  mfbcb = m1-m0 * vvx;
+                  mfccb = m2-two*	m1 * vvx+vx2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfaac+mfcac;
+                  m1 = mfcac-mfaac;
+                  m0 = m2+mfbac;
+                  mfaac = m0;
+                  m0 += c1o3 * oMdrho;
+                  mfbac = m1-m0 * vvx;
+                  mfcac = m2-two*	m1 * vvx+vx2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfabc+mfcbc;
+                  m1 = mfcbc-mfabc;
+                  m0 = m2+mfbbc;
+                  mfabc = m0;
+                  mfbbc = m1-m0 * vvx;
+                  mfcbc = m2-two*	m1 * vvx+vx2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m2 = mfacc+mfccc;
+                  m1 = mfccc-mfacc;
+                  m0 = m2+mfbcc;
+                  mfacc = m0;
+                  m0 += c1o9 * oMdrho;
+                  mfbcc = m1-m0 * vvx;
+                  mfccc = m2-two*	m1 * vvx+vx2 * m0;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  ////////////////////////////////////////////////////////////////////////////////////
+
+
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  // Cumulants
+                  ////////////////////////////////////////////////////////////////////////////////////
+ 
+                  //LBMReal OxxPyyPzz = one; // bulk viscosity
+
+                  ////////////////////////////////////////////////////////////
+                  //3.
+                  //////////////////////////////
+                  //LBMReal OxyyPxzz = one;//three  * (two - omega) / (three  - omega);//
+                  //LBMReal OxyyMxzz = one;//six    * (two - omega) / (six    - omega);//
+                  //LBMReal Oxyz = one;//twelve * (two - omega) / (twelve + omega);//
+
+                  LBMReal OxyyPxzz  = eight*(-two+omega)*(one+two*omega)/(-eight-fourteen*omega+seven*omega*omega);//one;
+
+                  LBMReal OxyyMxzz  = eight*(-two+omega)*(-seven+four*omega)/(fiftysix-fifty*omega+nine*omega*omega);//one;
+
+                  LBMReal Oxyz      = twentyfour*(-two+omega)*(-two-seven*omega+three*omega*omega)/(fourtyeight+c152*omega-c130*omega*omega+twentynine*omega*omega*omega);
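+                  // The rational functions of omega above parametrize the third-order
+                  // relaxation rates so that leading-order truncation errors cancel
+                  // (cf. the parametrized cumulant method of Geier et al.); the
+                  // commented-out variants around them are simpler alternatives.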
+                  //////////////////////////////
+                  //LBMReal OxyyPxzz  = two-omega;//
+                  //LBMReal OxyyMxzz  = two-omega;//
+                  //////////////////////////////
+                  //LBMReal OxyyPxzz  = (eight * (omega - two)) / (omega - eight);//Ginzburg
+                  //LBMReal OxyyMxzz  = (eight * (omega - two)) / (omega - eight);//Ginzburg
+                  //////////////////////////////
+                  //LBMReal OxyyPxzz  = omega;//BGK
+                  //LBMReal OxyyMxzz  = omega;//BGK
+                  //////////////////////////////
+                  //LBMReal OxyyPxzz  = (one + omega) / two;//1P5
+                  //LBMReal OxyyMxzz  = (one + omega) / two;//1P5
+                  //////////////////////////////
+                  //LBMReal OxyyPxzz  = (three - omega) / two;//0P5
+                  //LBMReal OxyyMxzz  = (three - omega) / two;//0P5
+                  //////////////////////////////
+                  //LBMReal OxyyPxzz  = (one + (eight * (omega - two)) / (omega - eight)) / two;//one + Ginzburg / two ... Car
+                  //LBMReal OxyyMxzz  = (one + (eight * (omega - two)) / (omega - eight)) / two;//one + Ginzburg / two ... Car
+                  ////////////////////////////////////////////////////////////
+                  //4.
+                  //////////////////////////////
+                  LBMReal O4 = one;
+                  //////////////////////////////
+                  //LBMReal O4        = omega;//TRT
+                  ////////////////////////////////////////////////////////////
+                  //5.
+                  //////////////////////////////
+                  LBMReal O5 = one;
+                  ////////////////////////////////////////////////////////////
+                  //6.
+                  //////////////////////////////
+                  LBMReal O6 = one;
+                  ////////////////////////////////////////////////////////////
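+                  // Rates O4, O5, O6 equal to one relax the fourth- to sixth-order
+                  // cumulants in a single step towards their modeled values.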
+
+
+                  //central moments to cumulants
+                  //4.
+                  LBMReal CUMcbb = mfcbb-((mfcaa+c1o3) * mfabb+two * mfbba * mfbab)/rho;	// in use since 2015-05-15
+                  LBMReal CUMbcb = mfbcb-((mfaca+c1o3) * mfbab+two * mfbba * mfabb)/rho; // in use since 2015-05-15
+                  LBMReal CUMbbc = mfbbc-((mfaac+c1o3) * mfbba+two * mfbab * mfabb)/rho; // in use since 2015-05-15
+
+                  LBMReal CUMcca = mfcca-(((mfcaa * mfaca+two * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));
+                  LBMReal CUMcac = mfcac-(((mfcaa * mfaac+two * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));
+                  LBMReal CUMacc = mfacc-(((mfaac * mfaca+two * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));
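+                  // Cumulants follow from the central moments by subtracting products
+                  // of lower-order moments; the division by rho yields the
+                  // density-weighted (compressible) form.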
+
+                  //5.
+                  LBMReal CUMbcc = mfbcc-((mfaac * mfbca+mfaca * mfbac+four * mfabb * mfbbb+two * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
+                  LBMReal CUMcbc = mfcbc-((mfaac * mfcba+mfcaa * mfabc+four * mfbab * mfbbb+two * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
+                  LBMReal CUMccb = mfccb-((mfcaa * mfacb+mfaca * mfcab+four * mfbba * mfbbb+two * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
+
+                  //6.
+
+                  LBMReal CUMccc = mfccc+((-four *  mfbbb * mfbbb
+                     -(mfcaa * mfacc+mfaca * mfcac+mfaac * mfcca)
+                     -four * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
+                     -two * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
+                     +(four * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
+                        +two * (mfcaa * mfaca * mfaac)
+                        +sixteen *  mfbba * mfbab * mfabb)/(rho * rho)
+                     -c1o3 * (mfacc+mfcac+mfcca)/rho
+                     -c1o9 * (mfcaa+mfaca+mfaac)/rho
+                     +(two * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
+                        +(mfaac * mfaca+mfaac * mfcaa+mfaca * mfcaa)+c1o3 *(mfaac+mfaca+mfcaa))/(rho * rho) * c2o3
+                     +c1o27*((drho * drho-drho)/(rho*rho)));
+                  //+ c1o27*(one -three/rho +two/(rho*rho)));
+
+
+
+
+                  //2.
+                  // linear combinations
+                  LBMReal mxxPyyPzz = mfcaa+mfaca+mfaac;
+                  LBMReal mxxMyy = mfcaa-mfaca;
+                  LBMReal mxxMzz = mfcaa-mfaac;
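+                  // The trace mxxPyyPzz relaxes with the bulk rate OxxPyyPzz, while
+                  // the deviatoric differences mxxMyy and mxxMzz relax with the
+                  // shear rate omega.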
+
+                  //////////////////////////////////////////////////////////////////////////
+                  //LBMReal magicBulk=(CUMacc+CUMcac+CUMcca)*(one/OxxPyyPzz-c1o2)*c3o2*8.;
+
+                  //////////////////////////////////////////////////////////////////////////
+                  //limiter workaround, part 1
+                  //LBMReal oxxyy,oxxzz,oxy,oxz,oyz;
+                  //LBMReal smag=0.001;
+                  //oxxyy    = omega+(one-omega)*abs(mxxMyy)/(abs(mxxMyy)+smag);
+                  //oxxzz    = omega+(one-omega)*abs(mxxMzz)/(abs(mxxMzz)+smag);
+                  //oxy      = omega+(one-omega)*abs(mfbba)/(abs(mfbba)+smag);
+                  //oxz      = omega+(one-omega)*abs(mfbab)/(abs(mfbab)+smag);
+                  //oyz      = omega+(one-omega)*abs(mfabb)/(abs(mfabb)+smag);
+
+                  ////////////////////////////////////////////////////////////////////////////
+                  ////part 1b
+                  //LBMReal constante = 1000.0;
+                  //LBMReal nuEddi = constante * abs(mxxPyyPzz);
+                  //LBMReal omegaLimit = one / (one / omega + three * nuEddi);
+
+                  //{
+                  //	LBMReal dxux = c1o2 * (-omegaLimit) *(mxxMyy + mxxMzz) +  OxxPyyPzz * (mfaaa - mxxPyyPzz);
+                  //	LBMReal dyuy = dxux + omegaLimit * c3o2 * mxxMyy;
+                  //	LBMReal dzuz = dxux + omegaLimit * c3o2 * mxxMzz;
+
+                     ////relax
+                     //mxxPyyPzz += OxxPyyPzz*(mfaaa  - mxxPyyPzz)- three * (one - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
+                     //mxxMyy    += omegaLimit * (-mxxMyy) - three * (one + c1o2 * (-omegaLimit)) * (vx2 * dxux + vy2 * dyuy);
+                     //mxxMzz    += omegaLimit * (-mxxMzz) - three * (one + c1o2 * (-omegaLimit)) * (vx2 * dxux + vz2 * dzuz);
+
+                  //}
+                  //mfabb     += omegaLimit * (-mfabb);
+                  //mfbab     += omegaLimit * (-mfbab);
+                  //mfbba     += omegaLimit * (-mfbba);
+                  ////////////////////////////////////////////////////////////////////////////
+
+                  ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+                  //incl. correction		(has not worked that well so far... needs optimization??)
+                  
+                     LBMReal dxux = c1o2 * (-omega) *(mxxMyy+mxxMzz)+c1o2 *  OxxPyyPzz * (mfaaa-mxxPyyPzz);
+                     LBMReal dyuy = dxux+omega * c3o2 * mxxMyy;
+                     LBMReal dzuz = dxux+omega * c3o2 * mxxMzz;
+
+                     LBMReal Dxy =-three*omega*mfbba;
+                     LBMReal Dxz =-three*omega*mfbab;
+                     LBMReal Dyz =-three*omega*mfabb;
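+                     // dxux, dyuy, dzuz estimate the diagonal velocity gradients from
+                     // the second-order moments; Dxy, Dxz, Dyz estimate the
+                     // off-diagonal ones. They drive the correction terms in the
+                     // second-order relaxation below and in the fourth-order terms
+                     // A and B further down.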
+
+
+
+                     //relax
+                     mxxPyyPzz += OxxPyyPzz*(mfaaa-mxxPyyPzz)-three * (one-c1o2 * OxxPyyPzz) * (vx2 * dxux+vy2 * dyuy+vz2 * dzuz);//-magicBulk*OxxPyyPzz;
+                     mxxMyy += omega * (-mxxMyy)-three * (one+c1o2 * (-omega)) * (vx2 * dxux-vy2 * dyuy);
+                     mxxMzz += omega * (-mxxMzz)-three * (one+c1o2 * (-omega)) * (vx2 * dxux-vz2 * dzuz);
+
+                     //////////////////////////////////////////////////////////////////////////
+                     //limiter workaround, part 2
+                     //mxxMyy    += oxxyy * (-mxxMyy) - three * (one + c1o2 * (-omega)) * (vx2 * dxux + vy2 * dyuy);
+                     //mxxMzz    += oxxzz * (-mxxMzz) - three * (one + c1o2 * (-omega)) * (vx2 * dxux + vz2 * dzuz);
+                     //////////////////////////////////////////////////////////////////////////
+
+                  
+                  ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+                  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+                  ////no correction
+                  //mxxPyyPzz += OxxPyyPzz*(mfaaa-mxxPyyPzz);//-magicBulk*OxxPyyPzz;
+                  //mxxMyy    += -(-omega) * (-mxxMyy);
+                  //mxxMzz    += -(-omega) * (-mxxMzz);
+                  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+                  mfabb += omega * (-mfabb);
+                  mfbab += omega * (-mfbab);
+                  mfbba += omega * (-mfbba);
+
+                  //////////////////////////////////////////////////////////////////////////
+                  //limiter workaround, part 3
+                  //mfabb     += oyz * (-mfabb);
+                  //mfbab     += oxz * (-mfbab);
+                  //mfbba     += oxy * (-mfbba);
+                  //////////////////////////////////////////////////////////////////////////
+
+                  // linear combinations back
+                  mfcaa = c1o3 * (mxxMyy+mxxMzz+mxxPyyPzz);
+                  mfaca = c1o3 * (-two*  mxxMyy+mxxMzz+mxxPyyPzz);
+                  mfaac = c1o3 * (mxxMyy-two* mxxMzz+mxxPyyPzz);
+
+                  //3.
+                  // linear combinations
+
+                  LBMReal mxxyPyzz = mfcba+mfabc;
+                  LBMReal mxxyMyzz = mfcba-mfabc;
+
+                  LBMReal mxxzPyyz = mfcab+mfacb;
+                  LBMReal mxxzMyyz = mfcab-mfacb;
+
+                  LBMReal mxyyPxzz = mfbca+mfbac;
+                  LBMReal mxyyMxzz = mfbca-mfbac;
+
+                  //relax
+                  //////////////////////////////////////////////////////////////////////////
+                  //this is the limiter
+                  wadjust = Oxyz+(one-Oxyz)*abs(mfbbb)/(abs(mfbbb)+qudricLimitD);
+                  mfbbb += wadjust * (-mfbbb);
+                  wadjust = OxyyPxzz+(one-OxyyPxzz)*abs(mxxyPyzz)/(abs(mxxyPyzz)+qudricLimitP);
+                  mxxyPyzz += wadjust * (-mxxyPyzz);
+                  wadjust = OxyyMxzz+(one-OxyyMxzz)*abs(mxxyMyzz)/(abs(mxxyMyzz)+qudricLimitM);
+                  mxxyMyzz += wadjust * (-mxxyMyzz);
+                  wadjust = OxyyPxzz+(one-OxyyPxzz)*abs(mxxzPyyz)/(abs(mxxzPyyz)+qudricLimitP);
+                  mxxzPyyz += wadjust * (-mxxzPyyz);
+                  wadjust = OxyyMxzz+(one-OxyyMxzz)*abs(mxxzMyyz)/(abs(mxxzMyyz)+qudricLimitM);
+                  mxxzMyyz += wadjust * (-mxxzMyyz);
+                  wadjust = OxyyPxzz+(one-OxyyPxzz)*abs(mxyyPxzz)/(abs(mxyyPxzz)+qudricLimitP);
+                  mxyyPxzz += wadjust * (-mxyyPxzz);
+                  wadjust = OxyyMxzz+(one-OxyyMxzz)*abs(mxyyMxzz)/(abs(mxyyMxzz)+qudricLimitM);
+                  mxyyMxzz += wadjust * (-mxyyMxzz);
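+                  // wadjust blends the nominal rate towards one when the moment is
+                  // large relative to the quadric limiter threshold, damping
+                  // oscillations in under-resolved regions.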
+                  //////////////////////////////////////////////////////////////////////////
+                  //without limiter
+                  //mfbbb     += OxyyMxzz * (-mfbbb);
+                  //mxxyPyzz  += OxyyPxzz * (-mxxyPyzz);
+                  //mxxyMyzz  += OxyyMxzz * (-mxxyMyzz);
+                  //mxxzPyyz  += OxyyPxzz * (-mxxzPyyz);
+                  //mxxzMyyz  += OxyyMxzz * (-mxxzMyyz);
+                  //mxyyPxzz  += OxyyPxzz * (-mxyyPxzz);
+                  //mxyyMxzz  += OxyyMxzz * (-mxyyMxzz);
+                  //////////////////////////////////////////////////////////////////////////
+
+                  //// linear combinations back
+                  mfcba = (mxxyMyzz+mxxyPyzz) * c1o2;
+                  mfabc = (-mxxyMyzz+mxxyPyzz) * c1o2;
+                  mfcab = (mxxzMyyz+mxxzPyyz) * c1o2;
+                  mfacb = (-mxxzMyyz+mxxzPyyz) * c1o2;
+                  mfbca = (mxyyMxzz+mxyyPxzz) * c1o2;
+                  mfbac = (-mxyyMxzz+mxyyPxzz) * c1o2;
+
+                  //4.
+                  //////////////////////////////////////////////////////////////////////////
+                  //with limiter
+               //	wadjust    = O4+(one-O4)*abs(CUMacc)/(abs(CUMacc)+qudricLimit);
+                  //CUMacc    += wadjust * (-CUMacc);
+               //	wadjust    = O4+(one-O4)*abs(CUMcac)/(abs(CUMcac)+qudricLimit);
+                  //CUMcac    += wadjust * (-CUMcac); 
+               //	wadjust    = O4+(one-O4)*abs(CUMcca)/(abs(CUMcca)+qudricLimit);
+                  //CUMcca    += wadjust * (-CUMcca); 
+
+               //	wadjust    = O4+(one-O4)*abs(CUMbbc)/(abs(CUMbbc)+qudricLimit);
+                  //CUMbbc    += wadjust * (-CUMbbc); 
+               //	wadjust    = O4+(one-O4)*abs(CUMbcb)/(abs(CUMbcb)+qudricLimit);
+                  //CUMbcb    += wadjust * (-CUMbcb); 
+               //	wadjust    = O4+(one-O4)*abs(CUMcbb)/(abs(CUMcbb)+qudricLimit);
+                  //CUMcbb    += wadjust * (-CUMcbb); 
+                  //////////////////////////////////////////////////////////////////////////
+                  //////////////////////////////////////////////////////////////////////////
+                  LBMReal A = (four + two*omega - three*omega*omega) / (two - seven*omega + five*omega*omega);
+                  LBMReal B = (four + twentyeight*omega - fourteen*omega*omega) / (six - twentyone*omega + fiveteen*omega*omega);
+                  //////////////////////////////////////////////////////////////////////////
+
+
+                  //without limiter
+                  //CUMacc += O4 * (-CUMacc);
+                  //CUMcac += O4 * (-CUMcac);
+                  //CUMcca += O4 * (-CUMcca);
+
+                  //CUMbbc += O4 * (-CUMbbc);
+                  //CUMbcb += O4 * (-CUMbcb);
+                  //CUMcbb += O4 * (-CUMcbb);
+                  CUMacc = -O4*(one / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (one - O4) * (CUMacc);
+                  CUMcac = -O4*(one / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (one - O4) * (CUMcac);
+                  CUMcca = -O4*(one / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (one - O4) * (CUMcca);
+                  CUMbbc = -O4*(one / omega - c1o2) * Dxy           * c1o3 * B + (one - O4) * (CUMbbc);
+                  CUMbcb = -O4*(one / omega - c1o2) * Dxz           * c1o3 * B + (one - O4) * (CUMbcb);
+                  CUMcbb = -O4*(one / omega - c1o2) * Dyz           * c1o3 * B + (one - O4) * (CUMcbb);
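+                  // Instead of plain relaxation (kept above as comments), the
+                  // fourth-order cumulants are set to viscosity-dependent correction
+                  // terms built from the velocity gradients, with omega-dependent
+                  // prefactors A and B.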
+                  //////////////////////////////////////////////////////////////////////////
+
+
+                  //5.
+                  CUMbcc += O5 * (-CUMbcc);
+                  CUMcbc += O5 * (-CUMcbc);
+                  CUMccb += O5 * (-CUMccb);
+
+                  //6.
+                  CUMccc += O6 * (-CUMccc);
+
+
+
+                  //back cumulants to central moments
+                  //4.
+                  mfcbb = CUMcbb+((mfcaa+c1o3) * mfabb+two * mfbba * mfbab)/rho;
+                  mfbcb = CUMbcb+((mfaca+c1o3) * mfbab+two * mfbba * mfabb)/rho;
+                  mfbbc = CUMbbc+((mfaac+c1o3) * mfbba+two * mfbab * mfabb)/rho;
+
+                  mfcca = CUMcca+(((mfcaa * mfaca+two * mfbba * mfbba)+c1o3 * (mfcaa+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
+                  mfcac = CUMcac+(((mfcaa * mfaac+two * mfbab * mfbab)+c1o3 * (mfcaa+mfaac))/rho-c1o9*(drho/rho));//(one/rho-one));
+                  mfacc = CUMacc+(((mfaac * mfaca+two * mfabb * mfabb)+c1o3 * (mfaac+mfaca))/rho-c1o9*(drho/rho));//(one/rho-one));
+
+                  //5.
+                  mfbcc = CUMbcc+((mfaac * mfbca+mfaca * mfbac+four * mfabb * mfbbb+two * (mfbab * mfacb+mfbba * mfabc))+c1o3 * (mfbca+mfbac))/rho;
+                  mfcbc = CUMcbc+((mfaac * mfcba+mfcaa * mfabc+four * mfbab * mfbbb+two * (mfabb * mfcab+mfbba * mfbac))+c1o3 * (mfcba+mfabc))/rho;
+                  mfccb = CUMccb+((mfcaa * mfacb+mfaca * mfcab+four * mfbba * mfbbb+two * (mfbab * mfbca+mfabb * mfcba))+c1o3 * (mfacb+mfcab))/rho;
+
+                  //6.
+
+                  mfccc = CUMccc-((-four *  mfbbb * mfbbb
+                     -(mfcaa * mfacc+mfaca * mfcac+mfaac * mfcca)
+                     -four * (mfabb * mfcbb+mfbab * mfbcb+mfbba * mfbbc)
+                     -two * (mfbca * mfbac+mfcba * mfabc+mfcab * mfacb))/rho
+                     +(four * (mfbab * mfbab * mfaca+mfabb * mfabb * mfcaa+mfbba * mfbba * mfaac)
+                        +two * (mfcaa * mfaca * mfaac)
+                        +sixteen *  mfbba * mfbab * mfabb)/(rho * rho)
+                     -c1o3 * (mfacc+mfcac+mfcca)/rho
+                     -c1o9 * (mfcaa+mfaca+mfaac)/rho
+                     +(two * (mfbab * mfbab+mfabb * mfabb+mfbba * mfbba)
+                        +(mfaac * mfaca+mfaac * mfcaa+mfaca * mfcaa)+c1o3 *(mfaac+mfaca+mfcaa))/(rho * rho) * c2o3
+                     +c1o27*((drho * drho-drho)/(rho*rho)));
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  //forcing
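+                  // negating the first-order central moments applies the force
+                  // contribution during the back transform; presumably paired with
+                  // a half-force shift of the velocities before the forward
+                  // transform for second-order accuracy.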
+                  mfbaa=-mfbaa;
+                  mfaba=-mfaba;
+                  mfaab=-mfaab;
+                  //////////////////////////////////////////////////////////////////////////////////////
+
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  //back
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  //condition with 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  // Z - Dir
+                  m0 = mfaac * c1o2+mfaab * (vvz-c1o2)+(mfaaa+one* oMdrho) * (vz2-vvz) * c1o2;
+                  m1 = -mfaac-two* mfaab *  vvz+mfaaa                * (one-vz2)-one* oMdrho * vz2;
+                  m2 = mfaac * c1o2+mfaab * (vvz+c1o2)+(mfaaa+one* oMdrho) * (vz2+vvz) * c1o2;
+                  mfaaa = m0;
+                  mfaab = m1;
+                  mfaac = m2;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m0 = mfabc * c1o2+mfabb * (vvz-c1o2)+mfaba * (vz2-vvz) * c1o2;
+                  m1 = -mfabc-two* mfabb *  vvz+mfaba * (one-vz2);
+                  m2 = mfabc * c1o2+mfabb * (vvz+c1o2)+mfaba * (vz2+vvz) * c1o2;
+                  mfaba = m0;
+                  mfabb = m1;
+                  mfabc = m2;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m0 = mfacc * c1o2+mfacb * (vvz-c1o2)+(mfaca+c1o3 * oMdrho) * (vz2-vvz) * c1o2;
+                  m1 = -mfacc-two* mfacb *  vvz+mfaca                  * (one-vz2)-c1o3 * oMdrho * vz2;
+                  m2 = mfacc * c1o2+mfacb * (vvz+c1o2)+(mfaca+c1o3 * oMdrho) * (vz2+vvz) * c1o2;
+                  mfaca = m0;
+                  mfacb = m1;
+                  mfacc = m2;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m0 = mfbac * c1o2+mfbab * (vvz-c1o2)+mfbaa * (vz2-vvz) * c1o2;
+                  m1 = -mfbac-two* mfbab *  vvz+mfbaa * (one-vz2);
+                  m2 = mfbac * c1o2+mfbab * (vvz+c1o2)+mfbaa * (vz2+vvz) * c1o2;
+                  mfbaa = m0;
+                  mfbab = m1;
+                  mfbac = m2;
+                  /////////b//////////////////////////////////////////////////////////////////////////
+                  m0 = mfbbc * c1o2+mfbbb * (vvz-c1o2)+mfbba * (vz2-vvz) * c1o2;
+                  m1 = -mfbbc-two* mfbbb *  vvz+mfbba * (one-vz2);
+                  m2 = mfbbc * c1o2+mfbbb * (vvz+c1o2)+mfbba * (vz2+vvz) * c1o2;
+                  mfbba = m0;
+                  mfbbb = m1;
+                  mfbbc = m2;
+                  /////////b//////////////////////////////////////////////////////////////////////////
+                  m0 = mfbcc * c1o2+mfbcb * (vvz-c1o2)+mfbca * (vz2-vvz) * c1o2;
+                  m1 = -mfbcc-two* mfbcb *  vvz+mfbca * (one-vz2);
+                  m2 = mfbcc * c1o2+mfbcb * (vvz+c1o2)+mfbca * (vz2+vvz) * c1o2;
+                  mfbca = m0;
+                  mfbcb = m1;
+                  mfbcc = m2;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m0 = mfcac * c1o2+mfcab * (vvz-c1o2)+(mfcaa+c1o3 * oMdrho) * (vz2-vvz) * c1o2;
+                  m1 = -mfcac-two* mfcab *  vvz+mfcaa                  * (one-vz2)-c1o3 * oMdrho * vz2;
+                  m2 = mfcac * c1o2+mfcab * (vvz+c1o2)+(mfcaa+c1o3 * oMdrho) * (vz2+vvz) * c1o2;
+                  mfcaa = m0;
+                  mfcab = m1;
+                  mfcac = m2;
+                  /////////c//////////////////////////////////////////////////////////////////////////
+                  m0 = mfcbc * c1o2+mfcbb * (vvz-c1o2)+mfcba * (vz2-vvz) * c1o2;
+                  m1 = -mfcbc-two* mfcbb *  vvz+mfcba * (one-vz2);
+                  m2 = mfcbc * c1o2+mfcbb * (vvz+c1o2)+mfcba * (vz2+vvz) * c1o2;
+                  mfcba = m0;
+                  mfcbb = m1;
+                  mfcbc = m2;
+                  /////////c//////////////////////////////////////////////////////////////////////////
+                  m0 = mfccc * c1o2+mfccb * (vvz-c1o2)+(mfcca+c1o9 * oMdrho) * (vz2-vvz) * c1o2;
+                  m1 = -mfccc-two* mfccb *  vvz+mfcca                  * (one-vz2)-c1o9 * oMdrho * vz2;
+                  m2 = mfccc * c1o2+mfccb * (vvz+c1o2)+(mfcca+c1o9 * oMdrho) * (vz2+vvz) * c1o2;
+                  mfcca = m0;
+                  mfccb = m1;
+                  mfccc = m2;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  //condition with 1/6, 2/3, 1/6, 0, 0, 0, 1/18, 2/9, 1/18
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  // Y - Dir
+                  m0 = mfaca * c1o2+mfaba * (vvy-c1o2)+(mfaaa+c1o6 * oMdrho) * (vy2-vvy) * c1o2;
+                  m1 = -mfaca-two* mfaba *  vvy+mfaaa                  * (one-vy2)-c1o6 * oMdrho * vy2;
+                  m2 = mfaca * c1o2+mfaba * (vvy+c1o2)+(mfaaa+c1o6 * oMdrho) * (vy2+vvy) * c1o2;
+                  mfaaa = m0;
+                  mfaba = m1;
+                  mfaca = m2;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m0 = mfacb * c1o2+mfabb * (vvy-c1o2)+(mfaab+c2o3 * oMdrho) * (vy2-vvy) * c1o2;
+                  m1 = -mfacb-two* mfabb *  vvy+mfaab                  * (one-vy2)-c2o3 * oMdrho * vy2;
+                  m2 = mfacb * c1o2+mfabb * (vvy+c1o2)+(mfaab+c2o3 * oMdrho) * (vy2+vvy) * c1o2;
+                  mfaab = m0;
+                  mfabb = m1;
+                  mfacb = m2;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m0 = mfacc * c1o2+mfabc * (vvy-c1o2)+(mfaac+c1o6 * oMdrho) * (vy2-vvy) * c1o2;
+                  m1 = -mfacc-two* mfabc *  vvy+mfaac                  * (one-vy2)-c1o6 * oMdrho * vy2;
+                  m2 = mfacc * c1o2+mfabc * (vvy+c1o2)+(mfaac+c1o6 * oMdrho) * (vy2+vvy) * c1o2;
+                  mfaac = m0;
+                  mfabc = m1;
+                  mfacc = m2;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m0 = mfbca * c1o2+mfbba * (vvy-c1o2)+mfbaa * (vy2-vvy) * c1o2;
+                  m1 = -mfbca-two* mfbba *  vvy+mfbaa * (one-vy2);
+                  m2 = mfbca * c1o2+mfbba * (vvy+c1o2)+mfbaa * (vy2+vvy) * c1o2;
+                  mfbaa = m0;
+                  mfbba = m1;
+                  mfbca = m2;
+                  /////////b//////////////////////////////////////////////////////////////////////////
+                  m0 = mfbcb * c1o2+mfbbb * (vvy-c1o2)+mfbab * (vy2-vvy) * c1o2;
+                  m1 = -mfbcb-two* mfbbb *  vvy+mfbab * (one-vy2);
+                  m2 = mfbcb * c1o2+mfbbb * (vvy+c1o2)+mfbab * (vy2+vvy) * c1o2;
+                  mfbab = m0;
+                  mfbbb = m1;
+                  mfbcb = m2;
+                  /////////b//////////////////////////////////////////////////////////////////////////
+                  m0 = mfbcc * c1o2+mfbbc * (vvy-c1o2)+mfbac * (vy2-vvy) * c1o2;
+                  m1 = -mfbcc-two* mfbbc *  vvy+mfbac * (one-vy2);
+                  m2 = mfbcc * c1o2+mfbbc * (vvy+c1o2)+mfbac * (vy2+vvy) * c1o2;
+                  mfbac = m0;
+                  mfbbc = m1;
+                  mfbcc = m2;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m0 = mfcca * c1o2+mfcba * (vvy-c1o2)+(mfcaa+c1o18 * oMdrho) * (vy2-vvy) * c1o2;
+                  m1 = -mfcca-two* mfcba *  vvy+mfcaa                   * (one-vy2)-c1o18 * oMdrho * vy2;
+                  m2 = mfcca * c1o2+mfcba * (vvy+c1o2)+(mfcaa+c1o18 * oMdrho) * (vy2+vvy) * c1o2;
+                  mfcaa = m0;
+                  mfcba = m1;
+                  mfcca = m2;
+                  /////////c//////////////////////////////////////////////////////////////////////////
+                  m0 = mfccb * c1o2+mfcbb * (vvy-c1o2)+(mfcab+c2o9 * oMdrho) * (vy2-vvy) * c1o2;
+                  m1 = -mfccb-two* mfcbb *  vvy+mfcab                  * (one-vy2)-c2o9 * oMdrho * vy2;
+                  m2 = mfccb * c1o2+mfcbb * (vvy+c1o2)+(mfcab+c2o9 * oMdrho) * (vy2+vvy) * c1o2;
+                  mfcab = m0;
+                  mfcbb = m1;
+                  mfccb = m2;
+                  /////////c//////////////////////////////////////////////////////////////////////////
+                  m0 = mfccc * c1o2+mfcbc * (vvy-c1o2)+(mfcac+c1o18 * oMdrho) * (vy2-vvy) * c1o2;
+                  m1 = -mfccc-two* mfcbc *  vvy+mfcac                   * (one-vy2)-c1o18 * oMdrho * vy2;
+                  m2 = mfccc * c1o2+mfcbc * (vvy+c1o2)+(mfcac+c1o18 * oMdrho) * (vy2+vvy) * c1o2;
+                  mfcac = m0;
+                  mfcbc = m1;
+                  mfccc = m2;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  //condition with 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  // X - Dir
+                  m0 = mfcaa * c1o2+mfbaa * (vvx-c1o2)+(mfaaa+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
+                  m1 = -mfcaa-two* mfbaa *  vvx+mfaaa                   * (one-vx2)-c1o36 * oMdrho * vx2;
+                  m2 = mfcaa * c1o2+mfbaa * (vvx+c1o2)+(mfaaa+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
+                  mfaaa = m0;
+                  mfbaa = m1;
+                  mfcaa = m2;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m0 = mfcba * c1o2+mfbba * (vvx-c1o2)+(mfaba+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
+                  m1 = -mfcba-two* mfbba *  vvx+mfaba                  * (one-vx2)-c1o9 * oMdrho * vx2;
+                  m2 = mfcba * c1o2+mfbba * (vvx+c1o2)+(mfaba+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
+                  mfaba = m0;
+                  mfbba = m1;
+                  mfcba = m2;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m0 = mfcca * c1o2+mfbca * (vvx-c1o2)+(mfaca+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
+                  m1 = -mfcca-two* mfbca *  vvx+mfaca                   * (one-vx2)-c1o36 * oMdrho * vx2;
+                  m2 = mfcca * c1o2+mfbca * (vvx+c1o2)+(mfaca+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
+                  mfaca = m0;
+                  mfbca = m1;
+                  mfcca = m2;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m0 = mfcab * c1o2+mfbab * (vvx-c1o2)+(mfaab+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
+                  m1 = -mfcab-two* mfbab *  vvx+mfaab                  * (one-vx2)-c1o9 * oMdrho * vx2;
+                  m2 = mfcab * c1o2+mfbab * (vvx+c1o2)+(mfaab+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
+                  mfaab = m0;
+                  mfbab = m1;
+                  mfcab = m2;
+                  ///////////b////////////////////////////////////////////////////////////////////////
+                  m0 = mfcbb * c1o2+mfbbb * (vvx-c1o2)+(mfabb+c4o9 * oMdrho) * (vx2-vvx) * c1o2;
+                  m1 = -mfcbb-two* mfbbb *  vvx+mfabb                  * (one-vx2)-c4o9 * oMdrho * vx2;
+                  m2 = mfcbb * c1o2+mfbbb * (vvx+c1o2)+(mfabb+c4o9 * oMdrho) * (vx2+vvx) * c1o2;
+                  mfabb = m0;
+                  mfbbb = m1;
+                  mfcbb = m2;
+                  ///////////b////////////////////////////////////////////////////////////////////////
+                  m0 = mfccb * c1o2+mfbcb * (vvx-c1o2)+(mfacb+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
+                  m1 = -mfccb-two* mfbcb *  vvx+mfacb                  * (one-vx2)-c1o9 * oMdrho * vx2;
+                  m2 = mfccb * c1o2+mfbcb * (vvx+c1o2)+(mfacb+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
+                  mfacb = m0;
+                  mfbcb = m1;
+                  mfccb = m2;
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  ////////////////////////////////////////////////////////////////////////////////////
+                  m0 = mfcac * c1o2+mfbac * (vvx-c1o2)+(mfaac+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
+                  m1 = -mfcac-two* mfbac *  vvx+mfaac                   * (one-vx2)-c1o36 * oMdrho * vx2;
+                  m2 = mfcac * c1o2+mfbac * (vvx+c1o2)+(mfaac+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
+                  mfaac = m0;
+                  mfbac = m1;
+                  mfcac = m2;
+                  ///////////c////////////////////////////////////////////////////////////////////////
+                  m0 = mfcbc * c1o2+mfbbc * (vvx-c1o2)+(mfabc+c1o9 * oMdrho) * (vx2-vvx) * c1o2;
+                  m1 = -mfcbc-two* mfbbc *  vvx+mfabc                  * (one-vx2)-c1o9 * oMdrho * vx2;
+                  m2 = mfcbc * c1o2+mfbbc * (vvx+c1o2)+(mfabc+c1o9 * oMdrho) * (vx2+vvx) * c1o2;
+                  mfabc = m0;
+                  mfbbc = m1;
+                  mfcbc = m2;
+                  ///////////c////////////////////////////////////////////////////////////////////////
+                  m0 = mfccc * c1o2+mfbcc * (vvx-c1o2)+(mfacc+c1o36 * oMdrho) * (vx2-vvx) * c1o2;
+                  m1 = -mfccc-two* mfbcc *  vvx+mfacc                   * (one-vx2)-c1o36 * oMdrho * vx2;
+                  m2 = mfccc * c1o2+mfbcc * (vvx+c1o2)+(mfacc+c1o36 * oMdrho) * (vx2+vvx) * c1o2;
+                  mfacc = m0;
+                  mfbcc = m1;
+                  mfccc = m2;
+                  ////////////////////////////////////////////////////////////////////////////////////
+
+                  //////////////////////////////////////////////////////////////////////////
+                  //proof correctness
+                  //////////////////////////////////////////////////////////////////////////
+#ifdef  PROOF_CORRECTNESS
+                  LBMReal drho_post = (mfaaa+mfaac+mfaca+mfcaa+mfacc+mfcac+mfccc+mfcca)
+                     +(mfaab+mfacb+mfcab+mfccb)+(mfaba+mfabc+mfcba+mfcbc)+(mfbaa+mfbac+mfbca+mfbcc)
+                     +(mfabb+mfcbb)+(mfbab+mfbcb)+(mfbba+mfbbc)+mfbbb;
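+                  // Summing all 27 post-collision distributions must reproduce the
+                  // pre-collision density deviation drho up to round-off.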
+                  //LBMReal dif = fabs(rho - rho_post);
+                  LBMReal dif = drho - drho_post;
+#ifdef SINGLEPRECISION
+                  if (dif > 10.0E-7 || dif < -10.0E-7)
+#else
+                  if (dif > 10.0E-15 || dif < -10.0E-15)
+#endif
+                  {
+                     UB_THROW(UbException(UB_EXARGS, "rho="+UbSystem::toString(drho)+", rho_post="+UbSystem::toString(drho_post)
+                        +" dif="+UbSystem::toString(dif)
+                        +" rho is not correct for node "+UbSystem::toString(x1)+","+UbSystem::toString(x2)+","+UbSystem::toString(x3)));
+                     //UBLOG(logERROR,"LBMKernelETD3Q27CCLB::collideAll(): rho is not correct for node "+UbSystem::toString(x1)+","+UbSystem::toString(x2)+","+UbSystem::toString(x3));
+                     //exit(EXIT_FAILURE);
+                  }
+#endif
+                  //////////////////////////////////////////////////////////////////////////
+                  //write distribution
+                  //////////////////////////////////////////////////////////////////////////
+                  (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3)    = mfabb;
+                  (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3)    = mfbab;
+                  (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3)    = mfbba;
+                  (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3)   = mfaab;
+                  (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3)   = mfcab;
+                  (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3)   = mfaba;
+                  (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3)   = mfcba;
+                  (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3)   = mfbaa;
+                  (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3)   = mfbca;
+                  (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3)  = mfaaa;
+                  (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3)  = mfcaa;
+                  (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3)  = mfaca;
+                  (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3)  = mfcca;
+
+                  (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3) = mfcbb;
+                  (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3) = mfbcb;
+                  (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p) = mfbbc;
+                  (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3) = mfccb;
+                  (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3) = mfacb;
+                  (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p) = mfcbc;
+                  (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p) = mfabc;
+                  (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p) = mfbcc;
+                  (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p) = mfbac;
+                  (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p) = mfccc;
+                  (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p) = mfacc;
+                  (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p) = mfcac;
+                  (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p) = mfaac;
+
+                  (*this->zeroDistributions)(x1, x2, x3) = mfbbb;
+                  //////////////////////////////////////////////////////////////////////////
+
+               }
+            }
+         }
+      }
+
+   }
+}
+//////////////////////////////////////////////////////////////////////////
+double CompressibleCumulant2LBMKernel::getCallculationTime()
+{
+   //return timer.getDuration();
+   return timer.getTotalTime();
+}
+//////////////////////////////////////////////////////////////////////////
+void CompressibleCumulant2LBMKernel::setBulkOmegaToOmega(bool value)
+{
+   bulkOmegaToOmega = value;
+}
diff --git a/source/VirtualFluidsCore/LBM/CompressibleCumulant2LBMKernel.h b/source/VirtualFluidsCore/LBM/CompressibleCumulant2LBMKernel.h
new file mode 100644
index 000000000..5b720e695
--- /dev/null
+++ b/source/VirtualFluidsCore/LBM/CompressibleCumulant2LBMKernel.h
@@ -0,0 +1,72 @@
+#ifndef CompressibleCumulant2LBMKernel_h__
+#define CompressibleCumulant2LBMKernel_h__
+
+#include "LBMKernel.h"
+#include "BCProcessor.h"
+#include "D3Q27System.h"
+#include <boost/serialization/export.hpp>
+#include "basics/utilities/UbTiming.h"
+#include "basics/container/CbArray4D.h"
+#include "basics/container/CbArray3D.h"
+
+class CompressibleCumulant2LBMKernel;
+typedef boost::shared_ptr<CompressibleCumulant2LBMKernel> CompressibleCumulant2LBMKernelPtr;
+
+//! \brief   Compressible cumulant LBM kernel.
+//! \details CFD solver that uses the cascaded cumulant lattice Boltzmann method for the D3Q27 model
+//! \author  K. Kutscher, M. Geier
+class CompressibleCumulant2LBMKernel :  public LBMKernel
+{
+public:
+   //! This option sets the relaxation parameter: NORMAL or MAGIC
+   enum Parameter{NORMAL, MAGIC};
+public:
+   CompressibleCumulant2LBMKernel();
+   //! Constructor
+   //! \param nx1 number of nodes in x dimension
+   //! \param nx2 number of nodes in y dimension
+   //! \param nx3 number of nodes in z dimension
+   //! \param p   sets the relaxation parameter: NORMAL gives OxyyMxzz = 1.0, MAGIC gives OxyyMxzz = 2.0 + (-collFactor)
+   CompressibleCumulant2LBMKernel(int nx1, int nx2, int nx3, Parameter p);
+   virtual ~CompressibleCumulant2LBMKernel(void);
+   virtual void calculate();
+   virtual LBMKernelPtr clone();
+   double getCallculationTime();
+   void setBulkOmegaToOmega(bool value);
+protected:
+   friend class boost::serialization::access;
+   template<class Archive>
+   void serialize(Archive & ar, const unsigned int version)
+   {
+      ar & boost::serialization::base_object<LBMKernel>(*this);
+      ar & OxyyMxzz; 
+      ar & parameter;
+   }
+
+   virtual void collideAll();  
+   virtual void init();
+   LBMReal f[D3Q27System::ENDF+1];
+
+   UbTimer timer;
+
+   LBMReal OxyyMxzz;
+   Parameter parameter;
+
+   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+   CbArray4D<LBMReal,IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+   CbArray3D<LBMReal,IndexerX3X2X1>::CbArray3DPtr   zeroDistributions;
+
+   mu::value_type muX1,muX2,muX3;
+   mu::value_type muDeltaT;
+   mu::value_type muNu;
+   LBMReal forcingX1;
+   LBMReal forcingX2;
+   LBMReal forcingX3;
+   
+   // bulk viscosity
+   bool bulkOmegaToOmega;
+   LBMReal OxxPyyPzz; 
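+   // setBulkOmegaToOmega() toggles whether the bulk-viscosity rate OxxPyyPzz
+   // is tied to the shear relaxation rate omega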
+};
+#endif // CompressibleCumulant2LBMKernel_h__
+
+
diff --git a/source/VirtualFluidsCore/Utilities/ConfigurationFile.hpp b/source/VirtualFluidsCore/Utilities/ConfigurationFile.hpp
index 473d9a518..7231f601f 100644
--- a/source/VirtualFluidsCore/Utilities/ConfigurationFile.hpp
+++ b/source/VirtualFluidsCore/Utilities/ConfigurationFile.hpp
@@ -1,5 +1,5 @@
-#ifndef Configuration_h__
-#define Configuration_h__
+#ifndef ConfigurationFile_h__
+#define ConfigurationFile_h__
 
 #include <map>
 #include <string>
diff --git a/source/VirtualFluidsCore/Visitors/BoundaryConditionsBlockVisitor.cpp b/source/VirtualFluidsCore/Visitors/BoundaryConditionsBlockVisitor.cpp
index da1d816d5..59265a084 100644
--- a/source/VirtualFluidsCore/Visitors/BoundaryConditionsBlockVisitor.cpp
+++ b/source/VirtualFluidsCore/Visitors/BoundaryConditionsBlockVisitor.cpp
@@ -68,13 +68,13 @@ void BoundaryConditionsBlockVisitor::visit(Grid3DPtr grid, Block3DPtr block)
                      if (bca)
                      {
                         bca = bca->clone();
-                        bca->addNode(x1, x2, x3);
-                        bca->addBcPointer(bcPtr);
+                        bca->setNodeIndex(x1, x2, x3);
+                        bca->setBcPointer(bcPtr);
                         bca->addDistributions(distributions);
                         bca->setCollFactor(collFactor);
                         bca->setCompressible(compressible);
-                        bcProcessor->addBC(bca);
                         bca->setBcArray(bcArray);
+                        bcProcessor->addBC(bca);
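+                        // addBC now comes last so the algorithm object is fully
+                        // configured before the processor stores it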
                      }
                   }
                }
diff --git a/source/VirtualFluidsCore/Visitors/GenBlocksGridVisitor.cpp b/source/VirtualFluidsCore/Visitors/GenBlocksGridVisitor.cpp
index 190c47eee..20df4e0ba 100644
--- a/source/VirtualFluidsCore/Visitors/GenBlocksGridVisitor.cpp
+++ b/source/VirtualFluidsCore/Visitors/GenBlocksGridVisitor.cpp
@@ -2,70 +2,65 @@
 #include "Grid3DSystem.h"
 #include <boost/foreach.hpp>
 
+
 GenBlocksGridVisitor::GenBlocksGridVisitor(GbObject3DPtr boundingBox) :
-   boundingBox(boundingBox),
-   nx1(0),
-   nx2(0),
-   nx3(0),
-   withDeltaX(true)
+   boundingBox(boundingBox)
 {
 
 }
-GenBlocksGridVisitor::GenBlocksGridVisitor(int nx1, int nx2, int nx3) :
-   nx1(nx1),
-   nx2(nx2),
-   nx3(nx3),
-   withDeltaX(false)
-{
 
-}
 //////////////////////////////////////////////////////////////////////////
-void GenBlocksGridVisitor::visit( const Grid3DPtr grid )
+void GenBlocksGridVisitor::visit(const Grid3DPtr grid)
 {
-   findOrigin(grid);
+   double orgX1 = boundingBox->getX1Minimum();
+   double orgX2 = boundingBox->getX2Minimum();
+   double orgX3 = boundingBox->getX3Minimum();
+
+   double dx = grid->getDeltaX(0);
+
    UbTupleInt3 blockNX = grid->getBlockNX();
-   double dx;
-   double geoMaxX1 = boundingBox->getX1Maximum();
-   double geoMaxX2 = boundingBox->getX2Maximum();
-   double geoMaxX3 = boundingBox->getX3Maximum();
 
-   if (withDeltaX)
-   {
-      dx = grid->getDeltaX(0);
-      blockLentghX1 = (double)val<1>(blockNX)*dx;
-      blockLentghX2 = (double)val<2>(blockNX)*dx;
-      blockLentghX3 = (double)val<3>(blockNX)*dx;
-   } 
-   else
-   {
-      int gNX1 = grid->getNX1();
-      dx = boundingBox->getLengthX1()/double(val<1>(blockNX)*gNX1);
-      grid->setDeltaX(dx);
-      blockLentghX1 = val<1>(blockNX)*dx;
-      blockLentghX2 = val<2>(blockNX)*dx;
-      blockLentghX3 = val<3>(blockNX)*dx;
-   }
+   double blockLengthX1 = (double)val<1>(blockNX)*dx;
+   double blockLengthX2 = (double)val<2>(blockNX)*dx;
+   double blockLengthX3 = (double)val<3>(blockNX)*dx;
 
-   CoordinateTransformation3DPtr trafo(new CoordinateTransformation3D(orgX1,orgX2,orgX3,blockLentghX1,blockLentghX2,blockLentghX3));
+   CoordinateTransformation3DPtr trafo(new CoordinateTransformation3D(orgX1, orgX2, orgX3, blockLengthX1, blockLengthX2, blockLengthX3));
    grid->setCoordinateTransformator(trafo);
-   genBlocks(grid);
-
 
+   genBlocks(grid);
 }
 //////////////////////////////////////////////////////////////////////////
-void GenBlocksGridVisitor::fillExtentWithBlocks( Grid3DPtr grid )
+void GenBlocksGridVisitor::fillExtentWithBlocks(Grid3DPtr grid)
 {
-   for(int x3 =  val<3>(minInd); x3 <  val<3>(maxInd); x3++)
+   for (int x3 = val<3>(minInd); x3 < val<3>(maxInd); x3++)
    {
-      for(int x2 =  val<2>(minInd); x2 <  val<2>(maxInd); x2++)
+      for (int x2 = val<2>(minInd); x2 < val<2>(maxInd); x2++)
       {
-         for(int x1 =  val<1>(minInd); x1 <  val<1>(maxInd); x1++)
+         for (int x1 = val<1>(minInd); x1 < val<1>(maxInd); x1++)
          {
-            Block3DPtr block( new Block3D(x1,x2,x3,0) );
+            Block3DPtr block(new Block3D(x1, x2, x3, 0));
             grid->addBlock(block);
          }
       }
    }
+
+   //double dx = grid->getDeltaX(0);
+   //UbTupleInt3 blockNX = grid->getBlockNX();
+   //int maxIX1 = ceil(boundingBox->getX1Maximum() / ((double)val<1>(blockNX)*dx));
+   //int maxIX2 = ceil(boundingBox->getX2Maximum() / ((double)val<2>(blockNX)*dx));
+   //int maxIX3 = ceil(boundingBox->getX3Maximum() / ((double)val<3>(blockNX)*dx));
+
+   //for (int x3 = 0; x3 < maxIX1; x3++)
+   //{
+   //   for (int x2 = 0; x2 < maxIX2; x2++)
+   //   {
+   //      for (int x1 = 0; x1 < maxIX3; x1++)
+   //      {
+   //         Block3DPtr block(new Block3D(x1, x2, x3, 0));
+   //         grid->addBlock(block);
+   //      }
+   //   }
+   //}
 }
 //////////////////////////////////////////////////////////////////////////
 void GenBlocksGridVisitor::genBlocks(Grid3DPtr grid)
@@ -75,12 +70,14 @@ void GenBlocksGridVisitor::genBlocks(Grid3DPtr grid)
    double geoMaxX2 = boundingBox->getX2Maximum();
    double geoMaxX3 = boundingBox->getX3Maximum();
    maxInd = grid->getBlockIndexes(geoMaxX1, geoMaxX2, geoMaxX3);
+
    UbTupleDouble3 blockCoord = grid->getBlockWorldCoordinates(static_cast<int>(val<1>(maxInd)), static_cast<int>(val<2>(maxInd)), static_cast<int>(val<3>(maxInd)), 0);
-   if(geoMaxX1 >  val<1>(blockCoord))
+   double dx = grid->getDeltaX(0);
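+   // compare the distance to the block corner against dx instead of the raw
+   // difference; this avoids a spurious extra block layer from floating-point
+   // round-off at the bounding-box maximum (the GenBlocksGridVisitor fix)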
+   if (fabs(geoMaxX1-val<1>(blockCoord)) > dx)
       val<1>(maxInd) += 1;
-   if(geoMaxX2 >  val<2>(blockCoord))
+   if (fabs(geoMaxX2-val<2>(blockCoord)) > dx)
       val<2>(maxInd) += 1;
-   if(geoMaxX3 >  val<3>(blockCoord))
+   if (fabs(geoMaxX3-val<3>(blockCoord)) > dx)
       val<3>(maxInd) += 1;
 
    this->fillExtentWithBlocks(grid);
@@ -89,20 +86,4 @@ void GenBlocksGridVisitor::genBlocks(Grid3DPtr grid)
    grid->setNX2(val<2>(maxInd));
    grid->setNX3(val<3>(maxInd));
 }
-//////////////////////////////////////////////////////////////////////////
-void GenBlocksGridVisitor::findOrigin( Grid3DPtr grid )
-{
-   orgX1 = boundingBox->getX1Minimum();
-   orgX2 = boundingBox->getX2Minimum();
-   orgX3 = boundingBox->getX3Minimum();
-
-   //double minX1, minX2, minX3;
 
-   //minX1 = boundingBox->getX1Minimum();
-   //minX2 = boundingBox->getX2Minimum();
-   //minX3 = boundingBox->getX3Minimum();
-
-   //if(minX1 <= orgX1) orgX1 = minX1;
-   //if(minX2 <= orgX2) orgX2 = minX2;
-   //if(minX3 <= orgX3) orgX3 = minX3;
-}
diff --git a/source/VirtualFluidsCore/Visitors/GenBlocksGridVisitor.h b/source/VirtualFluidsCore/Visitors/GenBlocksGridVisitor.h
index 8a793be1d..59aeace85 100644
--- a/source/VirtualFluidsCore/Visitors/GenBlocksGridVisitor.h
+++ b/source/VirtualFluidsCore/Visitors/GenBlocksGridVisitor.h
@@ -8,20 +8,14 @@ class GenBlocksGridVisitor : public Grid3DVisitor
 {
 public:
    GenBlocksGridVisitor(GbObject3DPtr boundingBox);
-   GenBlocksGridVisitor(int nx1, int nx2, int nx3);
    virtual ~GenBlocksGridVisitor(){}
 
    void visit(Grid3DPtr grid);
 
 private:
-   double orgX1, orgX2, orgX3;
    UbTupleInt3 minInd, maxInd;
-   int nx1, nx2, nx3;
-   double blockLentghX1, blockLentghX2, blockLentghX3;
-   bool withDeltaX;
    GbObject3DPtr boundingBox;
    void fillExtentWithBlocks(Grid3DPtr grid);
-   void findOrigin(Grid3DPtr grid);
    void genBlocks(Grid3DPtr grid);
 };
 
diff --git a/source/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.cpp b/source/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.cpp
index 10170f8cb..0249f9f64 100644
--- a/source/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.cpp
+++ b/source/VirtualFluidsCore/Visitors/SpongeLayerBlockVisitor.cpp
@@ -4,7 +4,7 @@
 
 using namespace std;
 
-SpongeLayerBlockVisitor::SpongeLayerBlockVisitor( GbCuboid3DPtr boundingBox ) : Block3DVisitor(0, Grid3DSystem::MAXLEVEL), boundingBox(boundingBox)
+SpongeLayerBlockVisitor::SpongeLayerBlockVisitor(GbCuboid3DPtr boundingBox) : Block3DVisitor(0, Grid3DSystem::MAXLEVEL), boundingBox(boundingBox)
 {
 
 }
@@ -14,37 +14,24 @@ SpongeLayerBlockVisitor::~SpongeLayerBlockVisitor()
 
 }
 //////////////////////////////////////////////////////////////////////////
-void SpongeLayerBlockVisitor::visit( Grid3DPtr grid, Block3DPtr block )
+void SpongeLayerBlockVisitor::visit(Grid3DPtr grid, Block3DPtr block)
 {
-   double minX1,minX2,minX3,maxX1,maxX2,maxX3;
-   int gridRank = grid->getRank();
+   if (block->getRank() == grid->getRank())
+   {
+      UbTupleDouble3 org = grid->getBlockWorldCoordinates(block);
+      UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
 
-   int minInitLevel = grid->getCoarsestInitializedLevel();
-   int maxInitLevel = grid->getFinestInitializedLevel();
+      double minX1 = val<1>(org);
+      double minX2 = val<2>(org);
+      double minX3 = val<3>(org);
+      double maxX1 = val<1>(org)+val<1>(blockLengths);
+      double maxX2 = val<2>(org)+val<2>(blockLengths);
+      double maxX3 = val<3>(org)+val<3>(blockLengths);
 
-   double numSolids = 0.0;
-   double numFluids = 0.0;
-   for (int level = minInitLevel; level<=maxInitLevel; level++)
-   {
-      vector<Block3DPtr> blockVector;
-      grid->getBlocks(level, gridRank, blockVector);
-      BOOST_FOREACH(Block3DPtr block, blockVector)
+      if (boundingBox->isCellInsideGbObject3D(minX1, minX2, minX3, maxX1, maxX2, maxX3))
       {
-         UbTupleDouble3 org = grid->getBlockWorldCoordinates(block);
-         UbTupleDouble3 blockLengths = grid->getBlockLengths(block);
-
-         minX1 = val<1>(org);
-         minX2 = val<2>(org);
-         minX3 = val<3>(org);
-         maxX1 = val<1>(org)+val<1>(blockLengths);
-         maxX2 = val<2>(org)+val<2>(blockLengths);
-         maxX3 = val<3>(org)+val<3>(blockLengths);
-
-         if (boundingBox->isCellInsideGbObject3D(minX1,minX2,minX3,maxX1,maxX2,maxX3))
-         {
-            LBMKernelPtr kernel = block->getKernel();
-            kernel->setCollisionFactor(kernel->getCollisionFactor()*0.5);
-         }
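+         // the sponge layer now sets the collision factor from a fixed viscosity
+         // (0.01 in lattice units) at the block's level instead of halving the
+         // current factor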
+         LBMKernelPtr kernel = block->getKernel();
+         kernel->setCollisionFactor(LBMSystem::calcCollisionFactor(0.01, block->getLevel()));
       }
    }
 }
-- 
GitLab