diff --git a/.gitignore b/.gitignore
index 4879d82bb2c5ce51f43be6ea9fb183cd6e28097c..df134f3d8f4289b954a8b3f7bfbc138b6be6f80e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,5 @@ output/
 # MacOS
 .DS_Store
 
+# Cluster
+run/
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index c5a86242d11893c2d1a7e876bee96665be2b0f3c..07ff3f6e3f200a4a2cde0d6fc6499b0cd8abb23d 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -3,6 +3,7 @@ image: irmb/virtualfluids-python-deps:latest
 stages:
   - build
   - build_python
+  - container_upload
   - test
   - benchmark
   - analyze
@@ -33,27 +34,25 @@ stages:
     paths:
       - $BUILD_FOLDER
 
-  before_script:
-    - export CC=gcc
-    - export CXX=g++
+  script:
     - export CCACHE_BASEDIR=$CI_PROJECT_DIR
     - export CCACHE_DIR=$CI_PROJECT_DIR/cache
-    - ccache -s
-
-
-  script:
+    - export CCACHE_COMPILERCHECK=content
+    - ccache --zero-stats
+    - ccache --show-stats
     - $CXX --version
     - $CC --version
     - cmake --version
     - mpirun --version
     - mkdir -p $CI_PROJECT_DIR/$BUILD_FOLDER
     - cd $CI_PROJECT_DIR/$BUILD_FOLDER
-    - cmake ..
+    - rm -r -f ./*
+    - cmake .. -LAH
       --preset=all_make_ccache
       -DBUILD_WARNINGS_AS_ERRORS=ON
       -DCMAKE_CUDA_ARCHITECTURES=60
-    - cmake . -LAH
     - make -j4
+    - ccache --show-stats
 
   variables:
     BUILD_FOLDER: "build"
@@ -62,8 +61,11 @@ stages:
 gcc_9:
   extends: .gnu_build_template
 
-###############################################################################
+  before_script:
+    - export CC=gcc
+    - export CXX=g++
 
+###############################################################################
 clang_10:
   extends: .gnu_build_template
 
@@ -71,42 +73,6 @@ clang_10:
     - export CC=clang
     - export CXX=clang++
 
-
-###############################################################################
-gcc_9_rebuild:
-  stage: build
-
-  image: irmb/virtualfluids-deps-ubuntu20.04
-
-  tags:
-    - gpu
-    - linux
-
-  before_script:
-    - export CCACHE_BASEDIR=$CI_PROJECT_DIR
-    - export CCACHE_DIR=$CI_PROJECT_DIR/cache
-    - ccache -s
-
-  script:
-    - mkdir -p $CI_PROJECT_DIR/build
-    - cd $CI_PROJECT_DIR/build
-    - rm -r -f ./*
-    - cmake ..
-      --preset=all_make_ccache
-      -DBUILD_WARNINGS_AS_ERRORS=ON
-      -DCMAKE_CUDA_ARCHITECTURES=60
-    - make -j4  2>&1 | tee gcc_warnings.txt
-    - ccache -s
-
-  artifacts:
-    paths:
-      - build/gcc_warnings.txt
-
-  cache:
-    key: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
-    paths:
-      - $CI_PROJECT_DIR/cache
-
 ###############################################################################
 msvc_16:
   stage: build
@@ -132,7 +98,7 @@ msvc_16:
     - cd $CI_PROJECT_DIR
     - md -force $env:BUILD_FOLDER
     - cd $env:BUILD_FOLDER
-    - cmake .. --preset=all_msvc -DBUILD_WARNINGS_AS_ERRORS=ON
+    - cmake .. --preset=all_msvc -DCMAKE_CUDA_ARCHITECTURES=61 -DBUILD_WARNINGS_AS_ERRORS=ON
     - MSBuild.exe VirtualFluids.sln /property:Configuration=$env:BUILD_CONFIGURATION /verbosity:minimal /maxcpucount:4
 
   cache:
@@ -145,30 +111,10 @@ msvc_16:
     paths:
       - $CI_PROJECT_DIR/$env:BUILD_FOLDER/
 
-###############################################################################
-build_singularity_image:
-  stage: build
-
-  tags:
-    - priviliged
-    - linux
-
-  rules:
-    - if: $CI_COMMIT_TAG
-
-  artifacts:
-    paths:
-      - Containers/VirtualFluidsOpenMPI.sif
-
-  script:
-    - singularity build --fakeroot Containers/VirtualFluidsOpenMPI.sif Containers/VirtualFluidsOpenMPI.def
-    - ls -sh Containers/VirtualFluidsOpenMPI.sif
-
 
 ###############################################################################
 ##                             Build Python                                  ##
 ###############################################################################
-
 gcc_9_python:
   stage: build_python
 
@@ -190,7 +136,27 @@ gcc_9_python:
     - export CCACHE_DIR=$CI_PROJECT_DIR/cache
 
   script:
-    - python3 setup.py bdist_wheel
+    - python3 setup.py bdist_wheel build_ext --build-temp=build
+
+###############################################################################
+##                            Container Upload                               ##
+###############################################################################
+build_singularity_image:
+  stage: container_upload
+
+  needs:
+    - gcc_9_python
+
+  tags:
+    - linux
+    - privileged
+
+  rules:
+    - if: $CI_COMMIT_TAG
+
+  script:
+    - singularity build Containers/VirtualFluidsPython.sif Containers/VirtualFluidsPython.def
+    - singularity push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" Containers/VirtualFluidsPython.sif oras://"$CI_REGISTRY_IMAGE"/"$CI_PROJECT_NAME":"$CI_COMMIT_TAG"
 
 ###############################################################################
 ##                                Tests                                      ##
@@ -239,6 +205,47 @@ gcc_9_python_bindings_test:
   script:
     - python3 -m unittest discover -s Python -v
 
+###############################################################################
+gcc_9_python_slurm_test:
+  stage: test
+
+  needs: ["gcc_9_python"]
+
+  rules:
+    - if: $PHOENIX_PRIVATE_KEY
+
+  tags:
+    - linux
+    - privileged
+
+  variables:
+    SSH_KEY: "$PHOENIX_PRIVATE_KEY"
+    HOST: "$PHOENIX_HOSTNAME"
+    USER: "$PHOENIX_USER"
+
+  before_script:
+    - 'command -v ssh-agent >/dev/null || ( apt-get update -y && apt-get install openssh-client -y )'
+    - apt-get install -y rsync
+    - mkdir -p ~/.ssh
+    - chmod 700 ~/.ssh
+    - eval $(ssh-agent -s)
+    - echo "$SSH_KEY" | tr -d '\r' | ssh-add -
+    - echo "$SSH_KEY" | tr -d '\r' > ansible/private_key
+    - ssh-keyscan -t rsa $HOST >> ~/.ssh/known_hosts
+    - ssh $USER@$HOST "rm -rf output-*"
+    - ssh $USER@$HOST "rm -f *.out"
+    - pip3 install git+https://github.com/SvenMarcus/ssh-slurm-runner
+
+  script:
+    - singularity build PoiseuilleTestContainer.sif Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def
+    - scp PoiseuilleTestContainer.sif $USER@$HOST:PoiseuilleTestContainer.sif
+    - scp Python/SlurmTests/poiseuille/slurm.job $USER@$HOST:slurm.job
+    - python3 -m ssh_slurm_runner slurm.job --host $HOST --user $USER --keyfile ansible/private_key
+    - ssh $USER@$HOST "rm -rf output-*"
+    - ssh $USER@$HOST "rm -f *.out"
+    - ssh $USER@$HOST "rm PoiseuilleTestContainer.sif"
+    - ssh $USER@$HOST "rm slurm.job"
+
 ###############################################################################
 ##                            Benchmark                                      ##
 ###############################################################################
@@ -297,6 +304,7 @@ gpu_numerical_tests:
       - $CI_PROJECT_DIR/cache
 
   artifacts:
+    expire_in: 1 hour
     paths:
       - $CI_PROJECT_DIR/numerical_tests_gpu_results.txt
 
@@ -389,11 +397,10 @@ cppcheck:
     - cd $CI_PROJECT_DIR
     - cppcheck --version
     - cppcheck src --enable=all --xml 2> cppcheck.xml
-    - cppcheck-htmlreport --file=cppcheck.xml --report-dir=html_report --source-dir=.
 
   artifacts:
+    expire_in: 1 week
     paths:
-      - html_report/
       - cppcheck.xml
 
 ###############################################################################
@@ -421,6 +428,7 @@ lizard:
     - lizard -l cpp src/ > lizard.txt --warnings_only --ignore_warnings 400
 
   artifacts:
+    expire_in: 1 week
     paths:
       - lizard.txt
 
@@ -456,9 +464,9 @@ gcov_gcc_9:
     - gcovr -r $CI_PROJECT_DIR -k build -f "src" --print-summary --html coverage/coverage.html --html-details --xml coverage/coverage.xml
 
   artifacts:
+    expire_in: 1 week
     paths:
       - coverage/
-      - build/
 
     reports:
       cobertura: coverage/coverage.xml
@@ -480,6 +488,8 @@ clang-tidy:
 
   needs: []
 
+  allow_failure: true
+
   before_script:
     - run-clang-tidy -h
 
@@ -494,10 +504,35 @@ clang-tidy:
     - run-clang-tidy -quiet > clangtidy.txt
 
   artifacts:
+    when: always
+    expire_in: 1 week
     paths:
       - build/clangtidy.txt
       - build/compile_commands.json
 
+
+###############################################################################
+# doxgen
+doxygen:
+  stage: analyze
+
+  only:
+    - develop@irmb/VirtualFluids_dev
+
+  needs: []
+
+  image: alpine
+
+  script:
+    - apk update && apk add doxygen
+    - doxygen docs/Doxyfile
+
+  artifacts:
+    expire_in: 1 week
+    paths:
+      - docs/build/
+
+
 ###############################################################################
 ##                               Deploy                                      ##
 ###############################################################################
@@ -550,10 +585,10 @@ vf_wheel_to_jupyterhub:
   needs: ["gcc_9_python", "gcc_9_unit_tests", "gcc_9_python_bindings_test"]
 
   variables:
-    HOST: "finrod.irmb.bau.tu-bs.de"
-    SSH_KEY: "$SSH_PRIVATE_KEY_JUPYTER_HOST_AT_FINROD"
-    REMOTE_USER: "jupyter_host"
-    jupyter_host: "jupyter_host"
+    HOST: "gitlab-runner01.irmb.bau.tu-bs.de"
+    SSH_KEY: "$SSH_PRIVATE_KEY"
+    REMOTE_USER: "runner"
+    jupyter_host: "runner"
 
   script:
     - ansible-playbook -i ansible/hosts.cfg -u $REMOTE_USER ansible/playbook_jupyter_update.yml
@@ -574,7 +609,7 @@ sonar-scanner:
   variables:
     SONAR_HOST_URL: "http://gitlab-runner01.irmb.bau.tu-bs.de/sonarqube/"
 
-  needs: ["cppcheck","clang-tidy","gcov_gcc_9","gcc_9_rebuild"]
+  needs: ["cppcheck","clang-tidy","gcov_gcc_9"]
 
   before_script:
     - cd /tmp
diff --git a/AUTHORS.md b/AUTHORS.md
index aca25898d44f5d2f72ea8f018f54546197c301e1..00a5410811196462d6f808036be7feb5e74a102d 100644
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -1,24 +1,25 @@
-AUTHORS
-============
-
+# AUTHORS
 VF has been developed essentially over the last fifteen years by various researchers at IRMB including (in alphabetical order):
 
-
-Dr. Benjamin Ahrenholz  
-M. Sc. Hussein Alihussein  
-Dr. Sebastian Bindick  
-J.Prof. Dr. Martin Geier  
-Dr. Sebastian Geller  
-Dr. Ehsan Goraki Fard  
-Dr. Jan Hegewald  
-Dr. Christian Janßen  
-M. Sc. Konstantin Kutscher  
-M. Sc. Stephan Lenz  
-Dr. Jan Linxweiler  
-M. Sc. Sören Peters  
-Dr. Hesameddin Safari  
-Dr. Martin Schönherr  
-Dipl.-Ing. Maik Stiebler  
-Dr. Sören Textor (aka Freudiger)  
-PD Dr. Jonas Tölke  
-Dr. Sonja Uphoff  
+Dr. Benjamin Ahrenholz
+M. Sc. Hussein Alihussein
+Dr. Sebastian Bindick
+B. Sc. Aileen Brendel
+J.Prof. Dr. Martin Geier
+Dr. Sebastian Geller
+Dr. Ehsan Goraki Fard
+Dr. Jan Hegewald
+Dr. Christian Janßen
+M. Sc. Konstantin Kutscher
+M. Sc. Stephan Lenz
+Dr. Jan Linxweiler
+B. Sc. Lennard Lux
+B. Sc. Sven Marcus
+M. Sc. Sören Peters
+Dr. Hesameddin Safari
+Dr. Martin Schönherr
+Dipl.-Ing. Maik Stiebler
+Dr. Sören Textor (aka Freudiger)
+PD Dr. Jonas Tölke
+Dr. Sonja Uphoff
+B. Sc. Anna Wellmann
\ No newline at end of file
diff --git a/CMake/VirtualFluidsMacros.cmake b/CMake/VirtualFluidsMacros.cmake
index 23d5029d2dff46076c27158e4f15d0375bf8565f..2c2bc1d650e5e2c402f34a1a2547b9fa6f5e063b 100644
--- a/CMake/VirtualFluidsMacros.cmake
+++ b/CMake/VirtualFluidsMacros.cmake
@@ -131,18 +131,18 @@ function(vf_add_library)
     #################################################################
     ###   ADD TARGET                                              ###
     #################################################################
-    IF(${ARG_BUILDTYPE} MATCHES binary)
-        ADD_EXECUTABLE(${library_name} ${MY_SRCS} )
+    if(${ARG_BUILDTYPE} MATCHES binary)
+        add_executable(${library_name} ${MY_SRCS} )
         groupTarget(${library_name} ${appFolder})
-    ELSEIF(${ARG_BUILDTYPE} MATCHES shared)
-        ADD_LIBRARY(${library_name} SHARED ${MY_SRCS} )
+    elseif(${ARG_BUILDTYPE} MATCHES shared)
+        add_library(${library_name} SHARED ${MY_SRCS} )
         groupTarget(${library_name} ${libraryFolder})
-    ELSEIF(${ARG_BUILDTYPE} MATCHES static)
-        ADD_LIBRARY(${library_name} STATIC ${MY_SRCS} )
+        elseif(${ARG_BUILDTYPE} MATCHES static)
+        add_library(${library_name} STATIC ${MY_SRCS} )
         groupTarget(${library_name} ${libraryFolder})
-    ELSE()
-        MESSAGE(FATAL_ERROR "build_type=${ARG_BUILDTYPE} doesn't match BINARY, SHARED or STATIC")
-    ENDIF()
+    else()
+        message(FATAL_ERROR "build_type=${ARG_BUILDTYPE} doesn't match BINARY, SHARED or STATIC")
+    endif()
 
     # Set the output directory for build artifacts
     set_target_properties(${library_name}
@@ -152,6 +152,21 @@ function(vf_add_library)
             ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
             PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
 
+    # link time optimization
+    if(BUILD_VF_LTO)
+        if(NOT ${ARG_BUILDTYPE} MATCHES binary)
+            include(CheckIPOSupported)
+            check_ipo_supported(RESULT ipo_supported OUTPUT ipo_error LANGUAGES CXX)
+
+            if( ipo_supported )
+                status_lib("IPO / LTO enabled")
+                set_target_properties(${library_name} PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE)
+            else()
+                status_lib("IPO / LTO not supported: <${ipo_error}>")
+            endif()
+        endif()
+    endif()
+
     # clang-tidy
     if(BUILD_VF_CLANG_TIDY)
         find_program(CLANG_TIDY_PROGRAM NAMES clang-tidy)
@@ -286,6 +301,7 @@ function(vf_add_tests)
     # link googlemock
     linkGMOCK()
 
+    # add the target to ctest
     gtest_add_tests(TARGET ${library_test_name})
 
 endfunction()
diff --git a/CMake/cmake_config_files/CSE01.config.cmake b/CMake/cmake_config_files/CSE01.config.cmake
index baa0f94981c2e9f9ac05b62468311f4bead32ff3..cad3f60ce31edac1069d1edce3fdd43b49a72b6e 100644
--- a/CMake/cmake_config_files/CSE01.config.cmake
+++ b/CMake/cmake_config_files/CSE01.config.cmake
@@ -2,22 +2,22 @@
 #################################################################################
 #  BOOST  
 #################################################################################
-SET(BOOST_VERSION "1.60.0")
-SET(BOOST_ROOT "d:/boost/boost_1_60_0")
-SET(BOOST_DIR ${BOOST_ROOT})
-SET(BOOST_LIBRARYDIR ${BOOST_ROOT}"/stageMSVC64/lib")  
+#SET(BOOST_VERSION "1.60.0")
+#SET(BOOST_ROOT "d:/boost/boost_1_60_0")
+#SET(BOOST_DIR ${BOOST_ROOT})
+#SET(BOOST_LIBRARYDIR ${BOOST_ROOT}"/stageMSVC64/lib")  
 #################################################################################
 
 #################################################################################
 #  METIS  
 #################################################################################
-IF(${USE_METIS})
-  SET(METIS_INCLUDEDIR "d:/metis-5.1.0/include")
-  SET(METIS_DEBUG_LIBRARY "d:/metis-5.1.0/build/libmetis/Debug/metis.lib") 
-  SET(METIS_RELEASE_LIBRARY "d:/metis-5.1.0/build/libmetis/Release/metis.lib") 
-ENDIF()
+#IF(${USE_METIS})
+#  SET(METIS_INCLUDEDIR "d:/metis-5.1.0/include")
+#  SET(METIS_DEBUG_LIBRARY "d:/metis-5.1.0/build/libmetis/Debug/metis.lib") 
+#  SET(METIS_RELEASE_LIBRARY "d:/metis-5.1.0/build/libmetis/Release/metis.lib") 
+#ENDIF()
 #################################################################################
 #  VTK  
 #################################################################################
-set(VTK_DIR "d:/tools/VTK/build/VTK-8.2.0")
+#set(VTK_DIR "d:/tools/VTK/build/VTK-8.2.0")
 #################################################################################
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9685f4de572b180fa32766f6a15b87f4a8d3a922..d74febdac96544fa6f50b65602392d3c44a5ca10 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -51,6 +51,8 @@ option(BUILD_VF_INCLUDE_WHAT_YOU_USE "Add IWYU to the targets" OFF)
 option(BUILD_VF_CPPCHECK "Add cppcheck to the targets" OFF)
 option(BUILD_VF_COVERAGE "Add the -coverage compiler flag." OFF)
 
+option(BUILD_CUDA_LTO "Enables the cuda link optimization." OFF)
+
 option(BUILD_SHARED_LIBS "" OFF)
 option(BUILD_WARNINGS_AS_ERRORS "" OFF)
 
@@ -94,14 +96,36 @@ if(BUILD_VF_GPU)
 
     set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
 
+    enable_language(CUDA)
+
+
     if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
         message(WARNING "CMAKE_CUDA_ARCHITECTURES was not defined and is set to 30 (CUDA support until 10.1 only).")
         set(CMAKE_CUDA_ARCHITECTURES 30)
     endif()
 
+
+    if(BUILD_CUDA_LTO)
+        
+       if(CMAKE_CUDA_ARCHITECTURES LESS 50)
+            message(FATAL_ERROR "CUDA Link time optimization requires CUDA 11.2 and CC 5.0.")
+       else()
+
+        set(CMAKE_CUDA_FLAGS "-dlto -arch=sm_${CMAKE_CUDA_ARCHITECTURES}")
+        set(CMAKE_CUDA_ARCHITECTURES OFF)
+
+        list(APPEND VF_COMPILER_DEFINITION BUILD_CUDA_LTO)
+
+       endif()
+    endif()
+
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda")
+
+    message("CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}")
     message("CUDA Architecture: ${CMAKE_CUDA_ARCHITECTURES}")
 endif()
 
+
 #################################################################################
 #  COMMON LIBRARIES
 #################################################################################
@@ -119,6 +143,7 @@ find_package(MPI REQUIRED)
 
 
 add_subdirectory(src/basics)
+add_subdirectory(src/lbm)
 
 
 #################################################################################
diff --git a/CMakePresets.json b/CMakePresets.json
index 6863446af85b177bb4dc99eed475aa52f4d50269..6527d5c1f4feba60064b1473fa12dbb2331279aa 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -1,133 +1,179 @@
 {
-  "version": 1,
-  "cmakeMinimumRequired": {
-    "major": 3,
-    "minor": 19,
-    "patch": 0
-  },
-  "configurePresets": [
-    {
-      "name": "default",
-      "hidden": true,
-      "binaryDir": "${sourceDir}/build/",
-      "cacheVariables": {
-        "BUILD_VF_UNIT_TESTS": "ON"
-      }
-    },
-    {
-      "name": "default_make",
-      "inherits": "default",
-      "hidden": true,
-      "generator": "Unix Makefiles"
-    },
-    {
-      "name": "default_ccache_make",
-      "inherits": "default_make",
-      "hidden": true,
-      "cacheVariables": {
-        "CMAKE_CXX_COMPILER_LAUNCHER": "ccache",
-        "CMAKE_CUDA_COMPILER_LAUNCHER": "ccache",
-        "CMAKE_C_COMPILER_LAUNCHER": "ccache"
-      }
-    },
-    {
-      "name": "default_msvc",
-      "inherits": "default",
-      "hidden": true,
-      "generator": "Visual Studio 16 2019",
-      "architecture": "x64"
-    },
-    {
-      "name": "default_cpu",
-      "hidden": true,
-      "description": "CPU build of VirtualFluids",
-      "cacheVariables": {
-        "BUILD_VF_CPU": "ON"
-      }
-    },
-    {
-      "name": "default_gpu",
-      "hidden": true,
-      "description": "GPU build of VirtualFluids",
-      "cacheVariables": {
-        "BUILD_VF_GPU": "ON"
-      }
-    },
-    {
-      "name": "default_gpu_numerical_tests",
-      "inherits": ["default_gpu"],
-      "hidden": true,
-      "description": "GPU numerical tests of VirtualFluids",
-      "cacheVariables": {
-        "BUILD_VF_DOUBLE_ACCURACY": "ON",
-        "BUILD_NUMERIC_TESTS": "ON"
-      }
-    },
-    {
-      "name": "default_all",
-      "hidden": true,
-      "description": "All build of VirtualFluids",
-      "inherits": ["default_cpu", "default_gpu"]
-    },
-    {
-      "name": "cpu_make",
-      "inherits": ["default_make", "default_cpu"],
-      "displayName": "cpu make configuration"
-    },
-    {
-      "name": "cpu_make_ccache",
-      "inherits": ["default_ccache_make", "default_cpu"],
-      "displayName": "cpu ccache make configuration"
-    },
-    {
-      "name": "cpu_msvc",
-      "inherits": ["default_msvc", "default_cpu"],
-      "displayName": "cpu msvc configuration"
-    },
-    {
-      "name": "gpu_make",
-      "inherits": ["default_make", "default_gpu"],
-      "displayName": "gpu make configuration"
-    },
-    {
-      "name": "gpu_make_ccache",
-      "inherits": ["default_ccache_make", "default_gpu"],
-      "displayName": "gpu ccache make configuration"
-    },
-    {
-      "name": "gpu_msvc",
-      "inherits": ["default_msvc", "default_gpu"],
-      "displayName": "gpu msvc configuration"
-    },
-    {
-      "name": "all_make",
-      "inherits": ["default_make", "default_all"],
-      "displayName": "all make configuration"
-    },
-    {
-      "name": "all_make_ccache",
-      "inherits": ["default_ccache_make", "default_all"],
-      "displayName": "all ccache make configuration"
-    },
-    {
-      "name": "all_msvc",
-      "inherits": ["default_msvc", "default_all"],
-      "displayName": "all msvc configuration"
-    },
-    {
-      "name": "gpu_numerical_tests_make",
-      "inherits": ["default_make", "default_gpu_numerical_tests"],
-      "displayName": "gpu numerical tests make configuration"
-    },
-    {
-      "name": "gpu_numerical_tests_ccache_make",
-      "inherits": ["default_ccache_make", "default_gpu_numerical_tests"],
-      "displayName": "gpu numerical tests ccache make configuration"
-    },
-    {
-      "name": "gpu_numerical_tests_msvc",
-      "inherits": ["default_msvc", "default_gpu_numerical_tests"],
-      "displayName": "gpu numerical tests msvc configuration"
-    }
-  ]
+    "version": 1,
+    "cmakeMinimumRequired": {
+        "major": 3,
+        "minor": 19,
+        "patch": 0
+    },
+    "configurePresets": [
+        {
+            "name": "default",
+            "hidden": true,
+            "binaryDir": "${sourceDir}/build/",
+            "cacheVariables": {
+                "BUILD_VF_UNIT_TESTS": "ON"
+            }
+        },
+        {
+            "name": "default_make",
+            "inherits": "default",
+            "hidden": true,
+            "generator": "Unix Makefiles"
+        },
+        {
+            "name": "default_ccache_make",
+            "inherits": "default_make",
+            "hidden": true,
+            "cacheVariables": {
+                "CMAKE_CXX_COMPILER_LAUNCHER": "ccache",
+                "CMAKE_CUDA_COMPILER_LAUNCHER": "ccache",
+                "CMAKE_C_COMPILER_LAUNCHER": "ccache"
+            }
+        },
+        {
+            "name": "default_msvc",
+            "inherits": "default",
+            "hidden": true,
+            "generator": "Visual Studio 16 2019",
+            "architecture": "x64"
+        },
+        {
+            "name": "default_cpu",
+            "hidden": true,
+            "description": "CPU build of VirtualFluids",
+            "cacheVariables": {
+                "BUILD_VF_CPU": "ON",
+                "BUILD_VF_DOUBLE_ACCURACY": "ON"
+            }
+        },
+        {
+            "name": "default_gpu",
+            "hidden": true,
+            "description": "GPU build of VirtualFluids",
+            "cacheVariables": {
+                "BUILD_VF_GPU": "ON",
+                "BUILD_VF_DOUBLE_ACCURACY": "OFF"
+            }
+        },
+        {
+            "name": "default_gpu_numerical_tests",
+            "inherits": [
+                "default_gpu"
+            ],
+            "hidden": true,
+            "description": "GPU numerical tests of VirtualFluids",
+            "cacheVariables": {
+                "BUILD_VF_DOUBLE_ACCURACY": "ON",
+                "BUILD_NUMERIC_TESTS": "ON"
+            }
+        },
+        {
+            "name": "default_all",
+            "hidden": true,
+            "description": "All build of VirtualFluids",
+            "inherits": [
+                "default_cpu",
+                "default_gpu"
+            ],
+            "cacheVariables": {
+                "BUILD_VF_DOUBLE_ACCURACY": "ON"
+            }
+        },
+        {
+            "name": "cpu_make",
+            "inherits": [
+                "default_make",
+                "default_cpu"
+            ],
+            "displayName": "cpu make configuration"
+        },
+        {
+            "name": "cpu_make_ccache",
+            "inherits": [
+                "default_ccache_make",
+                "default_cpu"
+            ],
+            "displayName": "cpu ccache make configuration"
+        },
+        {
+            "name": "cpu_msvc",
+            "inherits": [
+                "default_msvc",
+                "default_cpu"
+            ],
+            "displayName": "cpu msvc configuration"
+        },
+        {
+            "name": "gpu_make",
+            "inherits": [
+                "default_make",
+                "default_gpu"
+            ],
+            "displayName": "gpu make configuration"
+        },
+        {
+            "name": "gpu_make_ccache",
+            "inherits": [
+                "default_ccache_make",
+                "default_gpu"
+            ],
+            "displayName": "gpu ccache make configuration"
+        },
+        {
+            "name": "gpu_msvc",
+            "inherits": [
+                "default_msvc",
+                "default_gpu"
+            ],
+            "displayName": "gpu msvc configuration"
+        },
+        {
+            "name": "all_make",
+            "inherits": [
+                "default_make",
+                "default_all"
+            ],
+            "displayName": "all make configuration"
+        },
+        {
+            "name": "all_make_ccache",
+            "inherits": [
+                "default_ccache_make",
+                "default_all"
+            ],
+            "displayName": "all ccache make configuration"
+        },
+        {
+            "name": "all_msvc",
+            "inherits": [
+                "default_msvc",
+                "default_all"
+            ],
+            "displayName": "all msvc configuration"
+        },
+        {
+            "name": "gpu_numerical_tests_make",
+            "inherits": [
+                "default_make",
+                "default_gpu_numerical_tests"
+            ],
+            "displayName": "gpu numerical tests make configuration"
+        },
+        {
+            "name": "gpu_numerical_tests_ccache_make",
+            "inherits": [
+                "default_ccache_make",
+                "default_gpu_numerical_tests"
+            ],
+            "displayName": "gpu numerical tests ccache make configuration"
+        },
+        {
+            "name": "gpu_numerical_tests_msvc",
+            "inherits": [
+                "default_msvc",
+                "default_gpu_numerical_tests"
+            ],
+            "displayName": "gpu numerical tests msvc configuration"
+        }
+    ]
 }
diff --git a/Containers/VirtualFluidsPython.def b/Containers/VirtualFluidsPython.def
new file mode 100644
index 0000000000000000000000000000000000000000..d54066bc634cf25f4340b1e659eae72515467fa8
--- /dev/null
+++ b/Containers/VirtualFluidsPython.def
@@ -0,0 +1,33 @@
+BootStrap: docker
+From: ubuntu:20.04
+
+%files
+    Python Python
+    dist dist
+
+
+%post
+    export DEBIAN_FRONTEND=noninteractive
+    apt-get update &&          \
+    apt-get install -y         \
+    build-essential            \
+    cmake=3.16.3-1ubuntu1      \
+    python3                    \
+    python3-dev                \
+    python3-pip                \
+    mpich                      \
+    libomp-dev
+
+    pip3 install setuptools wheel $(find dist/*.whl)
+
+%environment
+    export PYTHONPATH=/Python
+
+%runscript
+    python3 /Python/liddrivencavity/simulation.py
+
+%appenv poiseuille
+    export PYTHONPATH=Python
+
+%apprun poiseuille
+    python3 /Python/poiseuille/poiseuille_hpc.py
diff --git a/Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def b/Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def
new file mode 100644
index 0000000000000000000000000000000000000000..a3836e7906b9be66ec79f68bf53ccc079db9d9ef
--- /dev/null
+++ b/Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def
@@ -0,0 +1,42 @@
+BootStrap: docker
+From: ubuntu:20.04
+
+%files
+    3rdParty 3rdParty
+    apps apps
+    CMake CMake
+    Python Python
+    src src
+    CMakeLists.txt CMakeLists.txt
+    cpu.cmake cpu.cmake
+    gpu.cmake gpu.cmake
+    setup.py setup.py
+    pyproject.toml pyproject.toml
+
+
+%post
+    export DEBIAN_FRONTEND=noninteractive
+    apt-get update &&          \
+    apt-get install -y         \
+    build-essential            \
+    cmake=3.16.3-1ubuntu1      \
+    python3                    \
+    python3-dev                \
+    python3-pip                \
+    mpich                      \
+    libomp-dev                 \
+    libgl1
+
+    pip3 install setuptools wheel numpy scipy pyvista
+
+    export PYTHONPATH=Python
+    python3 /setup.py install
+
+%environment
+    export PYTHONPATH=/Python
+
+%apprun testcase
+    python3 /Python/SlurmTests/poiseuille/simulation_runner.py
+
+%apprun evaluate
+    python3 /Python/SlurmTests/poiseuille/evaluator.py
\ No newline at end of file
diff --git a/Python/SlurmTests/poiseuille/evaluator.py b/Python/SlurmTests/poiseuille/evaluator.py
new file mode 100644
index 0000000000000000000000000000000000000000..74602846b67bb82f7e3c3d3ca3015fbe00a63041
--- /dev/null
+++ b/Python/SlurmTests/poiseuille/evaluator.py
@@ -0,0 +1,20 @@
+import numpy as np
+import scipy.stats as stats
+import errors
+from SlurmTests.poiseuille.result_collector import collect_results
+from SlurmTests.poiseuille.settings import Scaling
+
+analytical_results, numerical_results = collect_results()
+normalized_l2_errors = [errors.normalized_l2_error(analytical, numerical)
+                        for analytical, numerical in zip(analytical_results, numerical_results)]
+
+nodes_in_x3_per_run = []
+for simulation_run in range(0, 3):
+    grid_params, _, _, _ = Scaling.configuration_for_scale_level(simulation_run)
+    nodes_in_x3_per_run.append(grid_params.number_of_nodes_per_direction[2])
+
+nodes_as_log = [np.log10(node) for node in nodes_in_x3_per_run]
+l2_norms_as_log = [np.log10(l2) for l2 in normalized_l2_errors]
+res = stats.linregress(nodes_as_log, l2_norms_as_log)
+
+assert res.slope <= -2, f"Expected slope of l2 error to be <= -2, but was {res.slope}"
diff --git a/Python/SlurmTests/poiseuille/result_collector.py b/Python/SlurmTests/poiseuille/result_collector.py
new file mode 100644
index 0000000000000000000000000000000000000000..06efa481c8c010647531426f2af2bec2c2d7eaee
--- /dev/null
+++ b/Python/SlurmTests/poiseuille/result_collector.py
@@ -0,0 +1,73 @@
+from typing import Collection, List
+
+import pyvista as pv
+from poiseuille.analytical import PoiseuilleSettings, poiseuille_at_heights
+from vtk_utilities import vertical_column_from_mesh, get_values_from_indices
+from SlurmTests.poiseuille.settings import Scaling
+
+
+def get_output_file_name(output_folder, runtime_params):
+    timesteps = runtime_params.number_of_timesteps
+    file_name = f"{output_folder}/mq/mq{timesteps}/mq0_{timesteps}.bin.vtu"
+
+    return file_name
+
+
+def get_mesh_for_last_timestep(output_folder, runtime_params):
+    file_name_of_last_timestep = get_output_file_name(output_folder, runtime_params)
+    mesh_of_last_timestep = pv.read(file_name_of_last_timestep)
+    return mesh_of_last_timestep
+
+
+def get_heights_from_indices(mesh, indices):
+    return [mesh.points[index][2] for index in indices]
+
+
+def get_heights(output_folder, runtime_params):
+    mesh_of_last_timestep = get_mesh_for_last_timestep(output_folder, runtime_params)
+    column_indices = vertical_column_from_mesh(mesh_of_last_timestep)
+    heights = get_heights_from_indices(mesh_of_last_timestep, column_indices)
+    return heights
+
+
+def get_numerical_results(runtime_params, output_folder):
+    mesh_of_last_timestep = get_mesh_for_last_timestep(output_folder, runtime_params)
+    velocities_in_x_direction = mesh_of_last_timestep.get_array("Vx")
+    column_indices = vertical_column_from_mesh(mesh_of_last_timestep)
+    numerical_results = get_values_from_indices(velocities_in_x_direction, column_indices)
+
+    return numerical_results
+
+
+def get_analytical_results(grid_params, physical_params, kernel, height_values):
+    channel_height = grid_params.number_of_nodes_per_direction[2]
+    settings = get_analytical_poiseuille_settings(channel_height, physical_params, kernel)
+    max_grid_height = channel_height * grid_params.node_distance
+    adjusted_height_values = [value / max_grid_height * channel_height for value in height_values]
+    analytical_results = poiseuille_at_heights(settings, adjusted_height_values)
+    return analytical_results
+
+
+def get_analytical_poiseuille_settings(height, physical_params, kernel):
+    settings = PoiseuilleSettings()
+    settings.height = height
+    settings.viscosity = physical_params.lattice_viscosity
+    settings.density = 1
+    settings.force = kernel.forcing_in_x1
+
+    return settings
+
+
+def collect_results() -> (List[List[float]], List[List[float]]):
+    analytical_results = []
+    numerical_results = []
+
+    for simulation_run in range(0, 3):
+        output_folder = f"output-{simulation_run}"
+        grid_params, physical_params, runtime_params, kernel = Scaling.configuration_for_scale_level(simulation_run)
+        heights = get_heights(output_folder, runtime_params)
+        analytical_results.append(
+            get_analytical_results(grid_params, physical_params, kernel, heights))
+        numerical_results.append(get_numerical_results(runtime_params, output_folder))
+
+    return analytical_results, numerical_results
diff --git a/Python/SlurmTests/poiseuille/settings.py b/Python/SlurmTests/poiseuille/settings.py
new file mode 100644
index 0000000000000000000000000000000000000000..f75c2b1d7133323880dd5520de0a96cb8fa87860
--- /dev/null
+++ b/Python/SlurmTests/poiseuille/settings.py
@@ -0,0 +1,26 @@
+import os
+from acousticscaling import OneDirectionalAcousticScaling
+from pyfluids.kernel import LBMKernel, KernelType
+from pyfluids.parameters import RuntimeParameters, GridParameters, PhysicalParameters
+
+
+grid_params = GridParameters()
+grid_params.node_distance = 1
+grid_params.number_of_nodes_per_direction = [1, 1, 16]
+grid_params.blocks_per_direction = [1, 1, 4]
+grid_params.periodic_boundary_in_x1 = True
+grid_params.periodic_boundary_in_x2 = True
+
+physical_params = PhysicalParameters()
+physical_params.lattice_viscosity = 1e-4
+
+runtime_params = RuntimeParameters()
+runtime_params.number_of_threads = int(os.environ["PYFLUIDS_NUM_THREADS"])
+runtime_params.number_of_timesteps = 4_000_000
+runtime_params.timestep_log_interval = 1_000_000
+
+kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity)
+kernel.use_forcing = True
+kernel.forcing_in_x1 = 5e-10
+
+Scaling = OneDirectionalAcousticScaling(grid_params, physical_params, runtime_params, kernel)
diff --git a/Python/SlurmTests/poiseuille/simulation_runner.py b/Python/SlurmTests/poiseuille/simulation_runner.py
new file mode 100644
index 0000000000000000000000000000000000000000..0b75de40b6a8f11ccd76f97f2ed9d709dc5362dd
--- /dev/null
+++ b/Python/SlurmTests/poiseuille/simulation_runner.py
@@ -0,0 +1,19 @@
+import os
+
+from SlurmTests.poiseuille.settings import Scaling
+from poiseuille.simulation import run_simulation
+from pyfluids.writer import Writer, OutputFormat
+
+
+scale_level = int(os.environ["PYFLUIDS_SCALE_LEVEL"])
+grid_params, physical_params, runtime_params, kernel = Scaling.configuration_for_scale_level(scale_level)
+
+writer = Writer()
+writer.output_format = OutputFormat.BINARY
+writer.output_path = "./output-" + str(scale_level)
+
+run_simulation(grid_params=grid_params,
+               physical_params=physical_params,
+               runtime_params=runtime_params,
+               kernel=kernel,
+               writer=writer)
diff --git a/Python/SlurmTests/poiseuille/slurm.job b/Python/SlurmTests/poiseuille/slurm.job
new file mode 100644
index 0000000000000000000000000000000000000000..488fc9a42f261d69a8212cff389721fdfb9cbf6e
--- /dev/null
+++ b/Python/SlurmTests/poiseuille/slurm.job
@@ -0,0 +1,26 @@
+#!/bin/bash
+#SBATCH -J PyFluidsTest
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=20
+
+#SBATCH --mem-per-cpu=3000
+#SBATCH --time=07:00:00
+#SBATCH --partition=standard
+
+source $HOME/.bashrc
+
+echo "PyFluids Poiseuille Test Case"
+echo "Number of tasks: ${SLURM_NTASKS}"
+
+export SINGULARITYENV_PYFLUIDS_SCALE_LEVEL=0
+export SINGULARITYENV_PYFLUIDS_NUM_THREADS=4
+srun singularity run --app testcase PoiseuilleTestContainer.sif
+
+export SINGULARITYENV_PYFLUIDS_SCALE_LEVEL=1
+srun singularity run --app testcase PoiseuilleTestContainer.sif
+
+export SINGULARITYENV_PYFLUIDS_SCALE_LEVEL=2
+srun singularity run --app testcase PoiseuilleTestContainer.sif
+
+srun singularity run --app evaluate PoiseuilleTestContainer.sif
diff --git a/Python/acousticscaling.py b/Python/acousticscaling.py
new file mode 100644
index 0000000000000000000000000000000000000000..50b81db064251fa269f29bf72a561567ddedafbc
--- /dev/null
+++ b/Python/acousticscaling.py
@@ -0,0 +1,85 @@
+from pyfluids.kernel import LBMKernel
+from pyfluids.parameters import GridParameters, PhysicalParameters, RuntimeParameters
+
+
+class OneDirectionalAcousticScaling:
+
+    def __init__(self, grid_parameters: GridParameters,
+                 physical_parameters: PhysicalParameters,
+                 runtime_parameters: RuntimeParameters,
+                 kernel: LBMKernel):
+        self._grid_params = grid_parameters
+        self._physical_params = physical_parameters
+        self._runtime_params = runtime_parameters
+        self._kernel = kernel
+
+    def configuration_for_scale_level(self, level: int = 1) -> (GridParameters,
+                                                                PhysicalParameters,
+                                                                RuntimeParameters,
+                                                                LBMKernel):
+        if level < 0:
+            raise ValueError("level must be >= 0")
+
+        grid_params = self.clone_grid_params_for_level(level)
+        physical_params = self.clone_physical_parameters(level)
+        runtime_params = self.clone_runtime_params_for_level(level)
+        kernel = self.clone_kernel_for_level(level)
+
+        return grid_params, physical_params, runtime_params, kernel
+
+    def clone_grid_params_for_level(self, level) -> GridParameters:
+        grid_params = GridParameters()
+        grid_params.reference_direction_index = self._grid_params.reference_direction_index
+        grid_params.periodic_boundary_in_x1 = self._grid_params.periodic_boundary_in_x1
+        grid_params.periodic_boundary_in_x2 = self._grid_params.periodic_boundary_in_x2
+        grid_params.periodic_boundary_in_x3 = self._grid_params.periodic_boundary_in_x3
+
+        grid_params.number_of_nodes_per_direction = list(self._grid_params.number_of_nodes_per_direction)
+        grid_params.blocks_per_direction = list(self._grid_params.blocks_per_direction)
+        grid_params.node_distance = self._grid_params.node_distance
+
+        if level > 0:
+            grid_params.node_distance /= (level * 2)
+            grid_params.number_of_nodes_per_direction = [grid_params.number_of_nodes_per_direction[0],
+                                                         grid_params.number_of_nodes_per_direction[1],
+                                                         grid_params.number_of_nodes_per_direction[2] * (level * 2)]
+
+            grid_params.blocks_per_direction = [grid_params.blocks_per_direction[0],
+                                                grid_params.blocks_per_direction[1],
+                                                grid_params.blocks_per_direction[2] * (level * 2)]
+
+        return grid_params
+
+    def clone_physical_parameters(self, level):
+        physical_params = PhysicalParameters()
+        physical_params.lattice_viscosity = self._physical_params.lattice_viscosity
+
+        if level > 0:
+            physical_params.lattice_viscosity *= (level * 2)
+
+        return physical_params
+
+    def clone_runtime_params_for_level(self, level):
+        runtime_params = RuntimeParameters()
+        runtime_params.number_of_timesteps = self._runtime_params.number_of_timesteps
+        runtime_params.number_of_threads = self._runtime_params.number_of_threads
+        runtime_params.timestep_log_interval = self._runtime_params.timestep_log_interval
+
+        if level > 0:
+            runtime_params.number_of_timesteps *= (level * 2)
+
+        return runtime_params
+
+    def clone_kernel_for_level(self, level):
+        kernel = LBMKernel(self._kernel.type)
+        kernel.use_forcing = self._kernel.use_forcing
+        kernel.forcing_in_x1 = self._kernel.forcing_in_x1
+        kernel.forcing_in_x2 = self._kernel.forcing_in_x2
+        kernel.forcing_in_x3 = self._kernel.forcing_in_x3
+
+        if level > 0:
+            kernel.forcing_in_x1 /= (level * 2)
+            kernel.forcing_in_x2 /= (level * 2)
+            kernel.forcing_in_x3 /= (level * 2)
+
+        return kernel
diff --git a/Python/norms.py b/Python/errors.py
similarity index 96%
rename from Python/norms.py
rename to Python/errors.py
index 78ae344591e4d91f28b9a98cf1e28ef447e2f62a..16e8c48ab9f0b7a46ed1372ef0b4d45738cccb1b 100644
--- a/Python/norms.py
+++ b/Python/errors.py
@@ -42,8 +42,8 @@ def mean_squared_error(real_values, numerical_values):
     return sum_of_squared_distances / num_values
 
 
-def l2_norm_error(real_values, numerical_values):
+def normalized_l2_error(real_values, numerical_values):
     sum_of_squared_distances = get_sum_of_squared_distances(real_values, numerical_values)
     sum_of_squared_real_values = sum(real_value ** 2 for real_value in real_values)
 
-    return math.sqrt(sum_of_squared_distances / sum_of_squared_real_values)
\ No newline at end of file
+    return math.sqrt(sum_of_squared_distances / sum_of_squared_real_values)
diff --git a/Python/poiseuille/analytical.py b/Python/poiseuille/analytical.py
index bca1a3ff95c1f28bca68ebb0c14efee48a1a5984..33e67595d94c50a1eb98751b7f10df9a031800e8 100644
--- a/Python/poiseuille/analytical.py
+++ b/Python/poiseuille/analytical.py
@@ -1,39 +1,52 @@
 from dataclasses import dataclass
 
 
+@dataclass
 class PoiseuilleSettings:
-
-    def __init__(self):
-        self.density = 1
-        self.viscosity = 0.005
-        self.height = 10
-        self.length = 1
-        self.pressure_in = 0
-        self.pressure_out = 0
-        self.force = 0
+    density: float = 1
+    viscosity: float = 0.005
+    height: float = 10
+    length: float = 1
+    pressure_in: float = 0
+    pressure_out: float = 0
+    force: float = 0
 
 
 def poiseuille_at_z(settings: PoiseuilleSettings, z: float):
     pressure_grad = ((settings.pressure_out - settings.pressure_in) / settings.length)
 
-    return (1 / settings.viscosity
+    return ((1 / settings.viscosity)
             * (- pressure_grad + settings.density * settings.force)
-            * z / 2 * (settings.height - z))
+            * (z / 2)
+            * (settings.height - z))
 
 
 def poiseuille_at_heights(settings: PoiseuilleSettings, heights):
     return [poiseuille_at_z(settings, z) for z in heights]
 
 
-if __name__ == '__main__':
-    # h1 = [1, 2, 3, 4, 5, 6, 7, 8, 9]
-    # h2 = [0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 6, 6.5, 7, 7.5, 8, 8.5, 9, 9.5]
-    settings = PoiseuilleSettings()
-    settings.force = 1e-8
-    settings.height = 32
+def reynolds_number(settings: PoiseuilleSettings):
+    max_v = poiseuille_at_z(settings, settings.height / 2)
+    return max_v * settings.height / settings.viscosity
 
-    # print(max(poiseuille_at_heights(settings, h1)))
-    # print(max(poiseuille_at_heights(settings, h2)))
 
-    v = poiseuille_at_z(settings, 16)
-    print(v)
\ No newline at end of file
+if __name__ == '__main__':
+    sim_settings = PoiseuilleSettings()
+
+    sim_settings.force = 2e-7
+    sim_settings.viscosity = 1e-3
+    sim_settings.height = 16
+    print("v_max =", poiseuille_at_z(sim_settings, sim_settings.height / 2))
+    print("Re =", reynolds_number(sim_settings))
+
+    sim_settings.viscosity *= 2
+    sim_settings.height *= 2
+    sim_settings.force /= 2
+    print("v_max =", poiseuille_at_z(sim_settings, sim_settings.height / 2))
+    print("Re =", reynolds_number(sim_settings))
+
+    sim_settings.viscosity *= 2
+    sim_settings.height *= 2
+    sim_settings.force /= 2
+    print("v_max =", poiseuille_at_z(sim_settings, sim_settings.height / 2))
+    print("Re =", reynolds_number(sim_settings))
diff --git a/Python/poiseuille/simulation.py b/Python/poiseuille/simulation.py
index f7f6d468f19820993c61b458b3b0138b8c886139..31ceb1ab9ef90fa4fd606bde4f47c45b8f7d7567 100644
--- a/Python/poiseuille/simulation.py
+++ b/Python/poiseuille/simulation.py
@@ -17,23 +17,25 @@ default_physical_params.lattice_viscosity = 0.005
 
 default_runtime_params = RuntimeParameters()
 default_runtime_params.number_of_threads = 4
-default_runtime_params.number_of_timesteps = 100000
-default_runtime_params.timestep_log_interval = 10000
+default_runtime_params.number_of_timesteps = 10000
+default_runtime_params.timestep_log_interval = 1000
+
+default_kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity)
+default_kernel.use_forcing = True
+default_kernel.forcing_in_x1 = 1e-8
+
+default_writer = Writer()
+default_writer.output_path = "./output"
+default_writer.output_format = OutputFormat.BINARY
 
 
 def run_simulation(physical_params=default_physical_params,
                    grid_params=default_grid_params,
-                   runtime_params=default_runtime_params):
+                   runtime_params=default_runtime_params,
+                   kernel=default_kernel,
+                   writer=default_writer):
     simulation = Simulation()
 
-    kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity)
-    kernel.use_forcing = True
-    kernel.forcing_in_x1 = 1e-8
-
-    writer = Writer()
-    writer.output_path = "./output"
-    writer.output_format = OutputFormat.BINARY
-
     simulation.set_kernel_config(kernel)
     simulation.set_physical_parameters(physical_params)
     simulation.set_grid_parameters(grid_params)
@@ -42,26 +49,26 @@ def run_simulation(physical_params=default_physical_params,
 
     no_slip_bc = NoSlipBoundaryCondition()
 
-    block_width = 3 * grid_params.node_distance
+    block_thickness = 3 * grid_params.node_distance
     simulation.add_object(
         GbCuboid3D(
-            grid_params.bounding_box.min_x1 - block_width,
-            grid_params.bounding_box.min_x2 - block_width,
-            grid_params.bounding_box.min_x3 - block_width,
-            grid_params.bounding_box.max_x1 + block_width,
-            grid_params.bounding_box.max_x2 + block_width,
+            grid_params.bounding_box.min_x1 - block_thickness,
+            grid_params.bounding_box.min_x2 - block_thickness,
+            grid_params.bounding_box.min_x3 - block_thickness,
+            grid_params.bounding_box.max_x1 + block_thickness,
+            grid_params.bounding_box.max_x2 + block_thickness,
             grid_params.bounding_box.min_x3),
         no_slip_bc,
         State.SOLID, "/geo/addWallZMin")
 
     simulation.add_object(
         GbCuboid3D(
-            grid_params.bounding_box.min_x1 - block_width,
-            grid_params.bounding_box.min_x2 - block_width,
+            grid_params.bounding_box.min_x1 - block_thickness,
+            grid_params.bounding_box.min_x2 - block_thickness,
             grid_params.bounding_box.max_x3,
-            grid_params.bounding_box.max_x1 + block_width,
-            grid_params.bounding_box.max_x2 + block_width,
-            grid_params.bounding_box.max_x3 + block_width),
+            grid_params.bounding_box.max_x1 + block_thickness,
+            grid_params.bounding_box.max_x2 + block_thickness,
+            grid_params.bounding_box.max_x3 + block_thickness),
         no_slip_bc,
         State.SOLID, "/geo/addWallZMax")
 
diff --git a/Python/poiseuille/test_poiseuille_l2.py b/Python/poiseuille/test_poiseuille_l2.py
index 508fda5d6f551d428d314ffbfa657b4b6b2a230a..39c8b6dffe05e3c352e7fd340857e43d8d5a3dc8 100644
--- a/Python/poiseuille/test_poiseuille_l2.py
+++ b/Python/poiseuille/test_poiseuille_l2.py
@@ -1,68 +1,90 @@
+import os
 import shutil
 import unittest
 
 import matplotlib.pyplot as plt
+import numpy as np
 import pyvista as pv
+from pyfluids.kernel import LBMKernel, KernelType
 from pyfluids.parameters import GridParameters, PhysicalParameters, RuntimeParameters
+from scipy import stats
 
-from norms import l2_norm_error
+from errors import normalized_l2_error
 from poiseuille.analytical import poiseuille_at_heights, PoiseuilleSettings
 from poiseuille.simulation import run_simulation
 from vtk_utilities import vertical_column_from_mesh, get_values_from_indices
 
 
 class TestPoiseuilleFlow(unittest.TestCase):
+    node_distances = [1, 0.5, 0.25]
+    number_of_nodes = [16, 32, 64]
+    number_of_timesteps = [2_500_000, 5_000_000, 10_000_000]
+    forcings = [1e-9, 5e-10, 2.5e-10]
+    viscosities = [1e-3, 2e-3, 4e-3]
+
+    def zipped_settings(self):
+        return zip(self.node_distances,
+                   self.number_of_nodes,
+                   self.number_of_timesteps,
+                   self.forcings,
+                   self.viscosities)
 
     def test_poiseuille_flow(self):
-        self.skipTest("Skipping test! This test has not been implemented correctly yet")
+        self.skipTest("This test is not implemented correctly yet")
         plt.ion()
 
-        channel_height = 10
-        number_of_nodes = [8, 16, 32]
-        number_of_timesteps = [10_000, 20_000, 40_000]
-        viscosities = [5e-3, 1e-2, 2e-2]
-        l2_norm_results = []
-
         physical_params = PhysicalParameters()
 
         runtime_params = RuntimeParameters()
-        runtime_params.number_of_threads = 4
-        runtime_params.timestep_log_interval = 1000
-
-        for test_number, nodes_in_column in enumerate(number_of_nodes):
-            runtime_params.number_of_timesteps = number_of_timesteps[test_number]
-            physical_params.lattice_viscosity = viscosities[test_number]
-            delta_x = channel_height / nodes_in_column
-            grid_params = create_grid_params_with_nodes_in_column(nodes_in_column, delta_x)
-            l2_norm_result = get_l2_norm_for_simulation(grid_params, physical_params, runtime_params)
-            l2_norm_results.append(l2_norm_result)
-
-        plt.plot(number_of_nodes, l2_norm_results)
+        runtime_params.number_of_threads = os.cpu_count()
+        runtime_params.timestep_log_interval = 10000
+
+        kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity)
+        kernel.use_forcing = True
+
+        normalized_l2_errors = []
+        for delta_x, nodes, timesteps, forcing, viscosity in self.zipped_settings():
+            physical_params.lattice_viscosity = viscosity
+            runtime_params.number_of_timesteps = timesteps
+            kernel.forcing_in_x1 = forcing
+
+            grid_params = create_grid_params_with_nodes_in_column(nodes, delta_x)
+            l2_error = get_l2_error_for_simulation(grid_params, physical_params, runtime_params, kernel)
+            normalized_l2_errors.append(l2_error)
+
+        nodes_as_log = [np.log10(node) for node in self.number_of_nodes]
+        l2_norms_as_log = [np.log10(l2) for l2 in normalized_l2_errors]
+        res = stats.linregress(nodes_as_log, l2_norms_as_log)
+
+        plt.xscale("log")
+        plt.yscale("log")
+        plt.plot(self.number_of_nodes, [np.power(10, res.intercept + res.slope * node) for node in nodes_as_log], 'r-')
+        plt.plot(self.number_of_nodes, normalized_l2_errors, "x:")
         plt.show()
 
-        self.assertTrue(l2_norm_results[1] <= l2_norm_results[0])
-        self.assertTrue(l2_norm_results[2] <= l2_norm_results[1])
-
-
-def run_simulation_with_settings(grid_params, physical_params, runtime_params, output_folder):
-    remove_existing_output_directory(output_folder)
-    run_simulation(physical_params, grid_params, runtime_params)
+        print(normalized_l2_errors)
+        self.assertAlmostEqual(res.slope, -2, places=2)
 
 
-def get_l2_norm_for_simulation(grid_params, physical_params, runtime_params):
+def get_l2_error_for_simulation(grid_params, physical_params, runtime_params, kernel):
     output_folder = "./output"
-    run_simulation_with_settings(grid_params, physical_params, runtime_params, output_folder)
+    run_simulation_with_settings(grid_params, physical_params, runtime_params, kernel, output_folder)
     heights = get_heights(output_folder, runtime_params)
 
-    numerical_results = get_numerical_results(runtime_params, output_folder, heights)
-    analytical_results = get_analytical_results(physical_params, heights, grid_params.number_of_nodes_per_direction[2])
+    numerical_results = get_numerical_results(runtime_params, output_folder)
+    analytical_results = get_analytical_results(grid_params, physical_params, kernel, heights)
 
     plt.plot(heights, numerical_results)
     plt.plot(heights, analytical_results)
     plt.legend(["numerical", "analytical"])
     plt.show()
 
-    return l2_norm_error(analytical_results, numerical_results)
+    return normalized_l2_error(analytical_results, numerical_results)
+
+
+def run_simulation_with_settings(grid_params, physical_params, runtime_params, kernel, output_folder):
+    shutil.rmtree(output_folder, ignore_errors=True)
+    run_simulation(physical_params, grid_params, runtime_params, kernel)
 
 
 def get_heights(output_folder, runtime_params):
@@ -72,7 +94,7 @@ def get_heights(output_folder, runtime_params):
     return heights
 
 
-def get_numerical_results(runtime_params, output_folder, heights):
+def get_numerical_results(runtime_params, output_folder):
     mesh_of_last_timestep = get_mesh_for_last_timestep(output_folder, runtime_params)
     velocities_in_x_direction = mesh_of_last_timestep.get_array("Vx")
     column_indices = vertical_column_from_mesh(mesh_of_last_timestep)
@@ -81,16 +103,12 @@ def get_numerical_results(runtime_params, output_folder, heights):
     return numerical_results
 
 
-def calculate_analytical_results(physical_params, height_values, channel_height):
-    settings = get_analytical_poiseuille_settings(channel_height, physical_params)
-    max_height = max(height_values)
-    height_values = [value / max_height * channel_height for value in height_values]
-    analytical_results = poiseuille_at_heights(settings, height_values)
-    return analytical_results
-
-
-def get_analytical_results(physical_params, heights, channel_height):
-    analytical_results = calculate_analytical_results(physical_params, heights, channel_height)
+def get_analytical_results(grid_params, physical_params, kernel, height_values):
+    channel_height = grid_params.number_of_nodes_per_direction[2]
+    settings = get_analytical_poiseuille_settings(channel_height, physical_params, kernel)
+    max_grid_height = channel_height * grid_params.node_distance
+    adjusted_height_values = [value / max_grid_height * channel_height for value in height_values]
+    analytical_results = poiseuille_at_heights(settings, adjusted_height_values)
     return analytical_results
 
 
@@ -100,18 +118,12 @@ def get_mesh_for_last_timestep(output_folder, runtime_params):
     return mesh_of_last_timestep
 
 
-def remove_existing_output_directory(output_dir):
-    shutil.rmtree(output_dir, ignore_errors=True)
-
-
-def get_analytical_poiseuille_settings(height, physical_params):
+def get_analytical_poiseuille_settings(height, physical_params, kernel):
     settings = PoiseuilleSettings()
     settings.height = height
     settings.viscosity = physical_params.lattice_viscosity
     settings.density = 1
-    settings.force = 1e-8
-
-    # print(settings)
+    settings.force = kernel.forcing_in_x1
 
     return settings
 
@@ -130,13 +142,10 @@ def get_heights_from_indices(mesh, indices):
 def create_grid_params_with_nodes_in_column(nodes_in_column, delta_x):
     grid_params = GridParameters()
     grid_params.node_distance = delta_x
-    grid_params.number_of_nodes_per_direction = [2, 2, nodes_in_column]
-    grid_params.blocks_per_direction = [1, 1, 6]
+    grid_params.number_of_nodes_per_direction = [1, 1, nodes_in_column]
+    grid_params.blocks_per_direction = [1, 1, 8]
     grid_params.periodic_boundary_in_x1 = True
     grid_params.periodic_boundary_in_x2 = True
     grid_params.periodic_boundary_in_x3 = False
 
-    print(f"GridParameters.node_distance = {grid_params.node_distance}")
-    print(f"GridParameters.number_of_nodes_per_direction = {grid_params.number_of_nodes_per_direction}")
-
     return grid_params
diff --git a/Python/requirements.txt b/Python/requirements.txt
index ded26051abb63145842f304ba865dc8487a8be73..8628634d1b85ebc0b07328d563d479f35641be97 100644
--- a/Python/requirements.txt
+++ b/Python/requirements.txt
@@ -14,8 +14,10 @@ py==1.10.0
 pyparsing==2.4.7
 pytest==6.2.1
 python-dateutil==2.8.1
-pyvista==0.27.4
+pyvista==0.28.1
+scipy==1.6.1
 scooby==0.5.6
 six==1.15.0
 toml==0.10.2
+transforms3d==0.3.1
 vtk==9.0.1
diff --git a/Python/tests/test_acousticscaling.py b/Python/tests/test_acousticscaling.py
new file mode 100644
index 0000000000000000000000000000000000000000..2da5314529f9559f9ac316f2d1bb3f1a9d0e1211
--- /dev/null
+++ b/Python/tests/test_acousticscaling.py
@@ -0,0 +1,115 @@
+import unittest
+from typing import List
+
+from pyfluids.kernel import LBMKernel, KernelType
+from pyfluids.parameters import GridParameters, PhysicalParameters, RuntimeParameters
+
+from acousticscaling import OneDirectionalAcousticScaling
+
+
+class OneDirectionalAcousticScalingTest(unittest.TestCase):
+
+    def setUp(self) -> None:
+        self.grid_params = self.make_grid_params()
+        self.physical_params = self.make_physical_params()
+        self.runtime_params = self.make_runtime_params()
+        self.kernel = self.make_kernel()
+
+        self.sut = OneDirectionalAcousticScaling(self.grid_params, self.physical_params, self.runtime_params,
+                                                 self.kernel)
+
+    def test_given_sim_parameters__when_scaling_level_zero__should_return_equal_sim_parameters(self):
+        factor = 1
+        actual_params = self.sut.configuration_for_scale_level(0)
+        actual_grid_params = actual_params[0]
+        actual_physical_params = actual_params[1]
+        actual_runtime_params = actual_params[2]
+        actual_kernel = actual_params[3]
+
+        self.assert_parameters_scaled_by_factor(actual_grid_params, actual_kernel,
+                                                actual_physical_params, actual_runtime_params, factor)
+
+    def test_given_sim_parameters__when_scaling_level_one__should_return_sim_parameters_scaled_by_two(self):
+        actual_params = self.sut.configuration_for_scale_level(1)
+        actual_grid_params = actual_params[0]
+        actual_physical_params = actual_params[1]
+        actual_runtime_params = actual_params[2]
+        actual_kernel = actual_params[3]
+
+        self.assert_parameters_scaled_by_factor(actual_grid_params, actual_kernel,
+                                                actual_physical_params, actual_runtime_params, 2)
+
+    def assert_parameters_scaled_by_factor(self, actual_grid_params, actual_kernel,
+                                           actual_physical_params, actual_runtime_params, factor):
+        self.assert_grid_params_scaled_by_factor(actual_grid_params, factor=factor)
+        self.assert_physical_params_scaled_by_factor(actual_physical_params, factor=factor)
+        self.assert_runtime_params_scaled_by_factor(actual_runtime_params, factor=factor)
+        self.assert_kernel_forcing_scaled_by_factor(actual_kernel, factor=factor)
+
+    def assert_grid_params_scaled_by_factor(self, actual_grid_params: GridParameters, factor: int):
+        expected_nodes_per_direction = self.scaled_list(self.grid_params.number_of_nodes_per_direction, factor)
+        expected_blocks_per_direction = self.scaled_list(self.grid_params.blocks_per_direction, factor)
+        expected_node_distance = self.grid_params.node_distance / factor
+        self.assertEqual(expected_node_distance, actual_grid_params.node_distance)
+        self.assertEqual(expected_nodes_per_direction, actual_grid_params.number_of_nodes_per_direction)
+        self.assertEqual(expected_blocks_per_direction, actual_grid_params.blocks_per_direction)
+        self.assertEqual(self.grid_params.reference_direction_index, actual_grid_params.reference_direction_index)
+        self.assertEqual(self.grid_params.periodic_boundary_in_x1, actual_grid_params.periodic_boundary_in_x1)
+        self.assertEqual(self.grid_params.periodic_boundary_in_x2, actual_grid_params.periodic_boundary_in_x2)
+        self.assertEqual(self.grid_params.periodic_boundary_in_x3, actual_grid_params.periodic_boundary_in_x3)
+
+    def assert_physical_params_scaled_by_factor(self, actual_params: PhysicalParameters, factor: int):
+        self.assertEqual(self.physical_params.lattice_viscosity * factor, actual_params.lattice_viscosity)
+        self.assertEqual(self.physical_params.bulk_viscosity_factor, actual_params.bulk_viscosity_factor)
+
+    def assert_runtime_params_scaled_by_factor(self, actual_params: RuntimeParameters, factor: int):
+        self.assertEqual(self.runtime_params.number_of_timesteps * factor, actual_params.number_of_timesteps)
+        self.assertEqual(self.runtime_params.number_of_threads, actual_params.number_of_threads)
+        self.assertEqual(self.runtime_params.timestep_log_interval, actual_params.timestep_log_interval)
+
+    def assert_kernel_forcing_scaled_by_factor(self, actual_kernel: LBMKernel, factor: int):
+        self.assertEqual(self.kernel.type, actual_kernel.type)
+        self.assertEqual(self.kernel.use_forcing, actual_kernel.use_forcing)
+        self.assertAlmostEqual(self.kernel.forcing_in_x1 / factor, actual_kernel.forcing_in_x1)
+        self.assertAlmostEqual(self.kernel.forcing_in_x2, actual_kernel.forcing_in_x2)
+        self.assertAlmostEqual(self.kernel.forcing_in_x3, actual_kernel.forcing_in_x3)
+
+    @staticmethod
+    def scaled_list(list_to_scale: List[int], factor: int) -> List[int]:
+        return [list_to_scale[0], list_to_scale[1], list_to_scale[2] * factor]
+
+    @staticmethod
+    def make_kernel():
+        kernel = LBMKernel(KernelType.CompressibleCumulantFourthOrderViscosity)
+        kernel.use_forcing = True
+        kernel.forcing_in_x1 = 5e-10
+        return kernel
+
+    @staticmethod
+    def make_runtime_params():
+        runtime_params = RuntimeParameters()
+        runtime_params.number_of_threads = 4
+        runtime_params.number_of_timesteps = 4_000_000
+        runtime_params.timestep_log_interval = 1_000_000
+        return runtime_params
+
+    @staticmethod
+    def make_physical_params():
+        physical_params = PhysicalParameters()
+        physical_params.lattice_viscosity = 1e-4
+        return physical_params
+
+    @staticmethod
+    def make_grid_params():
+        grid_params = GridParameters()
+        grid_params.node_distance = 1
+        grid_params.number_of_nodes_per_direction = [1, 1, 16]
+        grid_params.blocks_per_direction = [1, 1, 16]
+        grid_params.periodic_boundary_in_x1 = True
+        grid_params.periodic_boundary_in_x2 = True
+
+        return grid_params
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/Python/tests/test_boundaryconditions.py b/Python/tests/test_boundaryconditions.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a7d61f36337398fc5621540951f15b72262b17b
--- /dev/null
+++ b/Python/tests/test_boundaryconditions.py
@@ -0,0 +1,61 @@
+import unittest
+from pyfluids.boundaryconditions import *
+
+
+class BoundaryConditionsTest(unittest.TestCase):
+
+    def test__can_create_no_slip_bc(self):
+        """
+        Should be able to create NoSlipBoundaryCondition
+        """
+        sut = NoSlipBoundaryCondition()
+
+    def test__can_create_velocity_bc(self):
+        """
+        Should be able to create VelocityBoundaryCondition
+        """
+        sut = VelocityBoundaryCondition()
+
+    def test__can_create_velocity_bc_with_directions_function_and_time(self):
+        """
+        Should be able to create VelocityBoundaryCondition with directions, function and start/end time
+        """
+        from pymuparser import Parser
+
+        parser = Parser()
+        parser.expression = "1"
+        sut = VelocityBoundaryCondition(True, True, True, parser, 0, 1)
+
+    def test__can_create_velocity_bc_with_directions__function_per_direction__and__time(self):
+        """
+        Should be able to create VelocityBoundaryCondition with directions, function per direction and start/end time
+        """
+        from pymuparser import Parser
+
+        f1 = Parser()
+        f1.expression = "1"
+
+        f2 = Parser()
+        f2.expression = "1"
+
+        f3 = Parser()
+        f3.expression = "1"
+        sut = VelocityBoundaryCondition(True, True, True, f1, f2, f3, 0, 1)
+
+    def test__can_create_velocity_bc_with_speeds_and_times_per_direction(self):
+        """
+        Should be able to create VelocityBoundaryCondition with speeds and start/end times per direction
+        """
+        vx1, vx2, vx3 = 1, 2, 3
+        start1, end1 = 0, 1
+        start2, end2 = 1, 2
+        start3, end3 = 2, 3
+
+        sut = VelocityBoundaryCondition(vx1, start1, end1, vx2, start2, end2, vx3, start3, end3)
+
+    def test__can_create_non_reflecting_outflow(self):
+        """
+        Should be able to create NonReflectingOutflow
+        """
+
+        sut = NonReflectingOutflow()
diff --git a/Python/test_geometry.py b/Python/tests/test_geometry.py
similarity index 100%
rename from Python/test_geometry.py
rename to Python/tests/test_geometry.py
diff --git a/Python/test_kernel.py b/Python/tests/test_kernel.py
similarity index 85%
rename from Python/test_kernel.py
rename to Python/tests/test_kernel.py
index 5e5487903a279501b0acdba1ab192ac8dd381383..b1016f15308a77c9025787e061355819cbca3874 100644
--- a/Python/test_kernel.py
+++ b/Python/tests/test_kernel.py
@@ -51,3 +51,10 @@ class TestLBMKernel(unittest.TestCase):
         self.assertEqual(self.sut.forcing_in_x2, 8)
         self.assertEqual(self.sut.forcing_in_x3, 5)
 
+    def test_lbm_kernel__when_getting_type__should_equal_kernel_type_enum_value(self) -> None:
+        """
+        WHEN getting the kernel type IT should equal the corresponding KernelType enum value
+        """
+
+        actual = self.sut.type
+        self.assertEqual(KernelType.BGK, actual)
diff --git a/Python/vtk_utilities.py b/Python/vtk_utilities.py
index e8c16ce02c85518edacde024a012c4ce261fbc14..14d13a40d85e669a463c0b5b31ca2a21e771af75 100644
--- a/Python/vtk_utilities.py
+++ b/Python/vtk_utilities.py
@@ -1,20 +1,29 @@
 import math
+from typing import List
+
 import pyvista as pv
 
 
 def vertical_column_from_mesh(mesh):
     last_seen = math.inf
     relevant_indices = []
+    first_x = 0
+    first_y = 0
     for index, point in enumerate(mesh.points):
-        if point[2] == last_seen:
+        if index == 0:
+            first_x = point[0]
+            first_y = point[1]
+
+        if (point[0] != first_x or point[1] != first_y) and point[2] == last_seen:
             continue
+
         relevant_indices.append(index)
         last_seen = point[2]
 
     return relevant_indices
 
 
-def get_values_from_indices(array, indices):
+def get_values_from_indices(array, indices) -> List[float]:
     return [array[index] for index in indices]
 
 
diff --git a/apps/cpu/Applications.cmake b/apps/cpu/Applications.cmake
index d8dff8b8c5f776d1d27e9217d7b050ab4022d8a4..e8902e5ffbb3720365476afef50a4f3fbd0ddf76 100644
--- a/apps/cpu/Applications.cmake
+++ b/apps/cpu/Applications.cmake
@@ -1,8 +1,19 @@
+add_subdirectory(${APPS_ROOT_CPU}/PoiseuilleFlow)
+add_subdirectory(${APPS_ROOT_CPU}/HerschelBulkleySphere)
+add_subdirectory(${APPS_ROOT_CPU}/HerschelBulkleyModel)
+add_subdirectory(${APPS_ROOT_CPU}/rheometer)
+add_subdirectory(${APPS_ROOT_CPU}/CouetteFlow)
+add_subdirectory(${APPS_ROOT_CPU}/Multiphase)
+add_subdirectory(${APPS_ROOT_CPU}/ViskomatXL)
+add_subdirectory(${APPS_ROOT_CPU}/sphere)
+add_subdirectory(${APPS_ROOT_CPU}/FlowAroundCylinder)
+add_subdirectory(${APPS_ROOT_CPU}/LaminarTubeFlow)
+add_subdirectory(${APPS_ROOT_CPU}/MultiphaseDropletTest)
+
 #add_subdirectory(tests)
 #add_subdirectory(Applications/gridRf)
 #add_subdirectory(Applications/greenvortex)
 # add_subdirectory(Applications/micropart)
-add_subdirectory(${APPS_ROOT_CPU}/sphere)
 #add_subdirectory(Applications/vfscript)
 #add_subdirectory(Applications/reefer)
 #add_subdirectory(Applications/bananas)
@@ -11,8 +22,6 @@ add_subdirectory(${APPS_ROOT_CPU}/sphere)
 #add_subdirectory(Applications/bananas2)
 # add_subdirectory(Applications/plate)
 # add_subdirectory(Applications/plate2)
-add_subdirectory(${APPS_ROOT_CPU}/FlowAroundCylinder)
-add_subdirectory(${APPS_ROOT_CPU}/LaminarTubeFlow)
 # add_subdirectory(Applications/LaminarTubeFlowConv)
 #add_subdirectory(Applications/cylinderSt)
 #add_subdirectory(Applications/mpichTest)
@@ -54,7 +63,6 @@ add_subdirectory(${APPS_ROOT_CPU}/LaminarTubeFlow)
 #add_subdirectory(Applications/levels)
 #add_subdirectory(Applications/AcousticPulse)
 #add_subdirectory(Applications/screw)
-#add_subdirectory(Applications/PoiseuilleFlow)
 #add_subdirectory(Applications/InterfaceTest)
 #add_subdirectory(Applications/teperm)
 #add_subdirectory(Applications/Thermoplast)
@@ -64,10 +72,5 @@ add_subdirectory(${APPS_ROOT_CPU}/LaminarTubeFlow)
 #add_subdirectory(Applications/bChannelVA)
 #add_subdirectory(Applications/OrganPipe)
 #add_subdirectory(Applications/LidDrivenCavity)
-add_subdirectory(${APPS_ROOT_CPU}/HerschelBulkleySphere)
-add_subdirectory(${APPS_ROOT_CPU}/HerschelBulkleyModel)
-add_subdirectory(${APPS_ROOT_CPU}/rheometer)
-add_subdirectory(${APPS_ROOT_CPU}/CouetteFlow)
-add_subdirectory(${APPS_ROOT_CPU}/Multiphase)
-add_subdirectory(${APPS_ROOT_CPU}/ViskomatXL)
+
 
diff --git a/apps/cpu/LaminarTubeFlow/ltf.cfg b/apps/cpu/LaminarTubeFlow/ltf.cfg
index 94919cc3463c6a60dfd334e1c0505d60e56446d8..8b8e33e4998835da80d2121925acc7d95c3ccd20 100644
--- a/apps/cpu/LaminarTubeFlow/ltf.cfg
+++ b/apps/cpu/LaminarTubeFlow/ltf.cfg
@@ -1,5 +1,5 @@
 pathname = d:/temp/LaminarTubeFlow
-numOfThreads = 4
+numOfThreads = 1
 availMem = 10e9
 
 #Grid
@@ -22,5 +22,5 @@ restartStep = 100000
 cpStart = 100000
 cpStep = 100000
 
-outTime = 1
-endTime = 100
\ No newline at end of file
+outTime = 1000
+endTime = 1000
\ No newline at end of file
diff --git a/apps/cpu/LaminarTubeFlow/ltf.cpp b/apps/cpu/LaminarTubeFlow/ltf.cpp
index 71340ab656b6fca7d5bcc534e69a2e25ca10fa9c..e523dd2de7416ea5189dbceab200725d89f15424 100644
--- a/apps/cpu/LaminarTubeFlow/ltf.cpp
+++ b/apps/cpu/LaminarTubeFlow/ltf.cpp
@@ -315,7 +315,7 @@ void run(string configname)
       auto timeDepBC = make_shared<TimeDependentBCCoProcessor>(TimeDependentBCCoProcessor(grid, timeBCSch));
       timeDepBC->addInteractor(inflowInt);
 
-      //omp_set_num_threads(numOfThreads);
+      omp_set_num_threads(numOfThreads);
       numOfThreads = 1;
       SPtr<UbScheduler> stepGhostLayer(visSch);
       SPtr<Calculator> calculator(new BasicCalculator(grid, stepGhostLayer, int(endTime)));
diff --git a/apps/cpu/Multiphase/Multiphase.cfg b/apps/cpu/Multiphase/Multiphase.cfg
index d52694ac838c8ae029ebf85476ab6db68de11223..c294ea68ce96c751030380d52d16eb35d06f9faa 100644
--- a/apps/cpu/Multiphase/Multiphase.cfg
+++ b/apps/cpu/Multiphase/Multiphase.cfg
@@ -1,11 +1,11 @@
-pathname = d:/temp/MultiphaseNew4
+pathname = d:/temp/MultiphaseNew5
 #pathGeo = d:/Projects/VirtualFluids-Multiphase/source/Applications/Multiphase/backup
 pathGeo = d:/Projects/VirtualFluidsCombined/apps/cpu/Multiphase/backup
 #geoFile = JetBreakupR.ASCII.stl
 #geoFile = inlet1.stl
 geoFile = tubeTransformed.stl
 
-numOfThreads = 1
+numOfThreads = 4
 availMem = 10e9
 
 #Grid
@@ -20,21 +20,21 @@ availMem = 10e9
 #boundingBox = -40 40 -1.0 -21.0 -40 40 #(Jet Breakup2) (Original without inlet length)
 #boundingBox = -40 40 1.0 11.0 -40 40 #(Jet Breakup2) (Original without inlet length)
 #boundingBox = -40e-3 40e-3 1.0e-3 11.0e-3 -403-3 40e-3 #(Jet Breakup2) (Original without inlet length)
-blocknx = 20 20 20
+#blocknx = 20 20 20
 
-boundingBox = 6.0e-3 16.0e-3 -40e-3 40e-3 -40e-3 40e-3
+boundingBox = 6.0e-3 46.0e-3 -5e-3 5e-3 -5e-3 5e-3
 blocknx = 20 20 20
 
-dx = 0.5e-3
+dx = 1.66666666667e-4
 refineLevel = 0
 
 #Simulation
-uLB = 0.0005 #inlet velocity
+uLB = 0.005 #inlet velocity
 #uF2 = 0.0001
 Re = 10
 nuL =1e-2# 1.0e-5 #!1e-2
 nuG =1e-2# 1.16e-4 #!1e-2
-densityRatio = 30
+densityRatio = 1000
 sigma = 1e-5 #4.66e-3 #surface tension 1e-4 ./. 1e-5
 interfaceThickness = 5
 radius = 615.0 (Jet Breakup)
@@ -55,5 +55,5 @@ restartStep = 100000
 cpStart = 100000
 cpStep = 100000
 
-outTime = 100
+outTime = 1
 endTime = 10000
\ No newline at end of file
diff --git a/apps/cpu/Multiphase/Multiphase.cpp b/apps/cpu/Multiphase/Multiphase.cpp
index 10ff2b39618c64ef09edd902471387e7ebc90c70..deb2845f4278661bb970ea68b043e3cb435bffcc 100644
--- a/apps/cpu/Multiphase/Multiphase.cpp
+++ b/apps/cpu/Multiphase/Multiphase.cpp
@@ -111,10 +111,15 @@ void run(string configname)
         //////////////////////////////////////////////////////////////////////////
         // restart
         SPtr<UbScheduler> rSch(new UbScheduler(cpStep, cpStart));
-        SPtr<MPIIOMigrationBECoProcessor> rcp = make_shared<MPIIOMigrationBECoProcessor>(grid, rSch, pathname, comm);
+        //SPtr<MPIIORestartCoProcessor> rcp(new MPIIORestartCoProcessor(grid, rSch, pathname, comm));
+        //SPtr<MPIIOMigrationCoProcessor> rcp(new MPIIOMigrationCoProcessor(grid, rSch, pathname, comm));
+        SPtr<MPIIOMigrationBECoProcessor> rcp(new MPIIOMigrationBECoProcessor(grid, rSch, pathname, comm));
+        rcp->setNu(nuLB);
+        rcp->setNuLG(nuL, nuG);
+        rcp->setDensityRatio(densityRatio);
+
         rcp->setLBMKernel(kernel);
         rcp->setBCProcessor(bcProc);
-        rcp->setNu(nuLB);
         //////////////////////////////////////////////////////////////////////////
 
         mu::Parser fctF1;
@@ -140,7 +145,7 @@ void run(string configname)
         SPtr<D3Q27Interactor> cylInt;
         if (newStart) {
 
-      //  if (newStart) {
+            //  if (newStart) {
 
             // bounding box
             /*double g_minX1 = 0.0;
@@ -163,7 +168,7 @@ void run(string configname)
             SPtr<GbObject3D> gridCube(new GbCuboid3D(g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3));
             if (myid == 0)
                 GbSystem3D::writeGeoObject(gridCube.get(), pathname + "/geo/gridCube",
-                                           WbWriterVtkXmlBinary::getInstance());
+                    WbWriterVtkXmlBinary::getInstance());
 
             if (myid == 0) UBLOG(logINFO, "Read geoFile:start");
             SPtr<GbTriFaceMesh3D> cylinder = make_shared<GbTriFaceMesh3D>();
@@ -176,10 +181,10 @@ void run(string configname)
                 new GbCuboid3D(g_minX1*0.5 - dx, g_minX2 - dx, g_minX3*0.5 - dx, g_maxX1*0.5 + dx, g_minX2, g_maxX3*0.5 + dx));
             if (myid == 0)  GbSystem3D::writeGeoObject(geoInflowF1.get(), pathname + "/geo/geoInflowF1",                                           WbWriterVtkXmlASCII::getInstance());
 
-            GbCylinder3DPtr cylinder1(new GbCylinder3D(g_minX1-dx, 0.0, 0.0, cylinder->getX1Maximum(), 0.0, 0.0, 3e-3));
+            GbCylinder3DPtr cylinder1(new GbCylinder3D(g_minX1-dx, 0.0, 0.0, g_minX1+dx, 0.0, 0.0, 3e-3));
             if (myid == 0)
                 GbSystem3D::writeGeoObject(cylinder1.get(), pathname + "/geo/cylinder1",
-                                           WbWriterVtkXmlASCII::getInstance());
+                    WbWriterVtkXmlASCII::getInstance());
 
             //GbCylinder3DPtr cylinder2(
             //    new GbCylinder3D(0.0, g_minX2 - 2.0 * dx / 2.0, 0.0, 0.0, g_minX2 + 4.0 * dx, 0.0, 8.0+2.0*dx));
@@ -192,7 +197,7 @@ void run(string configname)
             GbCuboid3DPtr geoOutflow(new GbCuboid3D(g_minX1, g_maxX2, g_minX3, g_maxX1, g_maxX2 + dx, g_maxX3));
             if (myid == 0)
                 GbSystem3D::writeGeoObject(geoOutflow.get(), pathname + "/geo/geoOutflow",
-                                           WbWriterVtkXmlASCII::getInstance());
+                    WbWriterVtkXmlASCII::getInstance());
 
             // double blockLength = blocknx[0] * dx;
 
@@ -254,32 +259,32 @@ void run(string configname)
 
             SPtr<D3Q27Interactor> outflowInt(new D3Q27Interactor(geoOutflow, grid, denBCAdapter, Interactor3D::SOLID));
 
-      // Create boundary conditions geometry
-      GbCuboid3DPtr wallXmin(new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_minX3 - dx, g_minX1, g_maxX2 + dx, g_maxX3));
-      GbSystem3D::writeGeoObject(wallXmin.get(), pathname + "/geo/wallXmin", WbWriterVtkXmlASCII::getInstance());
-      GbCuboid3DPtr wallXmax(new GbCuboid3D(g_maxX1, g_minX2 - dx, g_minX3 - dx, g_maxX1 + dx, g_maxX2 + dx, g_maxX3));
-      GbSystem3D::writeGeoObject(wallXmax.get(), pathname + "/geo/wallXmax", WbWriterVtkXmlASCII::getInstance());
-      GbCuboid3DPtr wallZmin(new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_minX3 - dx, g_maxX1 + dx, g_maxX2 + dx, g_minX3));
-      GbSystem3D::writeGeoObject(wallZmin.get(), pathname + "/geo/wallZmin", WbWriterVtkXmlASCII::getInstance());
-      GbCuboid3DPtr wallZmax(new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_maxX3, g_maxX1 + dx, g_maxX2 + dx, g_maxX3 + dx));
-      GbSystem3D::writeGeoObject(wallZmax.get(), pathname + "/geo/wallZmax", WbWriterVtkXmlASCII::getInstance());
-      GbCuboid3DPtr wallYmin(new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_minX3 - dx, g_maxX1 + dx, g_minX2, g_maxX3));
-      GbSystem3D::writeGeoObject(wallYmin.get(), pathname + "/geo/wallYmin", WbWriterVtkXmlASCII::getInstance());
-      GbCuboid3DPtr wallYmax(new GbCuboid3D(g_minX1 - dx, g_maxX2, g_minX3 - dx, g_maxX1 + dx, g_maxX2 + dx, g_maxX3));
-      GbSystem3D::writeGeoObject(wallYmax.get(), pathname + "/geo/wallYmax", WbWriterVtkXmlASCII::getInstance());
-
-      // Add boundary conditions to grid generator
-      SPtr<D3Q27Interactor> wallXminInt(new D3Q27Interactor(wallXmin, grid, noSlipBCAdapter, Interactor3D::SOLID));
-      SPtr<D3Q27Interactor> wallXmaxInt(new D3Q27Interactor(wallXmax, grid, noSlipBCAdapter, Interactor3D::SOLID));
-      SPtr<D3Q27Interactor> wallZminInt(new D3Q27Interactor(wallZmin, grid, noSlipBCAdapter, Interactor3D::SOLID));
-      SPtr<D3Q27Interactor> wallZmaxInt(new D3Q27Interactor(wallZmax, grid, noSlipBCAdapter, Interactor3D::SOLID));
-      SPtr<D3Q27Interactor> wallYminInt(new D3Q27Interactor(wallYmin, grid, noSlipBCAdapter, Interactor3D::SOLID));
-      SPtr<D3Q27Interactor> wallYmaxInt(new D3Q27Interactor(wallYmax, grid, noSlipBCAdapter, Interactor3D::SOLID));
-
-
-      cylInt = SPtr<D3Q27Interactor>(new D3Q27Interactor(cylinder1, grid, velBCAdapterF1, Interactor3D::SOLID));
-      cylInt->addBCAdapter(velBCAdapterF2);
-      //SPtr<D3Q27Interactor> cyl2Int(new D3Q27Interactor(cylinder2, grid, noSlipBCAdapter, Interactor3D::SOLID));
+            // Create boundary conditions geometry
+            GbCuboid3DPtr wallXmin(new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_minX3 - dx, g_minX1, g_maxX2 + dx, g_maxX3));
+            GbSystem3D::writeGeoObject(wallXmin.get(), pathname + "/geo/wallXmin", WbWriterVtkXmlASCII::getInstance());
+            GbCuboid3DPtr wallXmax(new GbCuboid3D(g_maxX1, g_minX2 - dx, g_minX3 - dx, g_maxX1 + dx, g_maxX2 + dx, g_maxX3));
+            GbSystem3D::writeGeoObject(wallXmax.get(), pathname + "/geo/wallXmax", WbWriterVtkXmlASCII::getInstance());
+            GbCuboid3DPtr wallZmin(new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_minX3 - dx, g_maxX1 + dx, g_maxX2 + dx, g_minX3));
+            GbSystem3D::writeGeoObject(wallZmin.get(), pathname + "/geo/wallZmin", WbWriterVtkXmlASCII::getInstance());
+            GbCuboid3DPtr wallZmax(new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_maxX3, g_maxX1 + dx, g_maxX2 + dx, g_maxX3 + dx));
+            GbSystem3D::writeGeoObject(wallZmax.get(), pathname + "/geo/wallZmax", WbWriterVtkXmlASCII::getInstance());
+            GbCuboid3DPtr wallYmin(new GbCuboid3D(g_minX1 - dx, g_minX2 - dx, g_minX3 - dx, g_maxX1 + dx, g_minX2, g_maxX3));
+            GbSystem3D::writeGeoObject(wallYmin.get(), pathname + "/geo/wallYmin", WbWriterVtkXmlASCII::getInstance());
+            GbCuboid3DPtr wallYmax(new GbCuboid3D(g_minX1 - dx, g_maxX2, g_minX3 - dx, g_maxX1 + dx, g_maxX2 + dx, g_maxX3));
+            GbSystem3D::writeGeoObject(wallYmax.get(), pathname + "/geo/wallYmax", WbWriterVtkXmlASCII::getInstance());
+
+            // Add boundary conditions to grid generator
+            SPtr<D3Q27Interactor> wallXminInt(new D3Q27Interactor(wallXmin, grid, noSlipBCAdapter, Interactor3D::SOLID));
+            SPtr<D3Q27Interactor> wallXmaxInt(new D3Q27Interactor(wallXmax, grid, noSlipBCAdapter, Interactor3D::SOLID));
+            SPtr<D3Q27Interactor> wallZminInt(new D3Q27Interactor(wallZmin, grid, noSlipBCAdapter, Interactor3D::SOLID));
+            SPtr<D3Q27Interactor> wallZmaxInt(new D3Q27Interactor(wallZmax, grid, noSlipBCAdapter, Interactor3D::SOLID));
+            SPtr<D3Q27Interactor> wallYminInt(new D3Q27Interactor(wallYmin, grid, noSlipBCAdapter, Interactor3D::SOLID));
+            SPtr<D3Q27Interactor> wallYmaxInt(new D3Q27Interactor(wallYmax, grid, noSlipBCAdapter, Interactor3D::SOLID));
+
+
+            cylInt = SPtr<D3Q27Interactor>(new D3Q27Interactor(cylinder1, grid, velBCAdapterF1, Interactor3D::SOLID));
+            cylInt->addBCAdapter(velBCAdapterF2);
+            //SPtr<D3Q27Interactor> cyl2Int(new D3Q27Interactor(cylinder2, grid, noSlipBCAdapter, Interactor3D::SOLID));
 
             SPtr<Grid3DVisitor> metisVisitor(
                 new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::BSW));
@@ -288,8 +293,8 @@ void run(string configname)
             intHelper.addInteractor(tubes);
             //intHelper.addInteractor(outflowInt);
             //intHelper.addInteractor(cyl2Int);
-            
-            
+
+
             intHelper.addInteractor(wallXminInt);
             intHelper.addInteractor(wallXmaxInt);
             intHelper.addInteractor(wallZminInt);
@@ -297,7 +302,7 @@ void run(string configname)
             intHelper.addInteractor(wallYminInt);
             intHelper.addInteractor(wallYmaxInt);
             //intHelper.addInteractor(inflowF1Int);
-            
+
 
             intHelper.selectBlocks();
 
@@ -331,7 +336,7 @@ void run(string configname)
             }
 
             MultiphaseSetKernelBlockVisitor kernelVisitor(kernel, nuL, nuG, densityRatio, beta, kappa, theta, availMem,
-                                                          needMem);
+                needMem);
 
             grid->accept(kernelVisitor);
 
@@ -382,16 +387,16 @@ void run(string configname)
             if (myid == 0)
                 UBLOG(logINFO, "Restart - end");
         }
-        
+
         TwoDistributionsSetConnectorsBlockVisitor setConnsVisitor(comm);
         grid->accept(setConnsVisitor);
-        
+
         //ThreeDistributionsSetConnectorsBlockVisitor setConnsVisitor(comm);
         //grid->accept(setConnsVisitor);
 
         SPtr<UbScheduler> visSch(new UbScheduler(outTime));
         SPtr<WriteMultiphaseQuantitiesCoProcessor> pp(new WriteMultiphaseQuantitiesCoProcessor(
-        //SPtr<WriteMacroscopicQuantitiesCoProcessor> pp(new WriteMacroscopicQuantitiesCoProcessor(
+            //SPtr<WriteMacroscopicQuantitiesCoProcessor> pp(new WriteMacroscopicQuantitiesCoProcessor(
             grid, visSch, pathname, WbWriterVtkXmlBinary::getInstance(), conv, comm));
         pp->process(0);
 
@@ -414,7 +419,7 @@ void run(string configname)
         calculator->addCoProcessor(rcp);
 
 
-        
+
 
         if (myid == 0)
             UBLOG(logINFO, "Simulation-start");
diff --git a/apps/cpu/MultiphaseDropletTest.zip b/apps/cpu/MultiphaseDropletTest.zip
new file mode 100644
index 0000000000000000000000000000000000000000..5eb13a6c0bacfbf392deb00c6b388ba282c038e0
Binary files /dev/null and b/apps/cpu/MultiphaseDropletTest.zip differ
diff --git a/apps/cpu/MultiphaseDropletTest/CMakeLists.txt b/apps/cpu/MultiphaseDropletTest/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3f01ad73a0b28f7050667dde7b9408ac00b76e42
--- /dev/null
+++ b/apps/cpu/MultiphaseDropletTest/CMakeLists.txt
@@ -0,0 +1,3 @@
+PROJECT(MultiphaseDropletTest)
+
+vf_add_library(BUILDTYPE binary PRIVATE_LINK VirtualFluidsCore basics ${MPI_CXX_LIBRARIES} FILES droplet.cpp )
diff --git a/apps/cpu/MultiphaseDropletTest/DropletTest.cfg b/apps/cpu/MultiphaseDropletTest/DropletTest.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..72c0144890c2fd8ba25fa0dfb7528fdbd1b889d8
--- /dev/null
+++ b/apps/cpu/MultiphaseDropletTest/DropletTest.cfg
@@ -0,0 +1,41 @@
+pathname = d:/temp/MultiphaseDropletTest
+
+numOfThreads = 4
+availMem = 10e9
+
+#Grid
+
+boundingBox = 0 128 0 64 0 64
+blocknx = 8 8 8
+
+dx = 1
+refineLevel = 0
+
+#Simulation
+uLB = 0.005 
+Re = 10
+nuL =1e-2# 1.0e-5 #!1e-2
+nuG =1e-2# 1.16e-4 #!1e-2
+densityRatio = 1000
+sigma = 1e-5 #4.66e-3 #surface tension 1e-4 ./. 1e-5
+interfaceThickness = 5
+radius = 16
+contactAngle = 110.0
+#gravity = 0.0
+gravity = -5.04e-6
+phi_L = 0.0
+phi_H = 1.0
+Phase-field Relaxation = 0.6
+Mobility = 0.02 # 0.01 ./. 0.08, fine correction of Phase-field Relaxation parameter, to activate it need to change in kernel tauH to tauH1
+
+
+logToFile = false
+
+newStart = true
+restartStep = 100000
+
+cpStart = 100000
+cpStep = 100000
+
+outTime = 1
+endTime = 10000
\ No newline at end of file
diff --git a/apps/cpu/MultiphaseDropletTest/droplet.cpp b/apps/cpu/MultiphaseDropletTest/droplet.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..092d5a16a36b47c726f0f85463484f0c97fdecd0
--- /dev/null
+++ b/apps/cpu/MultiphaseDropletTest/droplet.cpp
@@ -0,0 +1,306 @@
+#include <iostream>
+#include <string>
+#include <memory>
+
+#include "VirtualFluids.h"
+
+using namespace std;
+
+void run(string configname)
+{
+    try {
+        ConfigurationFile config;
+        config.load(configname);
+
+        string pathname            = config.getValue<string>("pathname");
+        int numOfThreads           = config.getValue<int>("numOfThreads");
+        vector<int> blocknx        = config.getVector<int>("blocknx");
+        vector<double> boundingBox = config.getVector<double>("boundingBox");
+        double uLB             = config.getValue<double>("uLB");
+        double nuL             = config.getValue<double>("nuL");
+        double nuG             = config.getValue<double>("nuG");
+        double densityRatio    = config.getValue<double>("densityRatio");
+        double sigma           = config.getValue<double>("sigma");
+        int interfaceThickness = config.getValue<int>("interfaceThickness");
+        double radius          = config.getValue<double>("radius");
+        double theta           = config.getValue<double>("contactAngle");
+        double gr              = config.getValue<double>("gravity");
+        double phiL            = config.getValue<double>("phi_L");
+        double phiH            = config.getValue<double>("phi_H");
+        double tauH            = config.getValue<double>("Phase-field Relaxation");
+        double mob             = config.getValue<double>("Mobility");
+
+        double endTime     = config.getValue<double>("endTime");
+        double outTime     = config.getValue<double>("outTime");
+        double availMem    = config.getValue<double>("availMem");
+        int refineLevel    = config.getValue<int>("refineLevel");
+        double Re          = config.getValue<double>("Re");
+        double dx          = config.getValue<double>("dx");
+        bool logToFile     = config.getValue<bool>("logToFile");
+        //double restartStep = config.getValue<double>("restartStep");
+        //double cpStart     = config.getValue<double>("cpStart");
+        //double cpStep      = config.getValue<double>("cpStep");
+        bool newStart      = config.getValue<bool>("newStart");
+
+        double beta  = 12 * sigma / interfaceThickness;
+        double kappa = 1.5 * interfaceThickness * sigma;
+
+        SPtr<Communicator> comm = MPICommunicator::getInstance();
+        int myid                = comm->getProcessID();
+
+        if (myid == 0)
+            UBLOG(logINFO, "Droplet Test: Start!");
+
+        if (logToFile) {
+#if defined(__unix__)
+            if (myid == 0) {
+                const char *str = pathname.c_str();
+                mkdir(str, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
+            }
+#endif
+
+            if (myid == 0) {
+                stringstream logFilename;
+                logFilename << pathname + "/logfile" + UbSystem::toString(UbSystem::getTimeStamp()) + ".txt";
+                UbLog::output_policy::setStream(logFilename.str());
+            }
+        }
+
+        //Sleep(30000);
+
+        // LBMReal dLB = 0; // = length[1] / dx;
+        LBMReal rhoLB = 0.0;
+        LBMReal nuLB  = nuL; //(uLB*dLB) / Re;
+
+        SPtr<LBMUnitConverter> conv(new LBMUnitConverter());
+
+        //const int baseLevel = 0;
+
+        SPtr<LBMKernel> kernel;
+
+        //kernel = SPtr<LBMKernel>(new MultiphaseScratchCumulantLBMKernel());
+        kernel = SPtr<LBMKernel>(new MultiphaseCumulantLBMKernel());
+        //kernel = SPtr<LBMKernel>(new MultiphaseTwoPhaseFieldsCumulantLBMKernel());
+
+        kernel->setWithForcing(true);
+        kernel->setForcingX1(gr);
+        kernel->setForcingX2(0.0);
+        kernel->setForcingX3(0.0);
+
+        kernel->setPhiL(phiL);
+        kernel->setPhiH(phiH);
+        kernel->setPhaseFieldRelaxation(tauH);
+        kernel->setMobility(mob);
+
+        SPtr<BCProcessor> bcProc(new BCProcessor());
+        // BCProcessorPtr bcProc(new ThinWallBCProcessor());
+
+        kernel->setBCProcessor(bcProc);
+
+        SPtr<Grid3D> grid(new Grid3D(comm));
+        grid->setDeltaX(dx);
+        grid->setBlockNX(blocknx[0], blocknx[1], blocknx[2]);
+        grid->setPeriodicX1(true);
+        grid->setPeriodicX2(true);
+        grid->setPeriodicX3(true);
+
+        //////////////////////////////////////////////////////////////////////////
+        // restart
+        //SPtr<UbScheduler> rSch(new UbScheduler(cpStep, cpStart));
+        ////SPtr<MPIIORestartCoProcessor> rcp(new MPIIORestartCoProcessor(grid, rSch, pathname, comm));
+        ////SPtr<MPIIOMigrationCoProcessor> rcp(new MPIIOMigrationCoProcessor(grid, rSch, pathname, comm));
+        //SPtr<MPIIOMigrationBECoProcessor> rcp(new MPIIOMigrationBECoProcessor(grid, rSch, pathname, comm));
+        //rcp->setNu(nuLB);
+        //rcp->setNuLG(nuL, nuG);
+        //rcp->setDensityRatio(densityRatio);
+
+        //rcp->setLBMKernel(kernel);
+        //rcp->setBCProcessor(bcProc);
+        //////////////////////////////////////////////////////////////////////////
+
+        if (newStart) {
+
+            // bounding box
+            double g_minX1 = boundingBox[0];
+            double g_minX2 = boundingBox[2];
+            double g_minX3 = boundingBox[4];
+
+            double g_maxX1 = boundingBox[1];
+            double g_maxX2 = boundingBox[3];
+            double g_maxX3 = boundingBox[5];
+
+            // geometry
+            SPtr<GbObject3D> gridCube(new GbCuboid3D(g_minX1, g_minX2, g_minX3, g_maxX1, g_maxX2, g_maxX3));
+            if (myid == 0)
+                GbSystem3D::writeGeoObject(gridCube.get(), pathname + "/geo/gridCube",
+                    WbWriterVtkXmlBinary::getInstance());
+
+            if (myid == 0) {
+                UBLOG(logINFO, "uLb = " << uLB);
+                UBLOG(logINFO, "rho = " << rhoLB);
+                UBLOG(logINFO, "nuLb = " << nuLB);
+                UBLOG(logINFO, "Re = " << Re);
+                UBLOG(logINFO, "dx = " << dx);
+                UBLOG(logINFO, "Preprocess - start");
+            }
+
+            GenBlocksGridVisitor genBlocks(gridCube);
+            grid->accept(genBlocks);
+
+ 
+            SPtr<WriteBlocksCoProcessor> ppblocks(new WriteBlocksCoProcessor(
+                grid, SPtr<UbScheduler>(new UbScheduler(1)), pathname, WbWriterVtkXmlBinary::getInstance(), comm));
+
+            //SPtr<Grid3DVisitor> metisVisitor(
+            //    new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::BSW));
+            //InteractorsHelper intHelper(grid, metisVisitor);
+            //intHelper.selectBlocks();
+
+            ppblocks->process(0);
+            ppblocks.reset();
+
+            unsigned long long numberOfBlocks = (unsigned long long)grid->getNumberOfBlocks();
+            int ghostLayer                    = 3;
+            unsigned long long numberOfNodesPerBlock =
+                (unsigned long long)(blocknx[0]) * (unsigned long long)(blocknx[1]) * (unsigned long long)(blocknx[2]);
+            unsigned long long numberOfNodes = numberOfBlocks * numberOfNodesPerBlock;
+            unsigned long long numberOfNodesPerBlockWithGhostLayer =
+                numberOfBlocks * (blocknx[0] + ghostLayer) * (blocknx[1] + ghostLayer) * (blocknx[2] + ghostLayer);
+            double needMemAll =
+                double(numberOfNodesPerBlockWithGhostLayer * (27 * sizeof(double) + sizeof(int) + sizeof(float) * 4));
+            double needMem = needMemAll / double(comm->getNumberOfProcesses());
+
+            if (myid == 0) {
+                UBLOG(logINFO, "Number of blocks = " << numberOfBlocks);
+                UBLOG(logINFO, "Number of nodes  = " << numberOfNodes);
+                int minInitLevel = grid->getCoarsestInitializedLevel();
+                int maxInitLevel = grid->getFinestInitializedLevel();
+                for (int level = minInitLevel; level <= maxInitLevel; level++) {
+                    int nobl = grid->getNumberOfBlocks(level);
+                    UBLOG(logINFO, "Number of blocks for level " << level << " = " << nobl);
+                    UBLOG(logINFO, "Number of nodes for level " << level << " = " << nobl * numberOfNodesPerBlock);
+                }
+                UBLOG(logINFO, "Necessary memory  = " << needMemAll << " bytes");
+                UBLOG(logINFO, "Necessary memory per process = " << needMem << " bytes");
+                UBLOG(logINFO, "Available memory per process = " << availMem << " bytes");
+            }
+
+            MultiphaseSetKernelBlockVisitor kernelVisitor(kernel, nuL, nuG, densityRatio, beta, kappa, theta, availMem,
+                needMem);
+
+            grid->accept(kernelVisitor);
+
+            if (refineLevel > 0) {
+                SetUndefinedNodesBlockVisitor undefNodesVisitor;
+                grid->accept(undefNodesVisitor);
+            }
+
+
+            //intHelper.setBC();
+
+            //grid->accept(bcVisitor);
+
+            // initialization of distributions
+            LBMReal x1c = (g_maxX1 - g_minX1-1)/2;
+            LBMReal x2c = (g_maxX2 - g_minX2-1)/2;
+            LBMReal x3c = (g_maxX3 - g_minX3-1)/2;
+            mu::Parser fct1;
+            fct1.SetExpr("0.5-0.5*tanh(2*(sqrt((x1-x1c)^2+(x2-x2c)^2+(x3-x3c)^2)-radius)/interfaceThickness)");
+            fct1.DefineConst("x1c", x1c);
+            fct1.DefineConst("x2c", x2c);
+            fct1.DefineConst("x3c", x3c);
+            fct1.DefineConst("radius", radius);
+            fct1.DefineConst("interfaceThickness", interfaceThickness);
+
+            mu::Parser fct2;
+            fct2.SetExpr("0.5*uLB-uLB*0.5*tanh(2*(sqrt((x1-x1c)^2+(x2-x2c)^2+(x3-x3c)^2)-radius)/interfaceThickness)");
+            fct2.DefineConst("uLB", uLB);
+            fct2.DefineConst("x1c", x1c);
+            fct2.DefineConst("x2c", x2c);
+            fct2.DefineConst("x3c", x3c);
+            fct2.DefineConst("radius", radius);
+            fct2.DefineConst("interfaceThickness", interfaceThickness);
+
+            MultiphaseInitDistributionsBlockVisitor initVisitor(densityRatio, interfaceThickness, radius);
+            initVisitor.setPhi(fct1);
+            initVisitor.setVx1(fct2);
+            grid->accept(initVisitor);
+
+            // boundary conditions grid
+            {
+                SPtr<UbScheduler> geoSch(new UbScheduler(1));
+                SPtr<WriteBoundaryConditionsCoProcessor> ppgeo(new WriteBoundaryConditionsCoProcessor(
+                    grid, geoSch, pathname, WbWriterVtkXmlBinary::getInstance(), comm));
+                ppgeo->process(0);
+                ppgeo.reset();
+            }
+
+            if (myid == 0)
+                UBLOG(logINFO, "Preprocess - end");
+        } else {
+            if (myid == 0) {
+                UBLOG(logINFO, "Parameters:");
+                UBLOG(logINFO, "uLb = " << uLB);
+                UBLOG(logINFO, "rho = " << rhoLB);
+                UBLOG(logINFO, "nuLb = " << nuLB);
+                UBLOG(logINFO, "Re = " << Re);
+                UBLOG(logINFO, "dx = " << dx);
+                UBLOG(logINFO, "number of levels = " << refineLevel + 1);
+                UBLOG(logINFO, "numOfThreads = " << numOfThreads);
+                UBLOG(logINFO, "path = " << pathname);
+            }
+
+            //rcp->restart((int)restartStep);
+            //grid->setTimeStep(restartStep);
+
+            if (myid == 0)
+                UBLOG(logINFO, "Restart - end");
+        }
+
+        TwoDistributionsSetConnectorsBlockVisitor setConnsVisitor(comm);
+        grid->accept(setConnsVisitor);
+
+        //ThreeDistributionsSetConnectorsBlockVisitor setConnsVisitor(comm);
+        //grid->accept(setConnsVisitor);
+
+        SPtr<UbScheduler> visSch(new UbScheduler(outTime));
+        SPtr<WriteMultiphaseQuantitiesCoProcessor> pp(new WriteMultiphaseQuantitiesCoProcessor(
+            grid, visSch, pathname, WbWriterVtkXmlBinary::getInstance(), conv, comm));
+        //SPtr<WriteMacroscopicQuantitiesCoProcessor> pp(new WriteMacroscopicQuantitiesCoProcessor(
+        //    grid, visSch, pathname, WbWriterVtkXmlBinary::getInstance(), conv, comm));
+
+        SPtr<UbScheduler> nupsSch(new UbScheduler(10, 30, 100));
+        SPtr<NUPSCounterCoProcessor> npr(new NUPSCounterCoProcessor(grid, nupsSch, numOfThreads, comm));
+
+        SPtr<UbScheduler> stepGhostLayer(new UbScheduler(1));
+        SPtr<Calculator> calculator(new BasicCalculator(grid, stepGhostLayer, endTime));
+        calculator->addCoProcessor(npr);
+        calculator->addCoProcessor(pp);
+        //calculator->addCoProcessor(rcp);
+
+
+
+        if (myid == 0)
+            UBLOG(logINFO, "Simulation-start");
+        calculator->calculate();
+        if (myid == 0)
+            UBLOG(logINFO, "Simulation-end");
+    } catch (std::exception &e) {
+        cerr << e.what() << endl << flush;
+    } catch (std::string &s) {
+        cerr << s << endl;
+    } catch (...) {
+        cerr << "unknown exception" << endl;
+    }
+}
+int main(int argc, char *argv[])
+{
+    // Sleep(30000);
+    if (argv != NULL) {
+        if (argv[1] != NULL) {
+            run(string(argv[1]));
+        } else {
+            cout << "Configuration file is missing!" << endl;
+        }
+    }
+}
diff --git a/apps/cpu/PoiseuilleFlow/CMakeLists.txt b/apps/cpu/PoiseuilleFlow/CMakeLists.txt
index 43ea7697a846d3453bcdf8e53f6b5a9622ee9e71..1959719d81013762d37f655b342f755135c9ef85 100644
--- a/apps/cpu/PoiseuilleFlow/CMakeLists.txt
+++ b/apps/cpu/PoiseuilleFlow/CMakeLists.txt
@@ -1,25 +1,6 @@
-CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
-
-########################################################
-## C++ PROJECT                                       ###
-########################################################
 PROJECT(pf)
 
-INCLUDE(${APPS_ROOT}/IncludsList.cmake) 
-
-#################################################################
-###   LOCAL FILES                                             ###
-#################################################################
-FILE(GLOB SPECIFIC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.h
-                         ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
-                         ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp  )
- 
-SET(ALL_SOURCES ${ALL_SOURCES} ${SPECIFIC_FILES})
-SOURCE_GROUP(src FILES ${SPECIFIC_FILES})
-  
-SET(CAB_ADDITIONAL_LINK_LIBRARIES VirtualFluids)
+vf_add_library(BUILDTYPE binary PRIVATE_LINK VirtualFluidsCore muparser basics ${MPI_CXX_LIBRARIES} FILES pf.cpp pf1.cpp )
 
-#################################################################
-###   CREATE PROJECT                                          ###
-#################################################################
-CREATE_CAB_PROJECT(pf BINARY)
+vf_get_library_name (library_name)
+target_include_directories(${library_name} PRIVATE ${APPS_ROOT_CPU})
\ No newline at end of file
diff --git a/apps/cpu/PoiseuilleFlow/pf1.cpp b/apps/cpu/PoiseuilleFlow/pf1.cpp
index e7f4bbf1baa03e235a4263c8c9a1293c1d51d7f3..3880e9583dd07bdad7fcd11272f0a372155ef654 100644
--- a/apps/cpu/PoiseuilleFlow/pf1.cpp
+++ b/apps/cpu/PoiseuilleFlow/pf1.cpp
@@ -137,8 +137,7 @@ void pf1()
    grid->accept(initVisitor);
 
    //set connectors
-   InterpolationProcessorPtr iProcessor(new IncompressibleOffsetInterpolationProcessor());
-   SetConnectorsBlockVisitor setConnsVisitor(comm, true, D3Q27System::ENDDIR, nuLB, iProcessor);
+   OneDistributionSetConnectorsBlockVisitor setConnsVisitor(comm);
    grid->accept(setConnsVisitor);
 
    SPtr<UbScheduler> mSch(new UbScheduler(cpStep, cpStart));
diff --git a/apps/cpu/ViskomatXL/viskomat.cfg b/apps/cpu/ViskomatXL/viskomat.cfg
index 71228231e36810fca21ebe752be2488b5c56264d..4227ba9f821cfa9336a69c14b828829c6ff2ccb1 100644
--- a/apps/cpu/ViskomatXL/viskomat.cfg
+++ b/apps/cpu/ViskomatXL/viskomat.cfg
@@ -1,4 +1,4 @@
-outputPath = d:/temp/viskomat
+outputPath = d:/temp/viskomatCylinderRestartTest3_Migration
 geoPath = d:/Projects/TRR277/Project/WP1/Rheometer/Aileen
 geoFile = fishbone.stl
 
@@ -7,24 +7,44 @@ availMem = 8e9
 logToFile = false
 
 blocknx = 16 16 16
+#blocknx = 1 8 8
 #boundingBox = -4 171 -9.5 155.5 -76.5 82.5
-boundingBox = -4 166 -9.5 155.5 -76.5 82.5
+#boundingBox = -4 166 -9.5 155.5 -76.5 82.5
+
+#boundingBox = -4 166 0 165 0 165
+
+#boundingBox = -4 166 -82.5 82.5 -82.5 82.5
+
+boundingBox = 0 140 -82.5 82.5 -82.5 82.5
+
+# around X
+#blocknx = 1 16 16
+#boundingBox = 0 1 0 165 0 165
+
+# around Y
+#blocknx = 16 1 16
+#boundingBox =  0 165 0 1 0 165
+
+#zero test
+#blocknx = 8 8 8
+#boundingBox =  0 8 0 8 0 8
+
 deltax = 1
 
 refineLevel = 0
 
 #nuLB = 1.5e-4
-OmegaLB = 1e-5
+OmegaLB = 1e-4
 tau0 = 20e-7
 
 resolution = 32
 scaleFactor = 1
 
 newStart = true
-restartStep = 100000
+restartStep = 10000
 
 cpStart = 10000
 cpStep = 10000
 
-outTime = 10000
-endTime = 100000
\ No newline at end of file
+outTime = 1000
+endTime = 1000000
\ No newline at end of file
diff --git a/apps/cpu/ViskomatXL/viskomat.cpp b/apps/cpu/ViskomatXL/viskomat.cpp
index 113d9c6da16bff21267bb723d48adb2a3c9d5619..91dfb050901571b5cf37e02cfca5ab8c7a8eb8cb 100644
--- a/apps/cpu/ViskomatXL/viskomat.cpp
+++ b/apps/cpu/ViskomatXL/viskomat.cpp
@@ -81,11 +81,11 @@ void bflow(string configname)
       //cpStep = endTime;
 
       //double Re = 1.38230076758;
-      double N  = 80;
-      double Omega = 2 * UbMath::PI / 60.0 * N;
-      double mu    = 1;
-      double R     = 0.165 / 2.0;
-      double rho   = 970;
+      double N  = 80; //rpm
+      double Omega = 2 * UbMath::PI / 60.0 * N; //rad/s
+      double mu    = 1; //Pa s
+      double R     = 0.165 / 2.0; //m
+      double rho   = 970; //kg/m^3
       double Re    = Omega * R * R * rho / mu;
 
       double nuLB = OmegaLB * R * 1e3 * R * 1e3 / Re;
@@ -151,21 +151,36 @@ void bflow(string configname)
       SPtr<BCAdapter> slipBCAdapter(new SlipBCAdapter());
       slipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new SimpleSlipBCAlgorithm()));
 
-      mu::Parser fctVx;
-      //fctVx.SetExpr("omega*(r-x2)");
-      fctVx.SetExpr("-Omega*(x2-r)");
-      fctVx.DefineConst("Omega", OmegaLB);
-      //fctVx.DefineConst("r", R0);
-      fctVx.DefineConst("r", 0.5 * (g_maxX2 - g_minX2));
-
+      //// rotation around X-axis
       mu::Parser fctVy;
-      fctVy.SetExpr("Omega*(x3-r)");
+      fctVy.SetExpr("-Omega*(x3-z0-r)");
       fctVy.DefineConst("Omega", OmegaLB);
-      //fctVy.DefineConst("r", R0);
-      fctVy.DefineConst("r", 0.5 * (g_maxX2 - g_minX2));
+      fctVy.DefineConst("r", 0.5 * (g_maxX3 - g_minX3));
+      fctVy.DefineConst("z0", g_minX3);
 
       mu::Parser fctVz;
-      fctVz.SetExpr("0.0");
+      fctVz.SetExpr("Omega*(x2-y0-r)");
+      fctVz.DefineConst("Omega", OmegaLB);
+      fctVz.DefineConst("r", 0.5 * (g_maxX2 - g_minX2));
+      fctVz.DefineConst("y0", g_minX2);
+
+      mu::Parser fctVx;
+      fctVx.SetExpr("0.0");
+
+      // rotation around Y-axis
+      //mu::Parser fctVz;
+      //// fctVx.SetExpr("omega*(r-x2)");
+      //fctVz.SetExpr("Omega*(x1-r)");
+      //fctVz.DefineConst("Omega", OmegaLB);
+      //fctVz.DefineConst("r", 0.5 * (g_maxX1 - g_minX1));
+
+      //mu::Parser fctVx;
+      //fctVx.SetExpr("-Omega*(x3-r)");
+      //fctVx.DefineConst("Omega", OmegaLB);
+      //fctVx.DefineConst("r", 0.5 * (g_maxX1 - g_minX1));
+
+      //mu::Parser fctVy;
+      //fctVy.SetExpr("0.0");
 
       SPtr<BCAdapter> velocityBCAdapter(new VelocityBCAdapter(true, true, true, fctVx, fctVy, fctVz, 0, BCFunction::INFCONST));
       velocityBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityBCAlgorithm()));
@@ -188,8 +203,11 @@ void bflow(string configname)
       SPtr<BCProcessor> bcProc;
       bcProc = SPtr<BCProcessor>(new BCProcessor());
 
+      //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new BGKLBMKernel());
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new CumulantLBMKernel());
-      SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new CompressibleCumulant4thOrderViscosityLBMKernel());
+      //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new CompressibleCumulant4thOrderViscosityLBMKernel());
+      //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new IncompressibleCumulantLBMKernel()); 
+      SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new CumulantK17LBMKernel()); 
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new RheologyBinghamModelLBMKernel());
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new HerschelBulkleyModelLBMKernel());
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new BinghamModelLBMKernel());
@@ -211,28 +229,64 @@ void bflow(string configname)
       //restart
       SPtr<UbScheduler> mSch(new UbScheduler(cpStep, cpStart));
       SPtr<MPIIOMigrationCoProcessor> restartCoProcessor(new MPIIOMigrationCoProcessor(grid, mSch, outputPath, comm));
+      //SPtr<MPIIORestartCoProcessor> restartCoProcessor(new MPIIORestartCoProcessor(grid, mSch, outputPath, comm));
       restartCoProcessor->setLBMKernel(kernel);
       restartCoProcessor->setBCProcessor(bcProc);
       //restartCoProcessor->setNu(k);
       //////////////////////////////////////////////////////////////////////////
 
       ////stator
-      //SPtr<GbObject3D> stator(new GbCylinder3D(0.5 * g_maxX1, 0.5 * g_maxX2, g_minX3-2.0*deltax, 0.5 * g_maxX1, 0.5 * g_maxX2, g_maxX3+ 2.0 * deltax, 0.5 * g_maxX1));
-      SPtr<GbTriFaceMesh3D> stator = make_shared<GbTriFaceMesh3D>();
-      stator->readMeshFromSTLFileBinary(geoPath + "/" + geoFile, false);
+      // rotation around X-axis 
+      SPtr<GbObject3D> stator(new GbCylinder3D(g_minX1 - 3.0 * deltax, g_minX2 + 0.5 * (g_maxX2 - g_minX2),
+                                               g_minX3 + 0.5 * (g_maxX3 - g_minX3), g_maxX1 + 3.0 * deltax,
+          g_minX2 + 0.5 * (g_maxX2 - g_minX2), g_minX3 + 0.5 * (g_maxX3 - g_minX3), 0.5 * (g_maxX3 - g_minX3) * 0.5));
+
+       // rotation around Y-axis 
+      //SPtr<GbObject3D> stator(new GbCylinder3D(g_minX1 + 0.5 * (g_maxX1 - g_minX1), g_minX2 - 3.0 * deltax, 
+      //                                         g_minX3 + 0.5 * (g_maxX3 - g_minX3), g_minX1 + 0.5 * (g_maxX1 - g_minX1),
+      //                                         g_maxX2 + 3.0 * deltax, g_minX3 + 0.5 * (g_maxX3 - g_minX3),
+      //                                         0.5 * (g_maxX3 - g_minX3) * 0.5));
+
+      SPtr<D3Q27Interactor> statorInt =
+          SPtr<D3Q27Interactor>(new D3Q27Interactor(stator, grid, noSlipBCAdapter, Interactor3D::SOLID));
+      
+      //SPtr<GbTriFaceMesh3D> stator = make_shared<GbTriFaceMesh3D>();
+      //stator->readMeshFromSTLFileBinary(geoPath + "/" + geoFile, false);
+      //stator->translate(4.0, -73.0, -6.0);
       GbSystem3D::writeGeoObject(stator.get(), outputPath + "/geo/stator", WbWriterVtkXmlBinary::getInstance());
-
-      SPtr<D3Q27Interactor> statorInt = SPtr<D3Q27TriFaceMeshInteractor>(
-          new D3Q27TriFaceMeshInteractor(stator, grid, velocityBCAdapter, Interactor3D::SOLID, Interactor3D::EDGES));
+      
+      //SPtr<D3Q27Interactor> statorInt = SPtr<D3Q27TriFaceMeshInteractor>(
+      //    new D3Q27TriFaceMeshInteractor(stator, grid, noSlipBCAdapter, Interactor3D::SOLID, Interactor3D::EDGES));
 
       ////rotor (cylinder)
-      SPtr<GbObject3D> rotor(new GbCylinder3D(g_minX1, g_minX2 + 0.5 * (g_maxX2 - g_minX2),
-                                              g_minX3 + 0.5 * (g_maxX3 - g_minX3),
-                                              g_maxX1,
+      // rotation around X-axis 
+      SPtr<GbObject3D> rotor(new GbCylinder3D(
+          g_minX1 - 3.0 * deltax, g_minX2 + 0.5 * (g_maxX2 - g_minX2),
+                                              g_minX3 + 0.5 * (g_maxX3 - g_minX3), g_maxX1 + 3.0 * deltax,
           g_minX2 + 0.5 * (g_maxX2 - g_minX2), g_minX3 + 0.5 * (g_maxX3 - g_minX3), 0.5 * (g_maxX3 - g_minX3)));
+      // rotation around Y-axis
+      //SPtr<GbObject3D> rotor(new GbCylinder3D(g_minX1 + 0.5 * (g_maxX1 - g_minX1), g_minX2 - 3.0 * deltax,
+      //                                        g_minX3 + 0.5 * (g_maxX3 - g_minX3), g_minX1 + 0.5 * (g_maxX1 - g_minX1),
+      //                                        g_maxX2 + 3.0 * deltax, g_minX3 + 0.5 * (g_maxX3 - g_minX3),
+      //                                        0.5 * (g_maxX3 - g_minX3)));
+
       GbSystem3D::writeGeoObject(rotor.get(), outputPath + "/geo/rotor", WbWriterVtkXmlBinary::getInstance());
 
-      SPtr<D3Q27Interactor> rotorInt = SPtr<D3Q27Interactor>(new D3Q27Interactor(rotor, grid, noSlipBCAdapter, Interactor3D::INVERSESOLID));
+      SPtr<D3Q27Interactor> rotorInt =
+          SPtr<D3Q27Interactor>(new D3Q27Interactor(rotor, grid, velocityBCAdapter, Interactor3D::INVERSESOLID));
+
+      //walls
+      GbCuboid3DPtr wallXmin(new GbCuboid3D(g_minX1 - deltax, g_minX2 - deltax, g_minX3 - deltax, g_minX1,
+          g_maxX2 + deltax, g_maxX3 + deltax));
+      if (myid == 0) GbSystem3D::writeGeoObject(wallXmin.get(), outputPath + "/geo/wallXmin", WbWriterVtkXmlASCII::getInstance());
+
+      GbCuboid3DPtr wallXmax(new GbCuboid3D(g_maxX1, g_minX2 - deltax, g_minX3 - deltax, g_maxX1 +  (double)blocknx[0]*deltax,
+          g_maxX2 + deltax, g_maxX3 + deltax));
+      if (myid == 0) GbSystem3D::writeGeoObject(wallXmax.get(), outputPath + "/geo/wallXmax", WbWriterVtkXmlASCII::getInstance());
+
+      //wall interactors
+      SPtr<D3Q27Interactor> wallXminInt(new D3Q27Interactor(wallXmin, grid, slipBCAdapter, Interactor3D::SOLID));
+      SPtr<D3Q27Interactor> wallXmaxInt(new D3Q27Interactor(wallXmax, grid, slipBCAdapter, Interactor3D::SOLID));
 
       if (myid == 0)
       {
@@ -281,19 +335,6 @@ void bflow(string configname)
          }
 
 
-         //walls
-         GbCuboid3DPtr wallXmin(new GbCuboid3D(g_minX1 - deltax, g_minX2 - deltax, g_minX3 - deltax, g_minX1,
-                                               g_maxX2 + deltax, g_maxX3 + deltax));
-         if (myid == 0) GbSystem3D::writeGeoObject(wallXmin.get(), outputPath + "/geo/wallXmin", WbWriterVtkXmlASCII::getInstance());
-
-         GbCuboid3DPtr wallXmax(new GbCuboid3D(g_maxX1, g_minX2 - deltax, g_minX3 - deltax, g_maxX1 + deltax,
-                                               g_maxX2 + deltax, g_maxX3 + deltax));
-         if (myid == 0) GbSystem3D::writeGeoObject(wallXmax.get(), outputPath + "/geo/wallXmax", WbWriterVtkXmlASCII::getInstance());
-
-         //wall interactors
-         SPtr<D3Q27Interactor> wallXminInt(new D3Q27Interactor(wallXmin, grid, noSlipBCAdapter, Interactor3D::SOLID));
-         SPtr<D3Q27Interactor> wallXmaxInt(new D3Q27Interactor(wallXmax, grid, slipBCAdapter, Interactor3D::SOLID));
-
          ////////////////////////////////////////////
          //METIS
          SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::BSW, MetisPartitioner::KWAY));
@@ -353,6 +394,9 @@ void bflow(string configname)
          InitDistributionsBlockVisitor initVisitor;
          grid->accept(initVisitor);
 
+         SPtr<UbScheduler> geoSch(new UbScheduler(1));
+         WriteBoundaryConditionsCoProcessor ppgeo = WriteBoundaryConditionsCoProcessor(grid, geoSch, outputPath, WbWriterVtkXmlBinary::getInstance(), comm);
+         ppgeo.process(0);
 
          if (myid == 0) UBLOG(logINFO, "Preprozess - end");
       }
@@ -360,12 +404,24 @@ void bflow(string configname)
       {
          restartCoProcessor->restart((int)restartStep);
          grid->setTimeStep(restartStep);
-         SetBcBlocksBlockVisitor v1(rotorInt);
-         grid->accept(v1);
-         rotorInt->initInteractor();
-         SetBcBlocksBlockVisitor v2(statorInt);
-         grid->accept(v2);
+         
+         //SetBcBlocksBlockVisitor v1(wallXminInt);
+         //grid->accept(v1);
+         //wallXminInt->initInteractor();
+         //
+         //SetBcBlocksBlockVisitor v2(wallXmaxInt);
+         //grid->accept(v2);
+         //wallXmaxInt->initInteractor();
+         
+         SetBcBlocksBlockVisitor v3(statorInt);
+         grid->accept(v3);
          statorInt->initInteractor();
+
+         SetBcBlocksBlockVisitor v4(rotorInt);
+         grid->accept(v4);
+         rotorInt->initInteractor();
+
+
       }
       
       omp_set_num_threads(numOfThreads);
@@ -381,10 +437,6 @@ void bflow(string configname)
 
       grid->accept(bcVisitor);
 
-      SPtr<UbScheduler> geoSch(new UbScheduler(1));
-      WriteBoundaryConditionsCoProcessor ppgeo = WriteBoundaryConditionsCoProcessor(grid, geoSch, outputPath, WbWriterVtkXmlBinary::getInstance(), comm);
-      ppgeo.process(0);
-
       SPtr<UbScheduler> nupsSch(new UbScheduler(10, 30, 100));
       SPtr<CoProcessor> npr(new NUPSCounterCoProcessor(grid, nupsSch, numOfThreads, comm));
 
@@ -392,13 +444,13 @@ void bflow(string configname)
       SPtr<UbScheduler> visSch(new UbScheduler(outTime));
       //SPtr<UbScheduler> visSch(new UbScheduler(10,1));
       SPtr<WriteMacroscopicQuantitiesCoProcessor> writeMQCoProcessor(new WriteMacroscopicQuantitiesCoProcessor(grid, visSch, outputPath, WbWriterVtkXmlBinary::getInstance(), SPtr<LBMUnitConverter>(new LBMUnitConverter()), comm));
-      //writeMQCoProcessor->process(0);
+      //writeMQCoProcessor->process(100);
 
       SPtr<UbScheduler> forceSch(new UbScheduler(100));
-      SPtr<CalculateTorqueCoProcessor> fp = make_shared<CalculateTorqueCoProcessor>(grid, forceSch, outputPath + "/torque/TorqueRotor.txt", comm);
+      SPtr<CalculateTorqueCoProcessor> fp = make_shared<CalculateTorqueCoProcessor>(grid, forceSch, outputPath + "/torque/TorqueRotor.csv", comm);
       fp->addInteractor(rotorInt);
-      //SPtr<CalculateTorqueCoProcessor> fp2 = make_shared<CalculateTorqueCoProcessor>(grid, forceSch, outputPath + "/torque/TorqueStator.txt", comm);
-      //fp2->addInteractor(statorInt);
+      SPtr<CalculateTorqueCoProcessor> fp2 = make_shared<CalculateTorqueCoProcessor>(grid, forceSch, outputPath + "/torque/TorqueStator.csv", comm);
+      fp2->addInteractor(statorInt);
 
       //SPtr<WriteThixotropyQuantitiesCoProcessor> writeThixotropicMQCoProcessor(new WriteThixotropyQuantitiesCoProcessor(grid, visSch, outputPath, WbWriterVtkXmlBinary::getInstance(), SPtr<LBMUnitConverter>(new LBMUnitConverter()), comm));
 
@@ -406,7 +458,7 @@ void bflow(string configname)
       SPtr<Calculator> calculator(new BasicCalculator(grid, stepGhostLayer, endTime));
       calculator->addCoProcessor(npr);
       calculator->addCoProcessor(fp);
-      //calculator->addCoProcessor(fp2);
+      calculator->addCoProcessor(fp2);
       calculator->addCoProcessor(writeMQCoProcessor);
       //calculator->addCoProcessor(writeThixotropicMQCoProcessor);
       calculator->addCoProcessor(restartCoProcessor);
diff --git a/apps/cpu/rheometer/rheometer.cfg b/apps/cpu/rheometer/rheometer.cfg
index 344062fb9f086cf163542285e5d9d745df3d0d80..9eec8c6ded9b7a5ab8d1e6177c43354a4514ccc3 100644
--- a/apps/cpu/rheometer/rheometer.cfg
+++ b/apps/cpu/rheometer/rheometer.cfg
@@ -1,12 +1,12 @@
-outputPath = d:/temp/rheometer/rheometerBinghamqQBB/rheometerBingham_tau_20e-7_nu_1.5e-3_new_lim_test
-
+#outputPath = d:/temp/rheometer/rheometerBinghamqQBB/rheometerBingham_tau_20e-7_nu_1.5e-3_new_lim_test
+outputPath = d:/temp/Taylor-CouetteFlowIncompCum
 viscosityPath = d:/Projects/VirtualFluidsCombined/apps/cpu/rheometer
 
-numOfThreads = 4
+numOfThreads = 1
 availMem = 8e9
 logToFile = false
 
-blocknx = 8 8 1
+blocknx = 16 16 1  #8 8 1
 #boundingBox = 32 32 1
 deltax = 1
 
@@ -15,17 +15,20 @@ deltax = 1
 
 refineLevel = 0
 
-OmegaLB = 4e-5
+#OmegaLB = 4e-5
+#tau0 = 20e-7
+
+OmegaLB = 1e-4
 tau0 = 20e-7
 
-resolution = 32
+resolution = 165
 scaleFactor = 1
 
 newStart = true
 restartStep = 100000
 
-cpStart = 10000
-cpStep = 10000
+cpStart = 10000000
+cpStep  = 10000000
 
-outTime = 10000
-endTime = 100000
\ No newline at end of file
+outTime = 1
+endTime = 10
\ No newline at end of file
diff --git a/apps/cpu/rheometer/rheometer.cpp b/apps/cpu/rheometer/rheometer.cpp
index 3f87dee3451f98fa47a26a1b032414cab5a513c2..f6f98c122c3197f1a080fd335f8edfc9ee1f4e33 100644
--- a/apps/cpu/rheometer/rheometer.cpp
+++ b/apps/cpu/rheometer/rheometer.cpp
@@ -38,7 +38,7 @@ void bflow(string configname)
       viscosity.load(viscosityPath + "/viscosity.cfg");
       double nuLB = viscosity.getValue<double>("nuLB");
 
-      outputPath = outputPath + "/rheometerBingham_" + config.getValue<string>("resolution") + "_" + config.getValue<string>("OmegaLB");
+      //outputPath = outputPath + "/rheometerBingham_" + config.getValue<string>("resolution") + "_" + config.getValue<string>("OmegaLB");
 
       SPtr<Communicator> comm = MPICommunicator::getInstance();
       int myid = comm->getProcessID();
@@ -89,12 +89,20 @@ void bflow(string configname)
 
       //bounding box
 
+      //double g_minX1 = 0;
+      //double g_minX2 = 0;
+      //double g_minX3 = 0;
+
+      //double g_maxX1 = resolution;// boundingBox[0];
+      //double g_maxX2 = resolution;// boundingBox[1];
+      //double g_maxX3 = 1.0; // boundingBox[2];
+
       double g_minX1 = 0;
       double g_minX2 = 0;
       double g_minX3 = 0;
 
-      double g_maxX1 = resolution;// boundingBox[0];
-      double g_maxX2 = resolution;// boundingBox[1];
+      double g_maxX1 = resolution; // boundingBox[0];
+      double g_maxX2 = resolution; // boundingBox[1];
       double g_maxX3 = 1.0; // boundingBox[2];
 
       //double g_minX1 = -boundingBox[0]/2.0;
@@ -132,9 +140,9 @@ void bflow(string configname)
       //thix->setOmegaMin(omegaMin);
 
       SPtr<BCAdapter> noSlipBCAdapter(new NoSlipBCAdapter());
-      //noSlipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
+      noSlipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new NoSlipBCAlgorithm()));
       //noSlipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new RheologyHerschelBulkleyModelNoSlipBCAlgorithm()));
-      noSlipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new RheologyBinghamModelNoSlipBCAlgorithm()));
+      //noSlipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new RheologyBinghamModelNoSlipBCAlgorithm()));
 
       //SPtr<BCAdapter> slipBCAdapter(new SlipBCAdapter());
       //slipBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new SimpleSlipBCAlgorithm()));
@@ -155,11 +163,26 @@ void bflow(string configname)
       mu::Parser fctVz;
       fctVz.SetExpr("0.0");
 
+
+      //// rotation around X-axis
+      //mu::Parser fctVy;
+      //fctVy.SetExpr("-Omega*(x3-r)");
+      //fctVy.DefineConst("Omega", OmegaLB);
+      //fctVy.DefineConst("r", 0.5 * (g_maxX2 - g_minX2));
+
+      //mu::Parser fctVz;
+      //fctVz.SetExpr("Omega*(x2-r)");
+      //fctVz.DefineConst("Omega", OmegaLB);
+      //fctVz.DefineConst("r", 0.5 * (g_maxX2 - g_minX2));
+
+      //mu::Parser fctVx;
+      //fctVx.SetExpr("0.0");
+
       SPtr<BCAdapter> velocityBCAdapter(new VelocityBCAdapter(true, true, true, fctVx, fctVy, fctVz, 0, BCFunction::INFCONST));
-      //velocityBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityBCAlgorithm()));
+      velocityBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityBCAlgorithm()));
       //velocityBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new SimpleVelocityBCAlgorithm()));
       //velocityBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new VelocityWithDensityBCAlgorithm()));
-      velocityBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new RheologyBinghamModelVelocityBCAlgorithm()));
+      //velocityBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new RheologyBinghamModelVelocityBCAlgorithm()));
 
       //SPtr<BCAdapter> densityBCAdapter(new DensityBCAdapter());
       //densityBCAdapter->setBcAlgorithm(SPtr<BCAlgorithm>(new NonEqDensityBCAlgorithm()));
@@ -176,9 +199,12 @@ void bflow(string configname)
       SPtr<BCProcessor> bcProc;
       bcProc = SPtr<BCProcessor>(new BCProcessor());
 
+      //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new BGKLBMKernel());
+      SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new IncompressibleCumulantLBMKernel());
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new CumulantLBMKernel());
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new CompressibleCumulant4thOrderViscosityLBMKernel());
-      SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new RheologyBinghamModelLBMKernel());
+      //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new CumulantK17LBMKernel()); 
+      //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new RheologyBinghamModelLBMKernel());
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new HerschelBulkleyModelLBMKernel());
       //SPtr<LBMKernel> kernel = SPtr<LBMKernel>(new BinghamModelLBMKernel());
       kernel->setBCProcessor(bcProc);
@@ -205,16 +231,26 @@ void bflow(string configname)
       //////////////////////////////////////////////////////////////////////////
 
       ////stator
-      SPtr<GbObject3D> stator(new GbCylinder3D(0.5 * g_maxX1, 0.5 * g_maxX2, g_minX3-2.0*deltax, 0.5 * g_maxX1, 0.5 * g_maxX2, g_maxX3+ 2.0 * deltax, 0.5 * g_maxX1));
-      GbSystem3D::writeGeoObject(stator.get(), outputPath + "/geo/stator", WbWriterVtkXmlBinary::getInstance());
+      SPtr<GbObject3D> rotor(new GbCylinder3D(0.5 * g_maxX1, 0.5 * g_maxX2, g_minX3 - 2.0 * deltax, 0.5 * g_maxX1,
+                                              0.5 * g_maxX2, g_maxX3 + 2.0 * deltax, 0.5 * g_maxX1));
+
+      //around x
+      //SPtr<GbObject3D> stator(new GbCylinder3D(g_minX1 - 3.0 * deltax, 0.5 * g_maxX2, 0.5 * g_maxX3,                                               g_maxX1 + 3.0 * deltax, 0.5 * g_maxX2, 0.5 * g_maxX3, 0.5 * g_maxX3));
+
+      GbSystem3D::writeGeoObject(rotor.get(), outputPath + "/geo/stator", WbWriterVtkXmlBinary::getInstance());
 
-      SPtr<D3Q27Interactor> statorInt = SPtr<D3Q27Interactor>(new D3Q27Interactor(stator, grid, velocityBCAdapter, Interactor3D::INVERSESOLID));
+      SPtr<D3Q27Interactor> rotorInt =
+          SPtr<D3Q27Interactor>(new D3Q27Interactor(rotor, grid, velocityBCAdapter, Interactor3D::INVERSESOLID));
 
       ////rotor (cylinder)
-      SPtr<GbObject3D> rotor(new GbCylinder3D(0.5 * g_maxX1, 0.5 * g_maxX2, g_minX3- 2.0 * deltax, 0.5 * g_maxX1, 0.5 * g_maxX2, g_maxX3+ 2.0 * deltax, 0.25 * g_maxX1));
-      GbSystem3D::writeGeoObject(rotor.get(), outputPath + "/geo/rotor", WbWriterVtkXmlBinary::getInstance());
+      SPtr<GbObject3D> stator(new GbCylinder3D(0.5 * g_maxX1, 0.5 * g_maxX2, g_minX3- 2.0 * deltax, 0.5 * g_maxX1, 0.5 * g_maxX2, g_maxX3+ 2.0 * deltax, 0.25 * g_maxX1));
+      
+      //around x
+      //SPtr<GbObject3D> rotor(new GbCylinder3D(g_minX1 - 3.0 * deltax, 0.5 * g_maxX2, 0.5 * g_maxX3,                                           g_maxX1 + 3.0 * deltax, 0.5 * g_maxX2, 0.5 * g_maxX3, 0.25 * g_maxX3));
+
+      GbSystem3D::writeGeoObject(stator.get(), outputPath + "/geo/rotor", WbWriterVtkXmlBinary::getInstance());
 
-      SPtr<D3Q27Interactor> rotorInt = SPtr<D3Q27Interactor>(new D3Q27Interactor(rotor, grid, noSlipBCAdapter, Interactor3D::SOLID));
+      SPtr<D3Q27Interactor> statorInt = SPtr<D3Q27Interactor>(new D3Q27Interactor(stator, grid, noSlipBCAdapter, Interactor3D::SOLID));
 
       if (myid == 0)
       {
@@ -385,8 +421,8 @@ void bflow(string configname)
       calculator->addCoProcessor(fp);
       calculator->addCoProcessor(fp2);
       calculator->addCoProcessor(writeMQCoProcessor);
-      calculator->addCoProcessor(writeThixotropicMQCoProcessor);
-      calculator->addCoProcessor(restartCoProcessor);
+      //calculator->addCoProcessor(writeThixotropicMQCoProcessor);
+      //calculator->addCoProcessor(restartCoProcessor);
 
       if (myid == 0) UBLOG(logINFO, "Simulation-start");
       calculator->calculate();
diff --git a/apps/cpu/rheometer/viscosity.cfg b/apps/cpu/rheometer/viscosity.cfg
index bf2822c7b1d6dd42fdcbc513a4e6b39264ab4180..065757939d9313c7caaf46ba34f7b989b61914f2 100644
--- a/apps/cpu/rheometer/viscosity.cfg
+++ b/apps/cpu/rheometer/viscosity.cfg
@@ -1 +1 @@
-nuLB = 1.5e-3
\ No newline at end of file
+nuLB = 0.0123058 #1.5e-3
\ No newline at end of file
diff --git a/apps/cpu/sphere/CMakeLists.txt b/apps/cpu/sphere/CMakeLists.txt
index d80737cdd94aa0b912a62acd69dcd30372f6fd82..8346e80d26800582c37090b208e846737e087d5d 100644
--- a/apps/cpu/sphere/CMakeLists.txt
+++ b/apps/cpu/sphere/CMakeLists.txt
@@ -1,6 +1,6 @@
 ########################################################
 ## C++ PROJECT                                       ###
 ########################################################
-PROJECT(sphere)
+PROJECT(sphere LANGUAGES CXX)
 
 vf_add_library(BUILDTYPE binary PRIVATE_LINK VirtualFluidsCore basics ${MPI_CXX_LIBRARIES} FILES sphere.cpp )
\ No newline at end of file
diff --git a/apps/gpu/LBM/DrivenCavity/CMakeLists.txt b/apps/gpu/LBM/DrivenCavity/CMakeLists.txt
index e4f62c0150c29f147dee0b99480d1fe28161511a..8384e1bc6fcfa3fd2514434b620b266e96b3626a 100644
--- a/apps/gpu/LBM/DrivenCavity/CMakeLists.txt
+++ b/apps/gpu/LBM/DrivenCavity/CMakeLists.txt
@@ -5,3 +5,5 @@ PROJECT(DrivenCavity LANGUAGES CUDA CXX)
 vf_add_library(BUILDTYPE binary PRIVATE_LINK basics VirtualFluids_GPU GridGenerator MPI::MPI_CXX FILES DrivenCavity.cpp)
 
 set_source_files_properties(DrivenCavity.cpp PROPERTIES LANGUAGE CUDA)
+
+set_target_properties(DrivenCavity PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
diff --git a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
index 26c2900ef5681861eb7d55e678ec04ac639852db..fc12e73a18c3e63b93724fc10bb0828391a1a096 100644
--- a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
+++ b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
@@ -96,7 +96,9 @@ const real dt = (real)1.0e-3; //0.5e-3;
 
 const uint nx = 64;
 
-std::string path("E:/temp/DrivenCavity");
+//std::string path("F:/Work/Computations/out/DrivenCavity/"); //LEGOLAS
+//std::string path("D:/out/DrivenCavity"); //Mollok
+std::string path("/home/sopeters/Computations/out/DrivenCavity64_unified"); // phoenix
 
 std::string simulationName("DrivenCavityChim");
 
@@ -185,7 +187,7 @@ void multipleLevel(const std::string& configPath)
 
         para->setVelocityRatio(velocity/ velocityLB);
 
-		para->setMainKernel("CumulantK17CompChim");
+		//para->setMainKernel("CumulantK17CompChim");
 
 		para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
             rho = (real)0.0;
diff --git a/docs/Doxyfile b/docs/Doxyfile
new file mode 100644
index 0000000000000000000000000000000000000000..b5057d7697d0616f748d8e6652248966d41b2eb2
--- /dev/null
+++ b/docs/Doxyfile
@@ -0,0 +1,2612 @@
+# Doxyfile 1.9.1
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a double hash (##) is considered a comment and is placed in
+# front of the TAG it is preceding.
+#
+# All text after a single hash (#) is considered a comment and will be ignored.
+# The format is:
+# TAG = value [value, ...]
+# For lists, items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (\" \").
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the configuration
+# file that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# https://www.gnu.org/software/libiconv/ for the list of possible encodings.
+# The default value is: UTF-8.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by
+# double-quotes, unless you are using Doxywizard) that should identify the
+# project for which the documentation is generated. This name is used in the
+# title of most generated pages and in a few other places.
+# The default value is: My Project.
+
+PROJECT_NAME           = VirtualFluids
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
+# could be handy for archiving the generated documentation or if some version
+# control system is used.
+
+PROJECT_NUMBER         = 1.0
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer a
+# quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF          = "Parallel CFD LBM Solver"
+
+# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
+# in the documentation. The maximum height of the logo should not exceed 55
+# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy
+# the logo to the output directory.
+
+PROJECT_LOGO           = docs/img/VF_logo.png
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path
+# into which the generated documentation will be written. If a relative path is
+# entered, it will be relative to the location where doxygen was started. If
+# left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = docs/build/
+
+# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub-
+# directories (in 2 levels) under the output directory of each output format and
+# will distribute the generated files over these directories. Enabling this
+# option can be useful when feeding doxygen a huge amount of source files, where
+# putting all generated files in the same directory would otherwise causes
+# performance problems for the file system.
+# The default value is: NO.
+
+CREATE_SUBDIRS         = NO
+
+# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII
+# characters to appear in the names of generated files. If set to NO, non-ASCII
+# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode
+# U+3044.
+# The default value is: NO.
+
+ALLOW_UNICODE_NAMES    = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,
+# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),
+# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,
+# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),
+# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,
+# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,
+# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,
+# Ukrainian and Vietnamese.
+# The default value is: English.
+
+OUTPUT_LANGUAGE        = English
+
+# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all generated output in the proper direction.
+# Possible values are: None, LTR, RTL and Context.
+# The default value is: None.
+
+OUTPUT_TEXT_DIRECTION  = None
+
+# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
+# descriptions after the members that are listed in the file and class
+# documentation (similar to Javadoc). Set to NO to disable this.
+# The default value is: YES.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief
+# description of a member or function before the detailed description
+#
+# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+# The default value is: YES.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator that is
+# used to form the text in various listings. Each string in this list, if found
+# as the leading text of the brief description, will be stripped from the text
+# and the result, after processing the whole list, is used as the annotated
+# text. Otherwise, the brief description is used as-is. If left blank, the
+# following values are used ($name is automatically replaced with the name of
+# the entity):The $name class, The $name widget, The $name file, is, provides,
+# specifies, contains, represents, a, an and the.
+
+ABBREVIATE_BRIEF       = "The $name class" \
+                         "The $name widget" \
+                         "The $name file" \
+                         is \
+                         provides \
+                         specifies \
+                         contains \
+                         represents \
+                         a \
+                         an \
+                         the
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# doxygen will generate a detailed section even if there is only a brief
+# description.
+# The default value is: NO.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+# The default value is: NO.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path
+# before files name in the file list and in the header files. If set to NO the
+# shortest path that makes the file name unique will be used
+# The default value is: YES.
+
+FULL_PATH_NAMES        = YES
+
+# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
+# Stripping is only done if one of the specified strings matches the left-hand
+# part of the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the path to
+# strip.
+#
+# Note that you can specify absolute paths here, but also relative paths, which
+# will be relative from the directory where doxygen is started.
+# This tag requires that the tag FULL_PATH_NAMES is set to YES.
+
+STRIP_FROM_PATH        =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
+# path mentioned in the documentation of a class, which tells the reader which
+# header file to include in order to use a class. If left blank only the name of
+# the header file containing the class definition is used. Otherwise one should
+# specify the list of include paths that are normally passed to the compiler
+# using the -I flag.
+
+STRIP_FROM_INC_PATH    =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
+# less readable) file names. This can be useful if your file system doesn't
+# support long names like on DOS, Mac, or CD-ROM.
+# The default value is: NO.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the
+# first line (until the first dot) of a Javadoc-style comment as the brief
+# description. If set to NO, the Javadoc-style will behave just like regular Qt-
+# style comments (thus requiring an explicit @brief command for a brief
+# description.)
+# The default value is: NO.
+
+JAVADOC_AUTOBRIEF      = NO
+
+# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line
+# such as
+# /***************
+# as being the beginning of a Javadoc-style comment "banner". If set to NO, the
+# Javadoc-style will behave just like regular comments and it will not be
+# interpreted by doxygen.
+# The default value is: NO.
+
+JAVADOC_BANNER         = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
+# line (until the first dot) of a Qt-style comment as the brief description. If
+# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
+# requiring an explicit \brief command for a brief description.)
+# The default value is: NO.
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a
+# multi-line C++ special comment block (i.e. a block of //! or /// comments) as
+# a brief description. This used to be the default behavior. The new default is
+# to treat a multi-line C++ comment block as a detailed description. Set this
+# tag to YES if you prefer the old behavior instead.
+#
+# Note that setting this tag to YES also means that rational rose comments are
+# not recognized any more.
+# The default value is: NO.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# By default Python docstrings are displayed as preformatted text and doxygen's
+# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the
+# doxygen's special commands can be used and the contents of the docstring
+# documentation blocks is shown as doxygen documentation.
+# The default value is: YES.
+
+PYTHON_DOCSTRING       = YES
+
+# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
+# documentation from any documented member that it re-implements.
+# The default value is: YES.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new
+# page for each member. If set to NO, the documentation of a member will be part
+# of the file/class/namespace that contains it.
+# The default value is: NO.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen
+# uses this value to replace tabs by spaces in code fragments.
+# Minimum value: 1, maximum value: 16, default value: 4.
+
+TAB_SIZE               = 4
+
+# This tag can be used to specify a number of aliases that act as commands in
+# the documentation. An alias has the form:
+# name=value
+# For example adding
+# "sideeffect=@par Side Effects:\n"
+# will allow you to put the command \sideeffect (or @sideeffect) in the
+# documentation, which will result in a user-defined paragraph with heading
+# "Side Effects:". You can put \n's in the value part of an alias to insert
+# newlines (in the resulting output). You can put ^^ in the value part of an
+# alias to insert a newline as if a physical newline was in the original file.
+# When you need a literal { or } or , in the value part of an alias you have to
+# escape them by means of a backslash (\), this can lead to conflicts with the
+# commands \{ and \} for these it is advised to use the version @{ and @} or use
+# a double escape (\\{ and \\})
+
+ALIASES                =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
+# only. Doxygen will then generate output that is more tailored for C. For
+# instance, some of the names that are used will be different. The list of all
+# members will be omitted, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_FOR_C  = YES
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
+# Python sources only. Doxygen will then generate output that is more tailored
+# for that language. For instance, namespaces will be presented as packages,
+# qualified scopes will look different, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources. Doxygen will then generate output that is tailored for Fortran.
+# The default value is: NO.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for VHDL.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice
+# sources only. Doxygen will then generate output that is more tailored for that
+# language. For instance, namespaces will be presented as modules, types will be
+# separated into more groups, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_SLICE  = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given
+# extension. Doxygen has a built-in mapping, but you can override or extend it
+# using this tag. The format is ext=language, where ext is a file extension, and
+# language is one of the parsers supported by doxygen: IDL, Java, JavaScript,
+# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL,
+# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran:
+# FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser
+# tries to guess whether the code is fixed or free formatted code, this is the
+# default for Fortran type files). For instance to make doxygen treat .inc files
+# as Fortran files (default is PHP), and .f files as C (default is Fortran),
+# use: inc=Fortran f=C.
+#
+# Note: For files without extension you can use no_extension as a placeholder.
+#
+# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
+# the files are not read by doxygen. When specifying no_extension you should add
+# * to the FILE_PATTERNS.
+#
+# Note see also the list of default file extension mappings.
+
+EXTENSION_MAPPING      =
+
+# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
+# according to the Markdown format, which allows for more readable
+# documentation. See https://daringfireball.net/projects/markdown/ for details.
+# The output of markdown processing is further processed by doxygen, so you can
+# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
+# case of backward compatibilities issues.
+# The default value is: YES.
+
+MARKDOWN_SUPPORT       = YES
+
+# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up
+# to that level are automatically included in the table of contents, even if
+# they do not have an id attribute.
+# Note: This feature currently applies only to Markdown headings.
+# Minimum value: 0, maximum value: 99, default value: 5.
+# This tag requires that the tag MARKDOWN_SUPPORT is set to YES.
+
+TOC_INCLUDE_HEADINGS   = 5
+
+# When enabled doxygen tries to link words that correspond to documented
+# classes, or namespaces to their corresponding documentation. Such a link can
+# be prevented in individual cases by putting a % sign in front of the word or
+# globally by setting AUTOLINK_SUPPORT to NO.
+# The default value is: YES.
+
+AUTOLINK_SUPPORT       = YES
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should set this
+# tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string);
+# versus func(std::string) {}). This also make the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+# The default value is: NO.
+
+BUILTIN_STL_SUPPORT    = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+# The default value is: NO.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
+# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen
+# will parse them like normal C++ but will assume all classes use public instead
+# of private inheritance when no explicit protection keyword is present.
+# The default value is: NO.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate
+# getter and setter methods for a property. Setting this option to YES will make
+# doxygen to replace the get and set methods by a property in the documentation.
+# This will only work if the methods are indeed getting or setting a simple
+# type. If this is not the case, or you want to show the methods anyway, you
+# should set this option to NO.
+# The default value is: YES.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+# The default value is: NO.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# If one adds a struct or class to a group and this option is enabled, then also
+# any nested class or struct is added to the same group. By default this option
+# is disabled and one has to add nested compounds explicitly via \ingroup.
+# The default value is: NO.
+
+GROUP_NESTED_COMPOUNDS = NO
+
+# Set the SUBGROUPING tag to YES to allow class member groups of the same type
+# (for instance a group of public functions) to be put as a subgroup of that
+# type (e.g. under the Public Functions section). Set it to NO to prevent
+# subgrouping. Alternatively, this can be done per class using the
+# \nosubgrouping command.
+# The default value is: YES.
+
+SUBGROUPING            = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions
+# are shown inside the group in which they are included (e.g. using \ingroup)
+# instead of on a separate page (for HTML and Man pages) or section (for LaTeX
+# and RTF).
+#
+# Note that this feature does not work in combination with
+# SEPARATE_MEMBER_PAGES.
+# The default value is: NO.
+
+INLINE_GROUPED_CLASSES = NO
+
+# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions
+# with only public data fields or simple typedef fields will be shown inline in
+# the documentation of the scope in which they are defined (i.e. file,
+# namespace, or group documentation), provided this scope is documented. If set
+# to NO, structs, classes, and unions are shown on a separate page (for HTML and
+# Man pages) or section (for LaTeX and RTF).
+# The default value is: NO.
+
+INLINE_SIMPLE_STRUCTS  = NO
+
+# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
+# enum is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically be
+# useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+# The default value is: NO.
+
+TYPEDEF_HIDES_STRUCT   = NO
+
+# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
+# cache is used to resolve symbols given their name and scope. Since this can be
+# an expensive process and often the same symbol appears multiple times in the
+# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
+# doxygen will become slower. If the cache is too large, memory is wasted. The
+# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
+# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
+# symbols. At the end of a run doxygen will report the cache usage and suggest
+# the optimal cache size from a speed point of view.
+# Minimum value: 0, maximum value: 9, default value: 0.
+
+LOOKUP_CACHE_SIZE      = 0
+
+# The NUM_PROC_THREADS specifies the number threads doxygen is allowed to use
+# during processing. When set to 0 doxygen will based this on the number of
+# cores available in the system. You can set it explicitly to a value larger
+# than 0 to get more control over the balance between CPU load and processing
+# speed. At this moment only the input processing can be done using multiple
+# threads. Since this is still an experimental feature the default is set to 1,
+# which effectively disables parallel processing. Please report any issues you
+# encounter. Generating dot graphs in parallel is controlled by the
+# DOT_NUM_THREADS setting.
+# Minimum value: 0, maximum value: 32, default value: 1.
+
+NUM_PROC_THREADS       = 1
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in
+# documentation are documented, even if no documentation was available. Private
+# class members and static file members will be hidden unless the
+# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
+# Note: This will also disable the warnings about undocumented members that are
+# normally produced when WARNINGS is set to YES.
+# The default value is: NO.
+
+EXTRACT_ALL            = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will
+# be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIVATE        = NO
+
+# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual
+# methods of a class will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIV_VIRTUAL   = NO
+
+# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
+# scope will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PACKAGE        = NO
+
+# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be
+# included in the documentation.
+# The default value is: NO.
+
+EXTRACT_STATIC         = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
+# locally in source files will be included in the documentation. If set to NO,
+# only classes defined in header files are included. Does not have any effect
+# for Java sources.
+# The default value is: YES.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. If set to YES, local methods,
+# which are defined in the implementation section but not in the interface are
+# included in the documentation. If set to NO, only methods in the interface are
+# included.
+# The default value is: NO.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base name of
+# the file that contains the anonymous namespace. By default anonymous namespace
+# are hidden.
+# The default value is: NO.
+
+EXTRACT_ANON_NSPACES   = NO
+
+# If this flag is set to YES, the name of an unnamed parameter in a declaration
+# will be determined by the corresponding definition. By default unnamed
+# parameters remain unnamed in the output.
+# The default value is: YES.
+
+RESOLVE_UNNAMED_PARAMS = YES
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
+# undocumented members inside documented classes or files. If set to NO these
+# members will be included in the various overviews, but no documentation
+# section is generated. This option has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy. If set
+# to NO, these classes will be included in the various overviews. This option
+# has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
+# declarations. If set to NO, these declarations will be included in the
+# documentation.
+# The default value is: NO.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
+# documentation blocks found inside the body of a function. If set to NO, these
+# blocks will be appended to the function's detailed documentation block.
+# The default value is: NO.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation that is typed after a
+# \internal command is included. If the tag is set to NO then the documentation
+# will be excluded. Set it to YES to include the internal documentation.
+# The default value is: NO.
+
+INTERNAL_DOCS          = NO
+
+# With the correct setting of option CASE_SENSE_NAMES doxygen will better be
+# able to match the capabilities of the underlying filesystem. In case the
+# filesystem is case sensitive (i.e. it supports files in the same directory
+# whose names only differ in casing), the option must be set to YES to properly
+# deal with such files in case they appear in the input. For filesystems that
+# are not case sensitive the option should be set to NO to properly deal with
+# output files written for symbols that only differ in casing, such as for two
+# classes, one named CLASS and the other named Class, and to also support
+# references to files without having to specify the exact matching casing. On
+# Windows (including Cygwin) and MacOS, users should typically set this option
+# to NO, whereas on Linux or other Unix flavors it should typically be set to
+# YES.
+# The default value is: system dependent.
+
+CASE_SENSE_NAMES       = NO
+
+# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
+# their full class and namespace scopes in the documentation. If set to YES, the
+# scope will be hidden.
+# The default value is: NO.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will
+# append additional text to a page's title, such as Class Reference. If set to
+# YES the compound reference will be hidden.
+# The default value is: NO.
+
+HIDE_COMPOUND_REFERENCE= NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
+# the files that are included by a file in the documentation of that file.
+# The default value is: YES.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
+# grouped member an include statement to the documentation, telling the reader
+# which file to include in order to use the member.
+# The default value is: NO.
+
+SHOW_GROUPED_MEMB_INC  = NO
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
+# files with double quotes in the documentation rather than with sharp brackets.
+# The default value is: NO.
+
+FORCE_LOCAL_INCLUDES   = NO
+
+# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
+# documentation for inline members.
+# The default value is: YES.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
+# (detailed) documentation of file and class members alphabetically by member
+# name. If set to NO, the members will appear in declaration order.
+# The default value is: YES.
+
+SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
+# descriptions of file, namespace and class members alphabetically by member
+# name. If set to NO, the members will appear in declaration order. Note that
+# this will also influence the order of the classes in the class list.
+# The default value is: NO.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
+# (brief and detailed) documentation of class members so that constructors and
+# destructors are listed first. If set to NO the constructors will appear in the
+# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
+# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
+# member documentation.
+# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
+# detailed member documentation.
+# The default value is: NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
+# of group names into alphabetical order. If set to NO the group names will
+# appear in their defined order.
+# The default value is: NO.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
+# fully-qualified names, including namespaces. If set to NO, the class list will
+# be sorted only by class name, not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the alphabetical
+# list.
+# The default value is: NO.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
+# type resolution of all parameters of a function it will reject a match between
+# the prototype and the implementation of a member function even if there is
+# only one candidate or it is obvious which candidate to choose by doing a
+# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
+# accept a match between prototype and implementation in such cases.
+# The default value is: NO.
+
+STRICT_PROTO_MATCHING  = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo
+# list. This list is created by putting \todo commands in the documentation.
+# The default value is: YES.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
+# list. This list is created by putting \test commands in the documentation.
+# The default value is: YES.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
+# list. This list is created by putting \bug commands in the documentation.
+# The default value is: YES.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
+# the deprecated list. This list is created by putting \deprecated commands in
+# the documentation.
+# The default value is: YES.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional documentation
+# sections, marked by \if <section_label> ... \endif and \cond <section_label>
+# ... \endcond blocks.
+
+ENABLED_SECTIONS       =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
+# initial value of a variable or macro / define can have for it to appear in the
+# documentation. If the initializer consists of more lines than specified here
+# it will be hidden. Use a value of 0 to hide initializers completely. The
+# appearance of the value of individual variables and macros / defines can be
+# controlled using \showinitializer or \hideinitializer command in the
+# documentation regardless of this setting.
+# Minimum value: 0, maximum value: 10000, default value: 30.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
+# the bottom of the documentation of classes and structs. If set to YES, the
+# list will mention the files that were used to generate the documentation.
+# The default value is: YES.
+
+SHOW_USED_FILES        = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
+# will remove the Files entry from the Quick Index and from the Folder Tree View
+# (if specified).
+# The default value is: YES.
+
+SHOW_FILES             = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
+# page. This will remove the Namespaces entry from the Quick Index and from the
+# Folder Tree View (if specified).
+# The default value is: YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command command input-file, where command is the value of the
+# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided
+# by doxygen. Whatever the program writes to standard output is used as the file
+# version. For an example see the documentation.
+
+FILE_VERSION_FILTER    =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option. You can
+# optionally specify a file name after the option, if omitted DoxygenLayout.xml
+# will be used as the name of the layout file.
+#
+# Note that if you run doxygen from a directory containing a file called
+# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
+# tag is left empty.
+
+LAYOUT_FILE            =
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
+# the reference definitions. This must be a list of .bib files. The .bib
+# extension is automatically appended if omitted. This requires the bibtex tool
+# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info.
+# For LaTeX the style of the bibliography can be controlled using
+# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
+# search path. See also \cite for info how to create references.
+
+CITE_BIB_FILES         =
+
+#---------------------------------------------------------------------------
+# Configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated to
+# standard output by doxygen. If QUIET is set to YES this implies that the
+# messages are off.
+# The default value is: NO.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
+# this implies that the warnings are on.
+#
+# Tip: Turn warnings on while writing the documentation.
+# The default value is: YES.
+
+WARNINGS               = YES
+
+# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate
+# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
+# will automatically be disabled.
+# The default value is: YES.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some parameters
+# in a documented function, or documenting parameters that don't exist or using
+# markup commands wrongly.
+# The default value is: YES.
+
+WARN_IF_DOC_ERROR      = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
+# are documented, but have no documentation for their parameters or return
+# value. If set to NO, doxygen will only warn about wrong or incomplete
+# parameter documentation, but not about the absence of documentation. If
+# EXTRACT_ALL is set to YES then this flag will automatically be disabled.
+# The default value is: NO.
+
+WARN_NO_PARAMDOC       = NO
+
+# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
+# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS
+# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but
+# at the end of the doxygen process doxygen will return with a non-zero status.
+# Possible values are: NO, YES and FAIL_ON_WARNINGS.
+# The default value is: NO.
+
+WARN_AS_ERROR          = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that doxygen
+# can produce. The string should contain the $file, $line, and $text tags, which
+# will be replaced by the file and line number from which the warning originated
+# and the warning text. Optionally the format may contain $version, which will
+# be replaced by the version of the file (if it could be obtained via
+# FILE_VERSION_FILTER)
+# The default value is: $file:$line: $text.
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning and error
+# messages should be written. If left blank the output is written to standard
+# error (stderr).
+
+WARN_LOGFILE           =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag is used to specify the files and/or directories that contain
+# documented source files. You may enter file names like myfile.cpp or
+# directories like /usr/src/myproject. Separate the files or directories with
+# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
+# Note: If this tag is empty the current directory is searched.
+
+INPUT                  = src/
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
+# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
+# documentation (see:
+# https://www.gnu.org/software/libiconv/) for the list of possible encodings.
+# The default value is: UTF-8.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
+# *.h) to filter out the source-files in the directories.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# read by doxygen (e.g. map the *.cu/*.cuh CUDA patterns below to C++).
+#
+# Note the list of default checked file patterns might differ from the list of
+# default file extension mappings.
+#
+# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,
+# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
+# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
+# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment),
+# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl,
+# *.ucf, *.qsf and *.ice.
+
+FILE_PATTERNS          = *.c \
+                         *.cc \
+                         *.cxx \
+                         *.cpp \
+                         *.cuh \
+                         *.cu \
+                         *.c++ \
+                         *.d \
+                         *.java \
+                         *.ii \
+                         *.ixx \
+                         *.ipp \
+                         *.i++ \
+                         *.inl \
+                         *.h \
+                         *.hh \
+                         *.hxx \
+                         *.hpp \
+                         *.h++ \
+                         *.idl \
+                         *.odl \
+                         *.cs \
+                         *.php \
+                         *.php3 \
+                         *.inc \
+                         *.m \
+                         *.markdown \
+                         *.md \
+                         *.mm \
+                         *.dox \
+                         *.py \
+                         *.f90 \
+                         *.f \
+                         *.for \
+                         *.vhd \
+                         *.vhdl
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories should
+# be searched for input files as well.
+# The default value is: NO.
+
+RECURSIVE              = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+#
+# Note that relative paths are relative to the directory from which doxygen is
+# run.
+
+EXCLUDE                =
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+# The default value is: NO.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories.
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories use the pattern */test/*
+
+EXCLUDE_SYMBOLS        =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or directories
+# that contain example code fragments that are included (see the \include
+# command).
+
+EXAMPLE_PATH           =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
+# *.h) to filter out the source-files in the directories. If left blank all
+# files are included.
+
+EXAMPLE_PATTERNS       = *
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude commands
+# irrespective of the value of the RECURSIVE tag.
+# The default value is: NO.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or directories
+# that contain images that are to be included in the documentation (see the
+# \image command).
+
+IMAGE_PATH             =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command:
+#
+# <filter> <input-file>
+#
+# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
+# name of an input file. Doxygen will then use the output that the filter
+# program writes to standard output. If FILTER_PATTERNS is specified, this tag
+# will be ignored.
+#
+# Note that the filter must not add or remove lines; it is applied before the
+# code is scanned, but not when the output code is generated. If lines are added
+# or removed, the anchors will not be placed correctly.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+INPUT_FILTER           =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form: pattern=filter
+# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
+# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
+# patterns match the file name, INPUT_FILTER is applied.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+FILTER_PATTERNS        =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will also be used to filter the input files that are used for
+# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
+# The default value is: NO.
+
+FILTER_SOURCE_FILES    = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and
+# it is also possible to disable source filtering for a specific pattern using
+# *.ext= (so without naming a filter).
+# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.
+
+FILTER_SOURCE_PATTERNS =
+
+# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
+# is part of the input, its contents will be placed on the main page
+# (index.html). NOTE(review): main.md below only takes effect if it is found
+# under the INPUT paths (currently src/) -- verify its location.
+
+USE_MDFILE_AS_MAINPAGE = main.md
+
+#---------------------------------------------------------------------------
+# Configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will be
+# generated. Documented entities will be cross-referenced with these sources.
+#
+# Note: To get rid of all source code in the generated output, make sure that
+# also VERBATIM_HEADERS is set to NO.
+# The default value is: NO.
+
+SOURCE_BROWSER         = YES
+
+# Setting the INLINE_SOURCES tag to YES will include the body of functions,
+# classes and enums directly into the documentation.
+# The default value is: NO.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
+# special comment blocks from generated source code fragments. Normal C, C++ and
+# Fortran comments will always remain visible.
+# The default value is: YES.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES then for each documented
+# entity all documented functions referencing it will be listed.
+# The default value is: NO.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES then for each documented function
+# all documented entities called/used by that function will be listed.
+# The default value is: NO.
+
+REFERENCES_RELATION    = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
+# to YES then the hyperlinks from functions in REFERENCES_RELATION and
+# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will
+# link to the documentation.
+# The default value is: YES.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the
+# source code will show a tooltip with additional information such as prototype,
+# brief description and links to the definition and documentation. Since this
+# will make the HTML file larger and loading of large files a bit slower, you
+# can opt to disable this feature.
+# The default value is: YES.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+SOURCE_TOOLTIPS        = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code will
+# point to the HTML generated by the htags(1) tool instead of doxygen built-in
+# source browser. The htags tool is part of GNU's global source tagging system
+# (see https://www.gnu.org/software/global/global.html). You will need version
+# 4.8.6 or higher.
+#
+# To use it do the following:
+# - Install the latest version of global
+# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file
+# - Make sure the INPUT points to the root of the source tree
+# - Run doxygen as normal
+#
+# Doxygen will invoke htags (and that will in turn invoke gtags), so these
+# tools must be available from the command line (i.e. in the search path).
+#
+# The result: instead of the source browser generated by doxygen, the links to
+# source code will now point to the output of htags.
+# The default value is: NO.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a
+# verbatim copy of the header file for each class for which an include is
+# specified. Set to NO to disable this.
+# See also: Section \class.
+# The default value is: YES.
+
+VERBATIM_HEADERS       = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all
+# compounds will be generated. Enable this if the project contains a lot of
+# classes, structs, unions or interfaces.
+# The default value is: YES.
+
+ALPHABETICAL_INDEX     = YES
+
+# In case all classes in a project start with a common prefix, all classes will
+# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
+# can be used to specify a prefix (or a list of prefixes) that should be ignored
+# while generating the index headers.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+IGNORE_PREFIX          =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output
+# The default value is: YES.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each
+# generated HTML page (for example: .htm, .php, .asp).
+# The default value is: .html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a user-defined HTML header file for
+# each generated HTML page. If the tag is left blank doxygen will generate a
+# standard header.
+#
+# To get valid HTML, the header file must include any scripts and style sheets
+# that doxygen needs, which depend on the configuration options used (e.g.
+# the setting GENERATE_TREEVIEW). It is highly recommended to start with a
+# default header using
+# doxygen -w html new_header.html new_footer.html new_stylesheet.css
+# YourConfigFile
+# and then modify the file new_header.html. See also section "Doxygen usage"
+# for information on how to generate the default header that doxygen normally
+# uses.
+# Note: The header is subject to change so you typically have to regenerate the
+# default header when upgrading to a newer version of doxygen. For a description
+# of the possible markers and block names see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_HEADER            =
+
+# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
+# generated HTML page. If the tag is left blank doxygen will generate a standard
+# footer. See HTML_HEADER for more information on how to generate a default
+# footer and what special commands can be used inside the footer. See also
+# section "Doxygen usage" for information on how to generate the default footer
+# that doxygen normally uses.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FOOTER            =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
+# sheet that is used by each HTML page. It can be used to fine-tune the look of
+# the HTML output. If left blank doxygen will generate a default style sheet.
+# See also section "Doxygen usage" for information on how to generate the style
+# sheet that doxygen normally uses.
+# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as
+# it is more robust and this tag (HTML_STYLESHEET) will in the future become
+# obsolete.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_STYLESHEET        =
+
+# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined
+# cascading style sheets that are included after the standard style sheets
+# created by doxygen. Using this option one can overrule certain style aspects.
+# This is preferred over using HTML_STYLESHEET since it does not replace the
+# standard style sheet and is therefore more robust against future updates.
+# Doxygen will copy the style sheet files to the output directory.
+# Note: The order of the extra style sheet files is of importance (e.g. the last
+# style sheet in the list overrules the setting of the previous ones in the
+# list). For an example see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_STYLESHEET  =
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that the
+# files will be copied as-is; there are no commands or markers available.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_FILES       =
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
+# will adjust the colors in the style sheet and background images according to
+# this color. Hue is specified as an angle on a colorwheel, see
+# https://en.wikipedia.org/wiki/Hue for more information. For instance the value
+# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
+# purple, and 360 is red again.
+# Minimum value: 0, maximum value: 359, default value: 220.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_HUE    = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
+# in the HTML output. For a value of 0 the output will use grayscales only. A
+# value of 255 will produce the most vivid colors.
+# Minimum value: 0, maximum value: 255, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_SAT    = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the
+# luminance component of the colors in the HTML output. Values below 100
+# gradually make the output lighter, whereas values above 100 make the output
+# darker. The value divided by 100 is the actual gamma applied, so 80 represents
+# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not
+# change the gamma.
+# Minimum value: 40, maximum value: 240, default value: 80.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_GAMMA  = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting this
+# to YES can help to show when doxygen was last run and thus if the
+# documentation is up to date.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_TIMESTAMP         = YES
+
+# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML
+# documentation will contain a main index with vertical navigation menus that
+# are dynamically created via JavaScript. If disabled, the navigation index will
+# consists of multiple levels of tabs that are statically embedded in every HTML
+# page. Disable this option to support browsers that do not have JavaScript,
+# like the Qt help browser.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_MENUS     = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
+# shown in the various tree structured indices initially; the user can expand
+# and collapse entries dynamically later on. Doxygen will expand the tree to
+# such a level that at most the specified number of entries are visible (unless
+# a fully collapsed tree already exceeds this amount). So setting the number of
+# entries 1 will produce a full collapsed tree by default. 0 is a special value
+# representing an infinite number of entries and will result in a full expanded
+# tree by default.
+# Minimum value: 0, maximum value: 9999, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_INDEX_NUM_ENTRIES = 100
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files will be
+# generated that can be used as input for Apple's Xcode 3 integrated development
+# environment (see:
+# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To
+# create a documentation set, doxygen will generate a Makefile in the HTML
+# output directory. Running make will produce the docset in that directory and
+# running make install will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
+# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy
+# genXcode/_index.html for more information.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_DOCSET        = NO
+
+# This tag determines the name of the docset feed. A documentation feed provides
+# an umbrella under which multiple documentation sets from a single provider
+# (such as a company or product suite) can be grouped.
+# The default value is: Doxygen generated docs.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# This tag specifies a string that should uniquely identify the documentation
+# set bundle. This should be a reverse domain-name style string, e.g.
+# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
+# the documentation publisher. This should be a reverse domain-name style
+# string, e.g. com.mycompany.MyDocSet.documentation.
+# The default value is: org.doxygen.Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+# The default value is: Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_NAME  = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
+# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
+# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
+# (see:
+# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows.
+#
+# The HTML Help Workshop contains a compiler that can convert all HTML output
+# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
+# files are now used as the Windows 98 help format, and will replace the old
+# Windows help format (.hlp) on all Windows platforms in the future. Compressed
+# HTML files also contain an index, a table of contents, and you can search for
+# words in the documentation. The HTML workshop also contains a viewer for
+# compressed HTML files.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_HTMLHELP      = NO
+
+# The CHM_FILE tag can be used to specify the file name of the resulting .chm
+# file. You can add a path in front of the file if the result should not be
+# written to the html output directory.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_FILE               =
+
+# The HHC_LOCATION tag can be used to specify the location (absolute path
+# including file name) of the HTML help compiler (hhc.exe). If non-empty,
+# doxygen will try to run the HTML help compiler on the generated index.hhp.
+# The file has to be specified with full path.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+HHC_LOCATION           =
+
+# The GENERATE_CHI flag controls if a separate .chi index file is generated
+# (YES) or that it should be included in the main .chm file (NO).
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+GENERATE_CHI           = NO
+
+# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)
+# and project file content.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_INDEX_ENCODING     =
+
+# The BINARY_TOC flag controls whether a binary table of contents is generated
+# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it
+# enables the Previous and Next buttons.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members to
+# the table of contents of the HTML help documentation and to the tree view.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+TOC_EXPAND             = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
+# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
+# (.qch) of the generated HTML documentation.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify
+# the file name of the resulting .qch file. The path specified is relative to
+# the HTML output folder.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QCH_FILE               =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
+# Project output. For more information please see Qt Help Project / Namespace
+# (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_NAMESPACE          = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
+# Help Project output. For more information please see Qt Help Project / Virtual
+# Folders (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders).
+# The default value is: doc.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
+# filter to add. For more information please see Qt Help Project / Custom
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_NAME   =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see Qt Help Project / Custom
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_ATTRS  =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's filter section matches. Qt Help Project / Filter Attributes (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_SECT_FILTER_ATTRS  =
+
+# The QHG_LOCATION tag can be used to specify the location (absolute path
+# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to
+# run qhelpgenerator on the generated .qhp file.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHG_LOCATION           =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be
+# generated, together with the HTML files, they form an Eclipse help plugin. To
+# install this plugin and make it available under the help contents menu in
+# Eclipse, the contents of the directory containing the HTML and XML files needs
+# to be copied into the plugins directory of eclipse. The name of the directory
+# within the plugins directory should be the same as the ECLIPSE_DOC_ID value.
+# After copying Eclipse needs to be restarted before the help appears.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_ECLIPSEHELP   = NO
+
+# A unique identifier for the Eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have this
+# name. Each documentation set should have its own identifier.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.
+
+ECLIPSE_DOC_ID         = org.doxygen.Project
+
+# If you want full control over the layout of the generated HTML pages it might
+# be necessary to disable the index and replace it with your own. The
+# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top
+# of each HTML page. A value of NO enables the index and the value YES disables
+# it. Since the tabs in the index contain the same information as the navigation
+# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+DISABLE_INDEX          = NO
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information. If the tag
+# value is set to YES, a side panel will be generated containing a tree-like
+# index structure (just like the one that is generated for HTML Help). For this
+# to work a browser that supports JavaScript, DHTML, CSS and frames is required
+# (i.e. any modern browser). Windows users are probably better off using the
+# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can
+# further fine-tune the look of the index. As an example, the default style
+# sheet generated by doxygen has an example that shows how to put an image at
+# the root of the tree instead of the PROJECT_NAME. Since the tree basically has
+# the same information as the tab index, you could consider setting
+# DISABLE_INDEX to YES when enabling this option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_TREEVIEW      = NO
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
+# doxygen will group on one line in the generated HTML documentation.
+#
+# Note that a value of 0 will completely suppress the enum values from appearing
+# in the overview section.
+# Minimum value: 0, maximum value: 20, default value: 4.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
+# to set the initial width (in pixels) of the frame in which the tree is shown.
+# Minimum value: 0, maximum value: 1500, default value: 250.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+TREEVIEW_WIDTH         = 250
+
+# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to
+# external symbols imported via tag files in a separate window.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+EXT_LINKS_IN_WINDOW    = NO
+
+# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg
+# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see
+# https://inkscape.org) to generate formulas as SVG images instead of PNGs for
+# the HTML output. These images will generally look nicer at scaled resolutions.
+# Possible values are: png (the default) and svg (looks nicer but requires the
+# pdf2svg or inkscape tool).
+# The default value is: png.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FORMULA_FORMAT    = png
+
+# Use this tag to change the font size of LaTeX formulas included as images in
+# the HTML documentation. When you change the font size after a successful
+# doxygen run you need to manually remove any form_*.png images from the HTML
+# output directory to force them to be regenerated.
+# Minimum value: 8, maximum value: 50, default value: 10.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_FONTSIZE       = 10
+
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are not
+# supported properly for IE 6.0, but are supported on all modern browsers.
+#
+# Note that when changing this option you need to delete any form_*.png files in
+# the HTML output directory before the changes have effect.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_TRANSPARENT    = YES
+
+# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands
+# to create new LaTeX commands to be used in formulas as building blocks. See
+# the section "Including formulas" for details.
+
+FORMULA_MACROFILE      =
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
+# https://www.mathjax.org) which uses client side JavaScript for the rendering
+# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
+# installed or if you want formulas to look prettier in the HTML output. When
+# enabled you may also need to install MathJax separately and configure the path
+# to it using the MATHJAX_RELPATH option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+USE_MATHJAX            = NO
+
+# When MathJax is enabled you can set the default output format to be used for
+# the MathJax output. See the MathJax site (see:
+# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details.
+# Possible values are: HTML-CSS (which is slower, but has the best
+# compatibility), NativeMML (i.e. MathML) and SVG.
+# The default value is: HTML-CSS.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_FORMAT         = HTML-CSS
+
+# When MathJax is enabled you need to specify the location relative to the HTML
+# output directory using the MATHJAX_RELPATH option. The destination directory
+# should contain the MathJax.js script. For instance, if the mathjax directory
+# is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
+# Content Delivery Network so you can quickly see the result without installing
+# MathJax. However, it is strongly recommended to install a local copy of
+# MathJax from https://www.mathjax.org before deployment.
+# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_RELPATH        = https://cdn.jsdelivr.net/npm/mathjax@2
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
+# extension names that should be enabled during MathJax rendering. For example
+# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_EXTENSIONS     =
+
+# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
+# of code that will be used on startup of the MathJax code. See the MathJax site
+# (see:
+# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an
+# example see the documentation.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_CODEFILE       =
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
+# the HTML output. The underlying search engine uses javascript and DHTML and
+# should work on any modern browser. Note that when using HTML help
+# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
+# there is already a search function so this one should typically be disabled.
+# For large projects the javascript based search engine can be slow, then
+# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
+# search using the keyboard; to jump to the search box use <access key> + S
+# (what the <access key> is depends on the OS and browser, but it is typically
+# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down
+# key> to jump into the search results window, the results can be navigated
+# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel
+# the search. The filter options can be selected when the cursor is inside the
+# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys>
+# to select a filter and <Enter> or <escape> to activate or cancel the filter
+# option.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+SEARCHENGINE           = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a web server instead of a web client using JavaScript. There
+# are two flavors of web server based searching depending on the EXTERNAL_SEARCH
+# setting. When disabled, doxygen will generate a PHP script for searching and
+# an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing
+# and searching needs to be provided by external tools. See the section
+# "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SERVER_BASED_SEARCH    = NO
+
+# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP
+# script for searching. Instead the search results are written to an XML file
+# which needs to be processed by an external indexer. Doxygen will invoke an
+# external search engine pointed to by the SEARCHENGINE_URL option to obtain the
+# search results.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see:
+# https://xapian.org/).
+#
+# See the section "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH        = NO
+
+# The SEARCHENGINE_URL should point to a search engine hosted by a web server
+# which will return the search results when EXTERNAL_SEARCH is enabled.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see:
+# https://xapian.org/). See the section "External Indexing and Searching" for
+# details.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHENGINE_URL       =
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
+# search data is written to a file for indexing by an external tool. With the
+# SEARCHDATA_FILE tag the name of this file can be specified.
+# The default file is: searchdata.xml.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHDATA_FILE        = searchdata.xml
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the
+# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
+# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
+# projects and redirect the results back to the right project.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH_ID     =
+
+# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
+# projects other than the one defined by this configuration file, but that are
+# all added to the same external search index. Each project needs to have a
+# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id of
+# to a relative location where the documentation can be found. The format is:
+# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ...
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTRA_SEARCH_MAPPINGS  =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES, doxygen will generate LaTeX output.
+# The default value is: YES.
+
+GENERATE_LATEX         = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked.
+#
+# Note that when not enabling USE_PDFLATEX the default is latex when enabling
+# USE_PDFLATEX the default is pdflatex and when in the later case latex is
+# chosen this is overwritten by pdflatex. For specific output languages the
+# default can have been set differently, this depends on the implementation of
+# the output language.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
+# index for LaTeX.
+# Note: This tag is used in the Makefile / make.bat.
+# See also: LATEX_MAKEINDEX_CMD for the part in the generated output file
+# (.tex).
+# The default file is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# The LATEX_MAKEINDEX_CMD tag can be used to specify the command name to
+# generate index for LaTeX. In case there is no backslash (\) as first character
+# it will be automatically added in the LaTeX code.
+# Note: This tag is used in the generated output file (.tex).
+# See also: MAKEINDEX_CMD_NAME for the part in the Makefile / make.bat.
+# The default value is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_MAKEINDEX_CMD    = makeindex
+
+# If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used by the
+# printer.
+# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x
+# 14 inches) and executive (7.25 x 10.5 inches).
+# The default value is: a4.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PAPER_TYPE             = a4
+
+# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
+# that should be included in the LaTeX output. The package can be specified just
+# by its name or with the correct syntax as to be used with the LaTeX
+# \usepackage command. To get the times font for instance you can specify :
+# EXTRA_PACKAGES=times or EXTRA_PACKAGES={times}
+# To use the option intlimits with the amsmath package you can specify:
+# EXTRA_PACKAGES=[intlimits]{amsmath}
+# If left blank no extra packages will be included.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+EXTRA_PACKAGES         =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the
+# generated LaTeX document. The header should contain everything until the first
+# chapter. If it is left blank doxygen will generate a standard header. See
+# section "Doxygen usage" for information on how to let doxygen write the
+# default header to a separate file.
+#
+# Note: Only use a user-defined header if you know what you are doing! The
+# following commands have a special meaning inside the header: $title,
+# $datetime, $date, $doxygenversion, $projectname, $projectnumber,
+# $projectbrief, $projectlogo. Doxygen will replace $title with the empty
+# string, for the replacement values of the other commands the user is referred
+# to HTML_HEADER.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HEADER           =
+
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the
+# generated LaTeX document. The footer should contain everything after the last
+# chapter. If it is left blank doxygen will generate a standard footer. See
+# LATEX_HEADER for more information on how to generate a default footer and what
+# special commands can be used inside the footer.
+#
+# Note: Only use a user-defined footer if you know what you are doing!
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_FOOTER           =
+
+# The LATEX_EXTRA_STYLESHEET tag can be used to specify additional user-defined
+# LaTeX style sheets that are included after the standard style sheets created
+# by doxygen. Using this option one can overrule certain style aspects. Doxygen
+# will copy the style sheet files to the output directory.
+# Note: The order of the extra style sheet files is of importance (e.g. the last
+# style sheet in the list overrules the setting of the previous ones in the
+# list).
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_STYLESHEET =
+
+# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the LATEX_OUTPUT output
+# directory. Note that the files will be copied as-is; there are no commands or
+# markers available.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_FILES      =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is
+# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will
+# contain links (just like the HTML output) instead of page references. This
+# makes the output suitable for online browsing using a PDF viewer.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as
+# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX
+# files. Set this option to YES, to get a higher quality PDF documentation.
+#
+# See also section LATEX_CMD_NAME for selecting the engine.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep running
+# if errors occur, instead of asking the user for help. This option is also used
+# when generating formulas in HTML.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BATCHMODE        = NO
+
+# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the
+# index chapters (such as File Index, Compound Index, etc.) in the output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HIDE_INDICES     = NO
+
+# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source
+# code with syntax highlighting in the LaTeX output.
+#
+# Note that which sources are shown also depends on other settings such as
+# SOURCE_BROWSER.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_SOURCE_CODE      = NO
+
+# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
+# bibliography, e.g. plainnat, or ieeetr. See
+# https://en.wikipedia.org/wiki/BibTeX and \cite for more info.
+# The default value is: plain.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BIB_STYLE        = plain
+
+# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated
+# page will contain the date and time when the page was generated. Setting this
+# to NO can help when comparing the output of multiple runs.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_TIMESTAMP        = NO
+
+# The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute)
+# path from which the emoji images will be read. If a relative path is entered,
+# it will be relative to the LATEX_OUTPUT directory. If left blank the
+# LATEX_OUTPUT directory will be used.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EMOJI_DIRECTORY  =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES, doxygen will generate RTF output. The
+# RTF output is optimized for Word 97 and may not look too pretty with other RTF
+# readers/editors.
+# The default value is: NO.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: rtf.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES, doxygen generates more compact RTF
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will
+# contain hyperlink fields. The RTF file will contain links (just like the HTML
+# output) instead of page references. This makes the output suitable for online
+# browsing using Word or some other Word compatible readers that support those
+# fields.
+#
+# Note: WordPad (write) and others do not support links.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# configuration file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+#
+# See also section "Doxygen usage" for information on how to generate the
+# default style sheet that doxygen normally uses.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_STYLESHEET_FILE    =
+
+# Set optional variables used in the generation of an RTF document. Syntax is
+# similar to doxygen's configuration file. A template extensions file can be
+# generated using doxygen -e rtf extensionFile.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_EXTENSIONS_FILE    =
+
+# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code
+# with syntax highlighting in the RTF output.
+#
+# Note that which sources are shown also depends on other settings such as
+# SOURCE_BROWSER.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_SOURCE_CODE        = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES, doxygen will generate man pages for
+# classes and files.
+# The default value is: NO.
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it. A directory man3 will be created inside the directory specified by
+# MAN_OUTPUT.
+# The default directory is: man.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to the generated
+# man pages. In case the manual section does not start with a number, the number
+# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is
+# optional.
+# The default value is: .3.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_EXTENSION          = .3
+
+# The MAN_SUBDIR tag determines the name of the directory created within
+# MAN_OUTPUT in which the man pages are placed. It defaults to man followed by
+# MAN_EXTENSION with the initial . removed.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_SUBDIR             =
+
+# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it
+# will generate one additional man file for each entity documented in the real
+# man page(s). These additional files only source the real man page, but without
+# them the man command would be unable to find the correct page.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES, doxygen will generate an XML file that
+# captures the structure of the code including all documentation.
+# The default value is: NO.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: xml.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_OUTPUT             = xml
+
+# If the XML_PROGRAMLISTING tag is set to YES, doxygen will dump the program
+# listings (including syntax highlighting and cross-referencing information) to
+# the XML output. Note that enabling this will significantly increase the size
+# of the XML output.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_PROGRAMLISTING     = YES
+
+# If the XML_NS_MEMB_FILE_SCOPE tag is set to YES, doxygen will include
+# namespace members in file scope as well, matching the HTML output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_NS_MEMB_FILE_SCOPE = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the DOCBOOK output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_DOCBOOK tag is set to YES, doxygen will generate Docbook files
+# that can be used to generate PDF.
+# The default value is: NO.
+
+GENERATE_DOCBOOK       = NO
+
+# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in
+# front of it.
+# The default directory is: docbook.
+# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
+
+DOCBOOK_OUTPUT         = docbook
+
+# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the
+# program listings (including syntax highlighting and cross-referencing
+# information) to the DOCBOOK output. Note that enabling this will significantly
+# increase the size of the DOCBOOK output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
+
+DOCBOOK_PROGRAMLISTING = NO
+
+#---------------------------------------------------------------------------
+# Configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an
+# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures
+# the structure of the code including all documentation. Note that this feature
+# is still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES, doxygen will generate a Perl module
+# file that captures the structure of the code including all documentation.
+#
+# Note that this feature is still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES, doxygen will generate the necessary
+# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI
+# output from the Perl module output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES, the Perl module output will be nicely
+# formatted so it can be parsed by a human reader. This is useful if you want to
+# understand what is going on. On the other hand, if this tag is set to NO, the
+# size of the Perl module output will be much smaller and Perl will parse it
+# just the same.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file are
+# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful
+# so different doxyrules.make files included by the same Makefile don't
+# overwrite each other's variables.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all
+# C-preprocessor directives found in the sources and include files.
+# The default value is: YES.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names
+# in the source code. If set to NO, only conditional compilation will be
+# performed. Macro expansion can be done in a controlled way by setting
+# EXPAND_ONLY_PREDEF to YES.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+MACRO_EXPANSION        = YES
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
+# the macro expansion is limited to the macros specified with the PREDEFINED and
+# EXPAND_AS_DEFINED tags.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_ONLY_PREDEF     = YES
+
+# If the SEARCH_INCLUDES tag is set to YES, the include files in the
+# INCLUDE_PATH will be searched if a #include is found.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by the
+# preprocessor.
+# This tag requires that the tag SEARCH_INCLUDES is set to YES.
+
+INCLUDE_PATH           =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will be
+# used.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+INCLUDE_FILE_PATTERNS  =
+
+# The PREDEFINED tag can be used to specify one or more macro names that are
+# defined before the preprocessor is started (similar to the -D option of e.g.
+# gcc). The argument of the tag is a list of macros of the form: name or
+# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
+# is assumed. To prevent a macro definition from being undefined via #undef or
+# recursively expanded use the := operator instead of the = operator.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+PREDEFINED             = VF_FETOL \
+                         VF_MPI \
+                         VF_METIS \
+                         VF_ZOLTAN
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
+# tag can be used to specify a list of macro names that should be expanded. The
+# macro definition that is found in the sources will be used. Use the PREDEFINED
+# tag if you want to use a different macro definition that overrules the
+# definition found in the source code.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_AS_DEFINED      =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
+# remove all references to function-like macros that are alone on a line, have
+# an all uppercase name, and do not end with a semicolon. Such function macros
+# are typically used for boiler-plate code, and will confuse the parser if not
+# removed.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES tag can be used to specify one or more tag files. For each tag
+# file the location of the external documentation should be added. The format of
+# a tag file without this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where loc1 and loc2 can be relative or absolute paths or URLs. See the
+# section "Linking to external documentation" for more information about the use
+# of tag files.
+# Note: Each tag file must have a unique name (where the name does NOT include
+# the path). If a tag file is not located in the directory in which doxygen is
+# run, you must also specify the path to the tagfile here.
+
+TAGFILES               =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
+# tag file that is based on the input files it reads. See section "Linking to
+# external documentation" for more information about the usage of tag files.
+
+GENERATE_TAGFILE       =
+
+# If the ALLEXTERNALS tag is set to YES, all external class will be listed in
+# the class index. If set to NO, only the inherited external classes will be
+# listed.
+# The default value is: NO.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES, all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will be
+# listed.
+# The default value is: YES.
+
+EXTERNAL_GROUPS        = YES
+
+# If the EXTERNAL_PAGES tag is set to YES, all external pages will be listed in
+# the related pages index. If set to NO, only the current project's pages will
+# be listed.
+# The default value is: YES.
+
+EXTERNAL_PAGES         = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram
+# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
+# NO turns the diagrams off. Note that this option also works with HAVE_DOT
+# disabled, but it is recommended to install and use dot, since it yields more
+# powerful graphs.
+# The default value is: YES.
+
+CLASS_DIAGRAMS         = YES
+
+# You can include diagrams made with dia in doxygen documentation. Doxygen will
+# then run dia to produce the diagram and insert it in the documentation. The
+# DIA_PATH tag allows you to specify the directory where the dia binary resides.
+# If left empty dia is assumed to be found in the default search path.
+
+DIA_PATH               =
+
+# If set to YES the inheritance and collaboration graphs will hide inheritance
+# and usage relations if the target is undocumented or is not a class.
+# The default value is: YES.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz (see:
+# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
+# Bell Labs. The other options in this section have no effect if this option is
+# set to NO
+# The default value is: NO.
+
+HAVE_DOT               = NO
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed
+# to run in parallel. When set to 0 doxygen will base this on the number of
+# processors available in the system. You can set it explicitly to a value
+# larger than 0 to get control over the balance between CPU load and processing
+# speed.
+# Minimum value: 0, maximum value: 32, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_NUM_THREADS        = 0
+
+# When you want a differently looking font in the dot files that doxygen
+# generates you can specify the font name using DOT_FONTNAME. You need to make
+# sure dot is able to find the font, which can be done by putting it in a
+# standard location or by setting the DOTFONTPATH environment variable or by
+# setting DOT_FONTPATH to the directory containing the font.
+# The default value is: Helvetica.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTNAME           = Helvetica
+
+# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of
+# dot graphs.
+# Minimum value: 4, maximum value: 24, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTSIZE           = 10
+
+# By default doxygen will tell dot to use the default font as specified with
+# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set
+# the path where dot can find it using this tag.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTPATH           =
+
+# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for
+# each documented class showing the direct and indirect inheritance relations.
+# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
+# graph for each documented class showing the direct and indirect implementation
+# dependencies (inheritance, containment, and class references variables) of the
+# class with other documented classes.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
+# groups, showing the direct groups dependencies.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES, doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LOOK               = NO
+
+# If the UML_LOOK tag is enabled, the fields and methods are shown inside the
+# class node. If there are many fields or methods and many nodes the graph may
+# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the
+# number of items for each type to make the size more manageable. Set this to 0
+# for no limit. Note that the threshold may be exceeded by 50% before the limit
+# is enforced. So when you set the threshold to 10, up to 15 fields may appear,
+# but if the number exceeds 15, the total amount of fields shown is limited to
+# 10.
+# Minimum value: 0, maximum value: 100, default value: 10.
+# This tag requires that the tag UML_LOOK is set to YES.
+
+UML_LIMIT_NUM_FIELDS   = 10
+
+# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and
+# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS
+# tag is set to YES, doxygen will add type and arguments for attributes and
+# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen
+# will not generate fields with class member information in the UML graphs. The
+# class diagrams will look similar to the default class diagrams but using UML
+# notation for the relationships.
+# Possible values are: NO, YES and NONE.
+# The default value is: NO.
+# This tag requires that the tag UML_LOOK is set to YES.
+
+DOT_UML_DETAILS        = NO
+
+# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters
+# to display on a single line. If the actual line length exceeds this threshold
+# significantly it will wrapped across multiple lines. Some heuristics are apply
+# to avoid ugly line breaks.
+# Minimum value: 0, maximum value: 1000, default value: 17.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_WRAP_THRESHOLD     = 17
+
+# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
+# collaboration graphs will show the relations between templates and their
+# instances.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+TEMPLATE_RELATIONS     = NO
+
+# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
+# YES then doxygen will generate a graph for each documented file showing the
+# direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDE_GRAPH          = YES
+
+# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
+# set to YES then doxygen will generate a graph for each documented file showing
+# the direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH tag is set to YES then doxygen will generate a call
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command. Disabling a call graph can be
+# accomplished by means of the command \hidecallgraph.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALL_GRAPH             = NO
+
+# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable caller graphs for selected
+# functions only using the \callergraph command. Disabling a caller graph can be
+# accomplished by means of the command \hidecallergraph.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will graphical
+# hierarchy of all classes instead of a textual one.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the
+# dependencies a directory has on other directories in a graphical way. The
+# dependency relations are determined by the #include relations between the
+# files in the directories.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. For an explanation of the image formats see the section
+# output formats in the documentation of the dot tool (Graphviz (see:
+# http://www.graphviz.org/)).
+# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
+# to make the SVG files visible in IE 9+ (other browsers do not have this
+# requirement).
+# Possible values are: png, jpg, gif, svg, png:gd, png:gd:gd, png:cairo,
+# png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and
+# png:gdiplus:gdiplus.
+# The default value is: png.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_IMAGE_FORMAT       = png
+
+# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
+# enable generation of interactive SVG images that allow zooming and panning.
+#
+# Note that this requires a modern browser other than Internet Explorer. Tested
+# and working are Firefox, Chrome, Safari, and Opera.
+# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make
+# the SVG files visible. Older versions of IE do not have SVG support.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INTERACTIVE_SVG        = NO
+
+# The DOT_PATH tag can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_PATH               =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the \dotfile
+# command).
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOTFILE_DIRS           =
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the \mscfile
+# command).
+
+MSCFILE_DIRS           =
+
+# The DIAFILE_DIRS tag can be used to specify one or more directories that
+# contain dia files that are included in the documentation (see the \diafile
+# command).
+
+DIAFILE_DIRS           =
+
+# When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the
+# path where java can find the plantuml.jar file. If left blank, it is assumed
+# PlantUML is not used or called during a preprocessing step. Doxygen will
+# generate a warning when it encounters a \startuml command in this case and
+# will not generate output for the diagram.
+
+PLANTUML_JAR_PATH      =
+
+# When using plantuml, the PLANTUML_CFG_FILE tag can be used to specify a
+# configuration file for plantuml.
+
+PLANTUML_CFG_FILE      =
+
+# When using plantuml, the specified paths are searched for files specified by
+# the !include statement in a plantuml block.
+
+PLANTUML_INCLUDE_PATH  =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes
+# that will be shown in the graph. If the number of nodes in a graph becomes
+# larger than this value, doxygen will truncate the graph, which is visualized
+# by representing a node as a red box. Note that doxygen if the number of direct
+# children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that
+# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+# Minimum value: 0, maximum value: 10000, default value: 50.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
+# generated by dot. A depth value of 3 means that only nodes reachable from the
+# root by following a path via at most 3 edges will be shown. Nodes that lay
+# further from the root node will be omitted. Note that setting this option to 1
+# or 2 may greatly reduce the computation time needed for large code bases. Also
+# note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+# Minimum value: 0, maximum value: 1000, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not seem
+# to support this out of the box.
+#
+# Warning: Depending on the platform used, enabling this option may lead to
+# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
+# read).
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10) support
+# this, this feature is disabled by default.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page
+# explaining the meaning of the various boxes and arrows in the dot generated
+# graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate
+# files that are used to generate the various graphs.
+#
+# Note: This setting is not only used for dot files but also for msc and
+# plantuml temporary files.
+# The default value is: YES.
+
+DOT_CLEANUP            = YES
diff --git a/metadata.xml b/metadata.xml
new file mode 100644
index 0000000000000000000000000000000000000000..7cbae3ae7e1d5d7d48af2f0e5577253a89f953f5
--- /dev/null
+++ b/metadata.xml
@@ -0,0 +1,204 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.3/metadata.xsd">
+	<identifier identifierType="DOI">PLACEHOLDER</identifier>
+	<titles>
+		<title xml:lang="en">VirtualFluids</title>
+	</titles>
+	<language>en</language>
+	<creators>
+		<creator>
+			<creatorName nameType="Personal">Krafczyk, Manfred</creatorName>
+			<givenName>Manfred</givenName>
+			<familyName>Krafczyk</familyName>
+			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0002-8509-0871</nameIdentifier>
+			<affiliation xml:lang="de">TU Braunschweig</affiliation>
+			<affiliation xml:lang="de">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation>
+		</creator>
+		<creator>
+			<creatorName nameType="Organizational">Institut für rechnergestützte Modellierung im Bauingenieurwesen</creatorName>
+			<affiliation xml:lang="de">TU Braunschweig</affiliation>
+		</creator>
+	</creators>
+	<publisher xml:lang="de">Institut für rechnergestützte Modellierung im Bauingenieurwesen</publisher>
+	<publicationYear>2021</publicationYear>
+	<resourceType resourceTypeGeneral="Software">Computational Fluid Dynamics Solver</resourceType>
+	<subjects>
+		<subject subjectScheme="DDC" schemeURI="https://www.oclc.org/en/dewey.html">532 Fluid Mechanics, liquid mechanics</subject>
+	</subjects>
+	<contributors>
+		<contributor contributorType="Researcher">
+			<contributorName>Ahrenholz, Benjamin</contributorName>
+			<givenName>Benjamin</givenName>
+			<familyName>Ahrenholz</familyName>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Alihussein, Hussein</contributorName>
+			<givenName>Hussein</givenName>
+			<familyName>Alihussein</familyName>
+			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0003-3656-7028</nameIdentifier>
+			<affiliation xml:lang="de">TU Braunschweig</affiliation>
+			<affiliation xml:lang="de">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Bindick, Sebastian</contributorName>
+			<givenName>Sebastian</givenName>
+			<familyName>Bindick</familyName>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Brendel, Aileen</contributorName>
+			<givenName>Aileen</givenName>
+			<familyName>Brendel</familyName>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Geier, Martin</contributorName>
+			<givenName>Martin</givenName>
+			<familyName>Geier</familyName>
+			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0002-8367-9412</nameIdentifier>
+			<affiliation xml:lang="de">TU Braunschweig</affiliation>
+			<affiliation xml:lang="de">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Geller, Sebastian</contributorName>
+			<givenName>Sebastian</givenName>
+			<familyName>Geller</familyName>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Goraki Fard, Ehsan</contributorName>
+			<givenName>Ehsan</givenName>
+			<familyName>Goraki Fard</familyName>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Hegewald, Jan</contributorName>
+			<givenName>Jan</givenName>
+			<familyName>Hegewald</familyName>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Janßen, Christian</contributorName>
+			<givenName>Christian</givenName>
+			<familyName>Janßen</familyName>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Kutscher, Konstantin</contributorName>
+			<givenName>Konstantin</givenName>
+			<familyName>Kutscher</familyName>
+			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0002-1099-1608</nameIdentifier>
+			<affiliation xml:lang="de">TU Braunschweig</affiliation>
+			<affiliation xml:lang="de">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Lenz, Stephan</contributorName>
+			<givenName>Stephan</givenName>
+			<familyName>Lenz</familyName>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Linxweiler, Jan</contributorName>
+			<givenName>Jan</givenName>
+			<familyName>Linxweiler</familyName>
+			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0002-2755-5087</nameIdentifier>
+			<affiliation xml:lang="de">TU Braunschweig</affiliation>
+			<affiliation xml:lang="de">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Lux, Lennard</contributorName>
+			<givenName>Lennard</givenName>
+			<familyName>Lux</familyName>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Marcus, Sven</contributorName>
+			<givenName>Sven</givenName>
+			<familyName>Marcus</familyName>
+			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0003-3689-2162</nameIdentifier>
+			<affiliation xml:lang="de">TU Braunschweig</affiliation>
+			<affiliation xml:lang="de">Universitätsbibliothek Braunschweig</affiliation>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Peters, Sören</contributorName>
+			<givenName>Sören</givenName>
+			<familyName>Peters</familyName>
+			<affiliation xml:lang="de">TU Braunschweig</affiliation>
+			<affiliation xml:lang="de">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Safari, Hesameddin</contributorName>
+			<givenName>Hesameddin</givenName>
+			<familyName>Safari</familyName>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Schönherr, Martin</contributorName>
+			<givenName>Martin</givenName>
+			<familyName>Schönherr</familyName>
+			<nameIdentifier nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0002-4774-1776</nameIdentifier>
+			<affiliation xml:lang="de">TU Braunschweig</affiliation>
+			<affiliation xml:lang="de">Institut für rechnergestützte Modellierung im Bauingenieurwesen</affiliation>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Stiebler, Maik</contributorName>
+			<givenName>Maik</givenName>
+			<familyName>Stiebler</familyName>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Textor, Sören</contributorName>
+			<givenName>Sören</givenName>
+			<familyName>Textor</familyName>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Tölke, Jonas</contributorName>
+			<givenName>Jonas</givenName>
+			<familyName>Tölke</familyName>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Uphoff, Sonja</contributorName>
+			<givenName>Sonja</givenName>
+			<familyName>Uphoff</familyName>
+		</contributor>
+
+		<contributor contributorType="Researcher">
+			<contributorName>Wellmann, Anna</contributorName>
+			<givenName>Anna</givenName>
+			<familyName>Wellmann</familyName>
+		</contributor>
+	</contributors>
+	<dates>
+		<date dateType="Created">2000</date>
+	</dates>
+	<formats>
+		<format>text/x-c</format>
+		<format>text/x-h</format>
+		<format>text/x-script.python</format>
+	</formats>
+	<relatedIdentifiers>
+		<relatedIdentifier relatedIdentifierType="URL" relationType="Requires" resourceTypeGeneral="Software">https://www.open-mpi.org/software/ompi/v4.1/</relatedIdentifier>
+		<relatedIdentifier relatedIdentifierType="URL" relationType="IsCompiledBy" resourceTypeGeneral="Software">https://cmake.org</relatedIdentifier>
+		<relatedIdentifier relatedIdentifierType="URL" relationType="IsCompiledBy" resourceTypeGeneral="Software">https://gcc.gnu.org</relatedIdentifier>
+		<relatedIdentifier relatedIdentifierType="URL" relationType="IsCompiledBy" resourceTypeGeneral="Software">https://clang.llvm.org</relatedIdentifier>
+		<relatedIdentifier relatedIdentifierType="URL" relationType="IsCompiledBy" resourceTypeGeneral="Software">https://visualstudio.microsoft.com/vs/features/cplusplus/</relatedIdentifier>
+	</relatedIdentifiers>
+	<rightsList>
+		<rights xml:lang="en" schemeURI="https://spdx.org/licenses/" rightsIdentifierScheme="SPDX" rightsIdentifier="GPL-3.0-only" rightsURI="https://www.gnu.org/licenses/gpl-3.0-standalone.html">GNU General Public License Version 3</rights>
+	</rightsList>
+	<descriptions>
+		<description descriptionType="Abstract">
+			VirtualFluids (VF) is a research code developed at the Institute for Computational Modeling in Civil Engineering (iRMB). The code is a Computational Fluid Dynamics (CFD) solver based on the Lattice Boltzmann Method (LBM) for turbulent, thermal, multiphase and multicomponent flow problems as well as for multi-field problems such as Fluid-Structure-interaction including distributed pre- and postprocessing capabilities for simulations with more than 100 billion degrees of freedom.
+		</description>
+	</descriptions>
+</resource>
diff --git a/setup.py b/setup.py
index 6f4ff71b014dbafa8800af234cf715f5207fb2a8..ffe6663be9561a209945b91bb396254b703ae892 100644
--- a/setup.py
+++ b/setup.py
@@ -10,11 +10,17 @@ from distutils.version import LooseVersion
 
 vf_cmake_args = [
     "-DBUILD_VF_PYTHON_BINDINGS=ON",
+    "-DBUILD_VF_DOUBLE_ACCURACY=ON",
+    "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache",
+    "-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache",
+    "-DCMAKE_C_COMPILER_LAUNCHER=ccache",
     "-DBUILD_VF_CPU:BOOL=ON",
+    "-DBUILD_VF_GPU:BOOL=OFF",
     "-DUSE_METIS=ON",
     "-DUSE_MPI=ON",
     "-DBUILD_SHARED_LIBS=OFF",
-    "-DBUILD_VF_UNIT_TESTS:BOOL=ON"
+    "-DBUILD_VF_UNIT_TESTS:BOOL=ON",
+    "-DBUILD_WARNINGS_AS_ERRORS=OFF"
 ]
 
 
diff --git a/sonar-project.properties b/sonar-project.properties
index 7f31fa97101d3002aa47096831206cf9768a85b5..a25bb1af8070f0ea5b75f67e624211f38fef38bf 100644
--- a/sonar-project.properties
+++ b/sonar-project.properties
@@ -27,8 +27,6 @@ sonar.cxx.clangtidy.reportPath=build/clangtidy.txt
 
 sonar.cxx.jsonCompilationDatabase=build/compile_commands.json
 
-sonar.cxx.gcc.reportPath=build/gcc_warnings.txt
-
 #sonar.cxx.valgrind.reportPath
 
 sonar.cxx.funccomplexity.threshold=10
diff --git a/src/basics/basics/container/CbVectorPool.h b/src/basics/basics/container/CbVectorPool.h
index e1a061d1ac3016fbe3b116243473ee609b24eb8c..0272056b51d65aacafad78cbae03971a24879a7e 100644
--- a/src/basics/basics/container/CbVectorPool.h
+++ b/src/basics/basics/container/CbVectorPool.h
@@ -118,7 +118,7 @@ public:
             return this->allocData(allocator, vec, dataSize, value);
         }
 
-        UB_THROW(UbException(UB_EXARGS, "vector-key=" + UbSystem::toString(allocator.key) + " bereits vergeben!"));
+        UB_THROW(UbException(UB_EXARGS, "vector-key=" + UbSystem::toString(allocator.key) + " already taken! (e.g. SetConnectorBlockVisitor was called several times)"));
     }
     /*==================================================================*/
     bool resizeVectorData(CbVector<value_type> &vec, const size_type &dataSize, const value_type &value = value_type())
diff --git a/src/basics/geometry3d/GbCylinder3D.cpp b/src/basics/geometry3d/GbCylinder3D.cpp
index f6d1ed6e701fb7e3209a6e67720dc3d41d8f1aeb..2b90ca0fae9a7e22245961b8d713a35d72fa7df8 100644
--- a/src/basics/geometry3d/GbCylinder3D.cpp
+++ b/src/basics/geometry3d/GbCylinder3D.cpp
@@ -1162,12 +1162,10 @@ double GbCylinder3D::getIntersectionRaytraceFactor(const double &x1, const doubl
             } else
                 return -1.0;
         } else {
-            // if     (UbMath::negative(rx1)) d = -1.0 * (x1 - minX1) / rx1;
-            // else if(UbMath::positive(rx1)) d = -1.0 * (x1 - maxX1) / rx1;
             if (UbMath::negative(rx1))
-                d = -1.0 * (x1 - maxX1) / rx1;
-            else if (UbMath::positive(rx1))
                 d = -1.0 * (x1 - minX1) / rx1;
+            else if (UbMath::positive(rx1))
+                d = -1.0 * (x1 - maxX1) / rx1;
         }
     } else if (this->isParallelToX2Axis()) {
         if (UbMath::equal(x2, minX2) && UbMath::negative(rx2))
diff --git a/src/cpu/VirtualFluids.h b/src/cpu/VirtualFluids.h
index 59dee98b1ec59ce85d270fafbe96ca7cd6b85473..363c9c046b37a45d800ea142e79617f71a8499d3 100644
--- a/src/cpu/VirtualFluids.h
+++ b/src/cpu/VirtualFluids.h
@@ -228,7 +228,7 @@
 #include <LBM/LBMKernelETD3Q27BGK.h>
 #include <LBM/LBMSystem.h>
 #include <LBM/LBMUnitConverter.h>
-//#include <LBM/BGKLBMKernel.h>
+#include <LBM/BGKLBMKernel.h>
 #include <LBM/ThixotropyLBMKernel.h>
 #include <LBM/ThixotropyExpLBMKernel.h>
 #include <LBM/CumulantLBMKernel.h>
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.cpp
index 69543bc6dce034c9c878711013d0297180c3d99f..5c852528a2abe2bf8de06753f9aaa78bf7f8a565 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.cpp
@@ -36,7 +36,12 @@
 #include "BCArray3D.h"
 #include "BoundaryConditions.h"
 #include "EsoTwist3D.h"
+#include "Block3D.h"
 
+void BCAlgorithm::setBlock(SPtr<Block3D> block) 
+{ 
+    this->block = block; 
+}
 //////////////////////////////////////////////////////////////////////////
 void BCAlgorithm::setNodeIndex(int x1, int x2, int x3)
 {
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h
index 6b60da224ecaac9f56412cbd38b65d758816a2b2..67a3620c0a37c623c697bf8ec6a3f70f2ba00247 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/BCAlgorithm.h
@@ -41,6 +41,7 @@
 class DistributionArray3D;
 class BCArray3D;
 class BoundaryConditions;
+class Block3D;
 
 //! \brief Abstract class of baundary conditions algorithm
 //! \details  BCAlgorithm provides interface for implementation of diferent boundary conditions
@@ -77,6 +78,7 @@ public:
     virtual void addDistributions(SPtr<DistributionArray3D> distributions)   = 0;
     virtual void addDistributionsH(SPtr<DistributionArray3D> distributionsH) {}
     virtual void addDistributionsH2(SPtr<DistributionArray3D> distributionsH2) {}
+    void setBlock(SPtr<Block3D> block);
     void setNodeIndex(int x1, int x2, int x3);
     void setBcPointer(SPtr<BoundaryConditions> bcPtr);
     void setCompressible(bool c);
@@ -107,6 +109,7 @@ protected:
     SPtr<DistributionArray3D> distributionsH;
     SPtr<DistributionArray3D> distributionsH2;
     SPtr<BCArray3D> bcArray;
+    SPtr<Block3D> block;
 
     LBMReal collFactor;
     LBMReal collFactorL, collFactorG, collFactorPh;
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelVelocityBCAlgorithm.h b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelVelocityBCAlgorithm.h
index 263119401c1d2c437e8ae3962edd33bfcd3b554c..9673a009f75bccd71924985ec9a27187d9e1e12e 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelVelocityBCAlgorithm.h
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/RheologyBinghamModelVelocityBCAlgorithm.h
@@ -43,7 +43,7 @@ public:
    RheologyBinghamModelVelocityBCAlgorithm()
    {
       BCAlgorithm::type = BCAlgorithm::RheologyBinghamModelVelocityBCAlgorithm;
-      BCAlgorithm::preCollision = false;
+      BCAlgorithm::preCollision = true;
    }
    ~RheologyBinghamModelVelocityBCAlgorithm() {}
    SPtr<BCAlgorithm> clone() override
diff --git a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAlgorithm.cpp b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAlgorithm.cpp
index 4905e76a9f7a3e46848b8d8dbaf81095fe9a5b14..15768aeeb043620aece86194319eafe00ea1df60 100644
--- a/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAlgorithm.cpp
+++ b/src/cpu/VirtualFluidsCore/BoundaryConditions/VelocityBCAlgorithm.cpp
@@ -34,6 +34,7 @@
 #include "VelocityBCAlgorithm.h"
 #include "BoundaryConditions.h"
 #include "DistributionArray3D.h"
+#include "Block3D.h"
 
 VelocityBCAlgorithm::VelocityBCAlgorithm()
 {
@@ -61,6 +62,9 @@ void VelocityBCAlgorithm::applyBC()
     calcMacrosFct(f, drho, vx1, vx2, vx3);
     calcFeqFct(feq, drho, vx1, vx2, vx3);
 
+    //DEBUG
+    //int blockID = block->getGlobalID();
+
     rho = 1.0 + drho * compressibleFactor;
 
     for (int fdir = D3Q27System::FSTARTDIR; fdir <= D3Q27System::FENDDIR; fdir++) {
diff --git a/src/cpu/VirtualFluidsCore/CMakeLists.txt b/src/cpu/VirtualFluidsCore/CMakeLists.txt
index 09766e266935ed1c7d89223c0713363f4df5edcf..36ac278fb8aee484d38a09a3fd4499965875f712 100644
--- a/src/cpu/VirtualFluidsCore/CMakeLists.txt
+++ b/src/cpu/VirtualFluidsCore/CMakeLists.txt
@@ -25,7 +25,7 @@ if(BUILD_USE_OPENMP)
    list(APPEND VF_LIBRARIES OpenMP::OpenMP_CXX)
 endif()
 
-vf_add_library(BUILDTYPE static PUBLIC_LINK basics muparser MPI::MPI_CXX ${VF_LIBRARIES})
+vf_add_library(BUILDTYPE static PUBLIC_LINK basics muparser MPI::MPI_CXX ${VF_LIBRARIES} PRIVATE_LINK lbm)
 
 
 vf_get_library_name(library_name)
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp
index 37e593d868fa76d5c5cedada99f256c0fa8c74c4..9fd6e8c28aeb1bdb8120c98f0a338aa21b38cc57 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.cpp
@@ -27,13 +27,10 @@ CalculateTorqueCoProcessor::CalculateTorqueCoProcessor( SPtr<Grid3D> grid, SPtr<
          if(path.size()>0){ UbSystem::makeDirectory(path); ostr.open(fname.c_str(), std::ios_base::out | std::ios_base::app);}
          if(!ostr) throw UbException(UB_EXARGS,"couldn't open file "+fname);
       }
-      ostr.width(12);
-      ostr << "step" << "\t";
-      ostr.width(10); 
-      ostr << "Tx" << "\t";
-      ostr.width(18); 
-      ostr << "Ty" << "\t";
-      ostr.width(18);
+
+      ostr << "step;";
+      ostr << "Tx;";
+      ostr << "Ty;";
       ostr << "Tz" << std::endl;
       ostr.close();
    }
@@ -70,12 +67,10 @@ void CalculateTorqueCoProcessor::collectData( double step )
          if(!ostr) throw UbException(UB_EXARGS,"couldn't open file "+fname);
       }
 
-      ostr.width(12); 
-      ostr.setf(std::ios::fixed); 
-      ostr << istep << "\t";
-      write(&ostr, forceX1global, (char*)"\t");
-      write(&ostr, forceX2global, (char*)"\t");
-      write(&ostr, forceX3global, (char*)"\t");
+      ostr << istep << ";";
+      ostr << forceX1global << ";";
+      ostr << forceX2global << ";";
+      ostr << forceX3global;
       ostr << std::endl;
       ostr.close();
    }
@@ -134,6 +129,9 @@ void CalculateTorqueCoProcessor::calculateForces()
             int x3 = node[2];
 
             Vector3D worldCoordinates = grid->getNodeCoordinates(block, x1, x2, x3);
+            double rx                 = worldCoordinates[0] - x1Centre;
+            double ry                 = worldCoordinates[1] - x2Centre;
+            double rz                 = worldCoordinates[2] - x3Centre;
 
             //without ghost nodes
             if (x1 < minX1 || x1 > maxX1 || x2 < minX2 || x2 > maxX2 ||x3 < minX3 || x3 > maxX3 ) continue;
@@ -141,10 +139,14 @@ void CalculateTorqueCoProcessor::calculateForces()
             if(bcArray->isFluid(x1,x2,x3)) //es kann sein, dass der node von einem anderen interactor z.B. als solid gemarkt wurde!!!
             {
                SPtr<BoundaryConditions> bc = bcArray->getBC(x1,x2,x3);
-               UbTupleDouble3 forceVec = getForces(x1,x2,x3,distributions,bc);
-               torqueX1 += (worldCoordinates[1] - x2Centre) * val<3>(forceVec) - (worldCoordinates[2] - x3Centre) * val<2>(forceVec);
-               torqueX2 += (worldCoordinates[2] - x3Centre) * val<1>(forceVec) - (worldCoordinates[0] - x1Centre) * val<3>(forceVec);
-               torqueX3 += (worldCoordinates[0] - x1Centre) * val<2>(forceVec) - (worldCoordinates[1] - x2Centre) * val<1>(forceVec);
+               UbTupleDouble3 forceVec     = getForces(x1,x2,x3,distributions,bc);
+               double Fx                   = val<1>(forceVec);
+               double Fy                   = val<2>(forceVec);
+               double Fz                   = val<3>(forceVec);
+
+               torqueX1 += ry * Fz - rz * Fy;
+               torqueX2 += rz * Fx - rx * Fz;
+               torqueX3 += rx * Fy - ry * Fx;
                //counter++;
                //UBLOG(logINFO, "x1="<<(worldCoordinates[1] - x2Centre)<<",x2=" << (worldCoordinates[2] - x3Centre)<< ",x3=" << (worldCoordinates[0] - x1Centre) <<" forceX3 = " << forceX3);
             }
@@ -241,14 +243,6 @@ void CalculateTorqueCoProcessor::addInteractor( SPtr<D3Q27Interactor> interactor
 {
    interactors.push_back(interactor);
 }
-//////////////////////////////////////////////////////////////////////////
-void CalculateTorqueCoProcessor::write(std::ofstream *fileObject, double value, char *separator) 
-{ 
-   (*fileObject).width(12); 
-   (*fileObject).precision(16); 
-   (*fileObject).setf(std::ios::fixed); 
-   (*fileObject) << value; 
-   (*fileObject) << separator; 
-} 
+
 
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h
index 2eda4a063a8e4a6649380e05e2684d3f1a8f68ee..43e1e75acaf4ab115ac9c6dc40b449cf98f97e79 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/CalculateTorqueCoProcessor.h
@@ -36,7 +36,6 @@ protected:
 	void collectData(double step);
    void calculateForces();
    UbTupleDouble3 getForces(int x1, int x2, int x3, SPtr<DistributionArray3D> distributions, SPtr<BoundaryConditions> bc);
-   void write(std::ofstream *fileObject, double value, char *separator);
 private:
    std::string path;
    SPtr<Communicator> comm;
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.cpp
index 028cb68ca562771b263b801c48c5f3b3791c723f..a3572c8c40ed63144080c1803d728393eaf30547 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOCoProcessor.cpp
@@ -70,7 +70,7 @@ MPIIOCoProcessor::MPIIOCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const
 
     //---------------------------------------
 
-    MPI_Type_contiguous(7, MPI_CHAR, &arrayPresenceType);
+    MPI_Type_contiguous(8, MPI_CHAR, &arrayPresenceType);
     MPI_Type_commit(&arrayPresenceType);
 }
 
@@ -378,10 +378,24 @@ void MPIIOCoProcessor::clearAllFiles(int step)
     MPI_File_set_size(file_handler, new_size);
     MPI_File_close(&file_handler);
 
-    std::string filename2 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSet.bin";
-    int rc2 = MPI_File_open(MPI_COMM_WORLD, filename2.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
-    if (rc2 != MPI_SUCCESS)
-        throw UbException(UB_EXARGS, "couldn't open file " + filename2);
+    std::string filename21 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetF.bin";
+    int rc21 = MPI_File_open(MPI_COMM_WORLD, filename21.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
+    if (rc21 != MPI_SUCCESS)
+        throw UbException(UB_EXARGS, "couldn't open file " + filename21);
+    MPI_File_set_size(file_handler, new_size);
+    MPI_File_close(&file_handler);
+
+    std::string filename22 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH1.bin";
+    int rc22 = MPI_File_open(MPI_COMM_WORLD, filename22.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
+    if (rc22 != MPI_SUCCESS)
+        throw UbException(UB_EXARGS, "couldn't open file " + filename22);
+    MPI_File_set_size(file_handler, new_size);
+    MPI_File_close(&file_handler);
+
+    std::string filename23 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH2.bin";
+    int rc23 = MPI_File_open(MPI_COMM_WORLD, filename23.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
+    if (rc23 != MPI_SUCCESS)
+        throw UbException(UB_EXARGS, "couldn't open file " + filename23);
     MPI_File_set_size(file_handler, new_size);
     MPI_File_close(&file_handler);
 
@@ -441,25 +455,20 @@ void MPIIOCoProcessor::clearAllFiles(int step)
     MPI_File_set_size(file_handler, new_size);
     MPI_File_close(&file_handler);
 
-    std::string filename10 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPhaseField.bin";
+    std::string filename10 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPhaseField1.bin";
     int rc10 = MPI_File_open(MPI_COMM_WORLD, filename10.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
     if (rc10 != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename10);
     MPI_File_set_size(file_handler, new_size);
     MPI_File_close(&file_handler);
 
-
-    /*std::string filename10 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpBC1.bin";
-    int rc10 = MPI_File_open(MPI_COMM_WORLD, filename10.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info,
-    &file_handler); if (rc10 != MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file " + filename10);
+    std::string filename11 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPhaseField2.bin";
+    int rc11 = MPI_File_open(MPI_COMM_WORLD, filename11.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
+    if (rc11 != MPI_SUCCESS)
+        throw UbException(UB_EXARGS, "couldn't open file " + filename11);
     MPI_File_set_size(file_handler, new_size);
     MPI_File_close(&file_handler);
 
-    std::string filename11 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpBC2.bin";
-    int rc11 = MPI_File_open(MPI_COMM_WORLD, filename11.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info,
-    &file_handler); if (rc11 != MPI_SUCCESS) throw UbException(UB_EXARGS, "couldn't open file " + filename11);
-    MPI_File_set_size(file_handler, new_size);
-    MPI_File_close(&file_handler);*/
 }
 
 void MPIIOCoProcessor::writeCpTimeStep(int step)
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.cpp
index 39b32b6aac7110eb873a52351d46560f8843820e..f82111472cb8bedf88e9a8e5c556f2eca9f0a9a0 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.cpp
@@ -25,9 +25,8 @@ using namespace MPIIODataStructures;
 #define MESSAGE_TAG 80
 #define SEND_BLOCK_SIZE 100000
 
-MPIIOMigrationBECoProcessor::MPIIOMigrationBECoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s,
-                                                         const std::string &path, SPtr<Communicator> comm)
-    : MPIIOCoProcessor(grid, s, path, comm), nue(-999.999)
+MPIIOMigrationBECoProcessor::MPIIOMigrationBECoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, SPtr<Communicator> comm)
+    : MPIIOCoProcessor(grid, s, path, comm), nue(-999.999), nuL(-999.999), nuG(-999.999), densityRatio(-999.999)
 {
     memset(&boundCondParamStr, 0, sizeof(boundCondParamStr));
 
@@ -107,44 +106,60 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
 
     dataSetParam dataSetParamStr1, dataSetParamStr2, dataSetParamStr3;
     int firstGlobalID;
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
+    std::vector<double> doubleValuesArrayF; // double-values (arrays of f's) in all blocks  Fdistribution
+    std::vector<double> doubleValuesArrayH1; // double-values (arrays of f's) in all blocks  H1distribution
+    // std::vector<double> doubleValuesArrayH2; // double-values (arrays of f's) in all blocks  H2distribution
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::writeDataSet start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
+    bool multiPhase = false;
     DSArraysPresence arrPresence;
     bool firstBlock        = true;
     int doubleCountInBlock = 0;
     int ic                 = 0;
-    SPtr<D3Q27EsoTwist3DSplittedVector> D3Q27EsoTwist3DSplittedVectorPtrF, D3Q27EsoTwist3DSplittedVectorPtrH;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF, localDistributionsH;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF, nonLocalDistributionsH;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF, zeroDistributionsH;
+    SPtr<D3Q27EsoTwist3DSplittedVector> D3Q27EsoTwist3DSplittedVectorPtrF = 0, D3Q27EsoTwist3DSplittedVectorPtrH1 = 0, D3Q27EsoTwist3DSplittedVectorPtrH2 = 0;
+    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF = 0, localDistributionsH1 = 0, localDistributionsH2 = 0;
+    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF = 0, nonLocalDistributionsH1 = 0, nonLocalDistributionsH2 = 0;
+    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF = 0, zeroDistributionsH1 = 0, zeroDistributionsH2 = 0;
     
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
         {
-            D3Q27EsoTwist3DSplittedVectorPtrF = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(
-                block->getKernel()->getDataSet()->getFdistributions());
+            D3Q27EsoTwist3DSplittedVectorPtrF = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(block->getKernel()->getDataSet()->getFdistributions());
             localDistributionsF    = D3Q27EsoTwist3DSplittedVectorPtrF->getLocalDistributions();
             nonLocalDistributionsF = D3Q27EsoTwist3DSplittedVectorPtrF->getNonLocalDistributions();
             zeroDistributionsF     = D3Q27EsoTwist3DSplittedVectorPtrF->getZeroDistributions();
  
-            D3Q27EsoTwist3DSplittedVectorPtrH = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(
-                block->getKernel()->getDataSet()->getHdistributions());
-            localDistributionsH    = D3Q27EsoTwist3DSplittedVectorPtrH->getLocalDistributions();
-            nonLocalDistributionsH = D3Q27EsoTwist3DSplittedVectorPtrH->getNonLocalDistributions();
-            zeroDistributionsH     = D3Q27EsoTwist3DSplittedVectorPtrH->getZeroDistributions();
+            D3Q27EsoTwist3DSplittedVectorPtrH1 = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(block->getKernel()->getDataSet()->getHdistributions());
+            if (D3Q27EsoTwist3DSplittedVectorPtrH1 != 0)
+            {
+                multiPhase = true;
+                localDistributionsH1 = D3Q27EsoTwist3DSplittedVectorPtrH1->getLocalDistributions();
+                nonLocalDistributionsH1 = D3Q27EsoTwist3DSplittedVectorPtrH1->getNonLocalDistributions();
+                zeroDistributionsH1 = D3Q27EsoTwist3DSplittedVectorPtrH1->getZeroDistributions();
+            }
+
+            /*D3Q27EsoTwist3DSplittedVectorPtrH2 = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(block->getKernel()->getDataSet()->getH2distributions());
+            if (D3Q27EsoTwist3DSplittedVectorPtrH2 != 0)
+            {
+                localDistributionsH2 = D3Q27EsoTwist3DSplittedVectorPtrH2->getLocalDistributions();
+                nonLocalDistributionsH2 = D3Q27EsoTwist3DSplittedVectorPtrH2->getNonLocalDistributions();
+                zeroDistributionsH2 = D3Q27EsoTwist3DSplittedVectorPtrH2->getZeroDistributions();
+            }*/
+
 
             if (firstBlock) // && block->getKernel()) // when first (any) valid block...
             {
@@ -171,94 +186,92 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
                 }
 
                 // ... than save some parameters that are equal in all blocks
-                dataSetParamStr1.nx1 = dataSetParamStr2.nx1 = dataSetParamStr3.nx1 =
-                    static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX1());
-                dataSetParamStr1.nx2 = dataSetParamStr2.nx2 = dataSetParamStr3.nx2 =
-                    static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX2());
-                dataSetParamStr1.nx3 = dataSetParamStr2.nx3 = dataSetParamStr3.nx3 =
-                    static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX3());
-
-             //  Fdistributions + Hdistributions
-                doubleCountInBlock =
-                    (dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
+                dataSetParamStr1.nx1 = dataSetParamStr2.nx1 = dataSetParamStr3.nx1 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX1());
+                dataSetParamStr1.nx2 = dataSetParamStr2.nx2 = dataSetParamStr3.nx2 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX2());
+                dataSetParamStr1.nx3 = dataSetParamStr2.nx3 = dataSetParamStr3.nx3 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX3());
+
+                doubleCountInBlock = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
                     dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
-                    dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3]) * 2;
+                    dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> averageDensityArray =
-                    block->getKernel()->getDataSet()->getAverageDensity();
+                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> averageDensityArray = block->getKernel()->getDataSet()->getAverageDensity();
                 if (averageDensityArray)
                     arrPresence.isAverageDensityArrayPresent = true;
                 else
                     arrPresence.isAverageDensityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageVelocityArray3DPtr =
-                    block->getKernel()->getDataSet()->getAverageVelocity();
+                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
                 if (AverageVelocityArray3DPtr)
                     arrPresence.isAverageVelocityArrayPresent = true;
                 else
                     arrPresence.isAverageVelocityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageFluctArray3DPtr =
-                    block->getKernel()->getDataSet()->getAverageFluctuations();
+                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
                 if (AverageFluctArray3DPtr)
                     arrPresence.isAverageFluktuationsArrayPresent = true;
                 else
                     arrPresence.isAverageFluktuationsArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageTripleArray3DPtr =
-                    block->getKernel()->getDataSet()->getAverageTriplecorrelations();
+                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
                 if (AverageTripleArray3DPtr)
                     arrPresence.isAverageTripleArrayPresent = true;
                 else
                     arrPresence.isAverageTripleArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ShearStressValArray3DPtr =
-                    block->getKernel()->getDataSet()->getShearStressValues();
+                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
                 if (ShearStressValArray3DPtr)
                     arrPresence.isShearStressValArrayPresent = true;
                 else
                     arrPresence.isShearStressValArrayPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> relaxationFactor3DPtr =
-                    block->getKernel()->getDataSet()->getRelaxationFactor();
+                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> relaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
                 if (relaxationFactor3DPtr)
                     arrPresence.isRelaxationFactorPresent = true;
                 else
                     arrPresence.isRelaxationFactorPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr =
-                    block->getKernel()->getDataSet()->getPhaseField();
-                if (phaseField3DPtr)
-                    arrPresence.isPhaseFieldPresent = true;
+                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr1 = block->getKernel()->getDataSet()->getPhaseField();
+                if (phaseField3DPtr1)
+                    arrPresence.isPhaseField1Present = true;
                 else
-                    arrPresence.isPhaseFieldPresent = false;
+                    arrPresence.isPhaseField1Present = false;
+
+                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr2 = block->getKernel()->getDataSet()->getPhaseField2();
+                if (phaseField3DPtr2)
+                    arrPresence.isPhaseField2Present = true;
+                else
+                    arrPresence.isPhaseField2Present = false;
+
 
                 firstBlock = false;
             }
 
-            if (localDistributionsF && (dataSetParamStr1.nx[0] > 0) && (dataSetParamStr1.nx[1] > 0) &&
-                (dataSetParamStr1.nx[2] > 0) && (dataSetParamStr1.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), localDistributionsF->getDataVector().begin(),
-                                         localDistributionsF->getDataVector().end());
-            if (nonLocalDistributionsF && (dataSetParamStr2.nx[0] > 0) && (dataSetParamStr2.nx[1] > 0) &&
-                (dataSetParamStr2.nx[2] > 0) && (dataSetParamStr2.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), nonLocalDistributionsF->getDataVector().begin(),
-                                         nonLocalDistributionsF->getDataVector().end());
+            if (localDistributionsF && (dataSetParamStr1.nx[0] > 0) && (dataSetParamStr1.nx[1] > 0) && (dataSetParamStr1.nx[2] > 0) && (dataSetParamStr1.nx[3] > 0))
+                doubleValuesArrayF.insert(doubleValuesArrayF.end(), localDistributionsF->getDataVector().begin(), localDistributionsF->getDataVector().end());
+            if (nonLocalDistributionsF && (dataSetParamStr2.nx[0] > 0) && (dataSetParamStr2.nx[1] > 0) && (dataSetParamStr2.nx[2] > 0) && (dataSetParamStr2.nx[3] > 0))
+                doubleValuesArrayF.insert(doubleValuesArrayF.end(), nonLocalDistributionsF->getDataVector().begin(), nonLocalDistributionsF->getDataVector().end());
             if (zeroDistributionsF && (dataSetParamStr3.nx[0] > 0) && (dataSetParamStr3.nx[1] > 0) && (dataSetParamStr3.nx[2] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), zeroDistributionsF->getDataVector().begin(),
-                                         zeroDistributionsF->getDataVector().end());
-
-            if (localDistributionsH && (dataSetParamStr1.nx[0] > 0) && (dataSetParamStr1.nx[1] > 0) &&
-                (dataSetParamStr1.nx[2] > 0) && (dataSetParamStr1.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), localDistributionsH->getDataVector().begin(),
-                                         localDistributionsH->getDataVector().end());
-            if (nonLocalDistributionsH && (dataSetParamStr2.nx[0] > 0) && (dataSetParamStr2.nx[1] > 0) &&
-                (dataSetParamStr2.nx[2] > 0) && (dataSetParamStr2.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), nonLocalDistributionsH->getDataVector().begin(),
-                                         nonLocalDistributionsH->getDataVector().end());
-            if (zeroDistributionsH && (dataSetParamStr3.nx[0] > 0) && (dataSetParamStr3.nx[1] > 0) && (dataSetParamStr3.nx[2] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), zeroDistributionsH->getDataVector().begin(),
-                                         zeroDistributionsH->getDataVector().end());
+                doubleValuesArrayF.insert(doubleValuesArrayF.end(), zeroDistributionsF->getDataVector().begin(), zeroDistributionsF->getDataVector().end());
+
+            if (multiPhase)
+            {
+                if (localDistributionsH1 && (dataSetParamStr1.nx[0] > 0) && (dataSetParamStr1.nx[1] > 0) && (dataSetParamStr1.nx[2] > 0) && (dataSetParamStr1.nx[3] > 0))
+                    doubleValuesArrayH1.insert(doubleValuesArrayH1.end(), localDistributionsH1->getDataVector().begin(), localDistributionsH1->getDataVector().end());
+                if (nonLocalDistributionsH1 && (dataSetParamStr2.nx[0] > 0) && (dataSetParamStr2.nx[1] > 0) && (dataSetParamStr2.nx[2] > 0) && (dataSetParamStr2.nx[3] > 0))
+                    doubleValuesArrayH1.insert(doubleValuesArrayH1.end(), nonLocalDistributionsH1->getDataVector().begin(), nonLocalDistributionsH1->getDataVector().end());
+                if (zeroDistributionsH1 && (dataSetParamStr3.nx[0] > 0) && (dataSetParamStr3.nx[1] > 0) && (dataSetParamStr3.nx[2] > 0))
+                    doubleValuesArrayH1.insert(doubleValuesArrayH1.end(), zeroDistributionsH1->getDataVector().begin(), zeroDistributionsH1->getDataVector().end());
+            }
+
+            /*if (D3Q27EsoTwist3DSplittedVectorPtrH2 != 0)
+            {
+                if (localDistributionsH2 && (dataSetParamStr1.nx[0] > 0) && (dataSetParamStr1.nx[1] > 0) && (dataSetParamStr1.nx[2] > 0) && (dataSetParamStr1.nx[3] > 0))
+                doubleValuesArrayH2.insert(doubleValuesArrayH2.end(), localDistributionsH2->getDataVector().begin(), localDistributionsH2->getDataVector().end());
+                if (nonLocalDistributionsH2 && (dataSetParamStr2.nx[0] > 0) && (dataSetParamStr2.nx[1] > 0) && (dataSetParamStr2.nx[2] > 0) && (dataSetParamStr2.nx[3] > 0))
+                doubleValuesArrayH2.insert(doubleValuesArrayH2.end(), nonLocalDistributionsH2->getDataVector().begin(), nonLocalDistributionsH2->getDataVector().end());
+                if (zeroDistributionsH2 && (dataSetParamStr3.nx[0] > 0) && (dataSetParamStr3.nx[1] > 0) && (dataSetParamStr3.nx[2] > 0))
+                doubleValuesArrayH2.insert(doubleValuesArrayH2.end(), zeroDistributionsH2->getDataVector().begin(), zeroDistributionsH2->getDataVector().end());
+            }*/
 
             ic++;
         }
@@ -267,10 +280,10 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
     MPI_Type_contiguous(doubleCountInBlock , MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::writeDataSet start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     double start, finish;
@@ -287,28 +300,57 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
 
     // write to the file
     MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSet.bin";
+    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetF.bin";
     int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-    MPI_Offset write_offset = (MPI_Offset)(3 * sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) *
-                                                                           (MPI_Offset)(doubleCountInBlock) *
-                                                                           (MPI_Offset)(sizeof(double));
+    MPI_Offset write_offset = (MPI_Offset)(3 * sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
 
     MPI_File_write_at(file_handler, (MPI_Offset)0, &dataSetParamStr1, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    MPI_File_write_at(file_handler, (MPI_Offset)(sizeof(dataSetParam)), &dataSetParamStr2, 1, dataSetParamType,
-                      MPI_STATUS_IGNORE);
-    MPI_File_write_at(file_handler, (MPI_Offset)(2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType,
-                      MPI_STATUS_IGNORE);
-    MPI_File_write_at(file_handler, write_offset, &doubleValuesArray[0], blocksCount, dataSetDoubleType,
-                      MPI_STATUS_IGNORE);
+    MPI_File_write_at(file_handler, (MPI_Offset)(sizeof(dataSetParam)), &dataSetParamStr2, 1, dataSetParamType, MPI_STATUS_IGNORE);
+    MPI_File_write_at(file_handler, (MPI_Offset)(2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType, MPI_STATUS_IGNORE);
+    MPI_File_write_at(file_handler, write_offset, &doubleValuesArrayF[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
 
     MPI_File_sync(file_handler);
     MPI_File_close(&file_handler);
+
+    //-------------------------------- H1 ------------------------------------------------
+    if (multiPhase)
+    {
+        filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH1.bin";
+        rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
+        if (rc != MPI_SUCCESS)
+            throw UbException(UB_EXARGS, "couldn't open file " + filename);
+
+        write_offset = (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
+        MPI_File_write_at(file_handler, write_offset, &doubleValuesArrayH1[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+
+        MPI_File_sync(file_handler);
+        MPI_File_close(&file_handler);
+    }
+
+    //-------------------------------- H2 --------------------------------------------------
+    /*if (D3Q27EsoTwist3DSplittedVectorPtr2 != 0)
+    {
+        filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH2.bin";
+        rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
+        if (rc != MPI_SUCCESS)
+            throw UbException(UB_EXARGS, "couldn't open file " + filename);
+
+        write_offset = (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
+        MPI_File_write_at(file_handler, write_offset, &doubleValuesArrayH2[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+
+        MPI_File_sync(file_handler);
+        MPI_File_close(&file_handler);
+    }    */
+
+    //--------------------------------
+
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::writeDataSet time: " << finish - start << " s");
     }
@@ -346,9 +388,12 @@ void MPIIOMigrationBECoProcessor::writeDataSet(int step)
         write3DArray(step, RelaxationFactor, std::string("/cpRelaxationFactor.bin"));
     // writeRelaxationFactor(step);
 
-    if (arrPresence.isPhaseFieldPresent)
-        write3DArray(step, PhaseField, std::string("/cpPhaseField.bin"));
-}
+    if (arrPresence.isPhaseField1Present)
+        write3DArray(step, PhaseField1, std::string("/cpPhaseField1.bin"));
+
+    if (arrPresence.isPhaseField2Present)
+        write3DArray(step, PhaseField2, std::string("/cpPhaseField2.bin"));
+    }
 
 void MPIIOMigrationBECoProcessor::write4DArray(int step, Arrays arrayType, std::string fname)
 {
@@ -361,7 +406,8 @@ void MPIIOMigrationBECoProcessor::write4DArray(int step, Arrays arrayType, std::
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
@@ -374,16 +420,18 @@ void MPIIOMigrationBECoProcessor::write4DArray(int step, Arrays arrayType, std::
     int ic                 = 0;
     SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ___Array;
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::write4DArray start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
         {
-            switch (arrayType) {
+            switch (arrayType) 
+            {
                 case AverageDensity:
                     ___Array = block->getKernel()->getDataSet()->getAverageDensity();
                     break;
@@ -400,8 +448,7 @@ void MPIIOMigrationBECoProcessor::write4DArray(int step, Arrays arrayType, std::
                     ___Array = block->getKernel()->getDataSet()->getShearStressValues();
                     break;
                 default:
-                    UB_THROW(UbException(UB_EXARGS,
-                                         "MPIIOMigrationBECoProcessor::write4DArray : 4D array type does not exist!"));
+                    UB_THROW(UbException(UB_EXARGS, "MPIIOMigrationBECoProcessor::write4DArray : 4D array type does not exist!"));
                     break;
             }
 
@@ -410,29 +457,26 @@ void MPIIOMigrationBECoProcessor::write4DArray(int step, Arrays arrayType, std::
                 firstGlobalID = block->getGlobalID();
 
                 dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0]                                           = static_cast<int>(___Array->getNX1());
-                dataSetParamStr.nx[1]                                           = static_cast<int>(___Array->getNX2());
-                dataSetParamStr.nx[2]                                           = static_cast<int>(___Array->getNX3());
-                dataSetParamStr.nx[3]                                           = static_cast<int>(___Array->getNX4());
-                doubleCountInBlock =
-                    dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+                dataSetParamStr.nx[0] = static_cast<int>(___Array->getNX1());
+                dataSetParamStr.nx[1] = static_cast<int>(___Array->getNX2());
+                dataSetParamStr.nx[2] = static_cast<int>(___Array->getNX3());
+                dataSetParamStr.nx[3] = static_cast<int>(___Array->getNX4());
+                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
 
                 firstBlock = false;
             }
 
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) &&
-                (dataSetParamStr.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), ___Array->getDataVector().begin(),
-                                         ___Array->getDataVector().end());
+            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) && (dataSetParamStr.nx[3] > 0))
+                doubleValuesArray.insert(doubleValuesArray.end(), ___Array->getDataVector().begin(), ___Array->getDataVector().end());
 
             ic++;
         }
     }
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::write4DArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     // register new MPI-type depending on the block-specific information
@@ -451,20 +495,18 @@ void MPIIOMigrationBECoProcessor::write4DArray(int step, Arrays arrayType, std::
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-    MPI_Offset write_offset = (MPI_Offset)(sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) *
-                                                                       (MPI_Offset)(doubleCountInBlock) *
-                                                                       (MPI_Offset)(sizeof(double));
+    MPI_Offset write_offset = (MPI_Offset)(sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
 
     // each process writes common parameters of a dataSet
     MPI_File_write_at(file_handler, 0, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    MPI_File_write_at(file_handler, write_offset, &doubleValuesArray[0], blocksCount, dataSetDoubleType,
-                      MPI_STATUS_IGNORE);
+    MPI_File_write_at(file_handler, write_offset, &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
 
     MPI_File_sync(file_handler);
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::write4DArray time: " << finish - start << " s");
     }
@@ -481,7 +523,8 @@ void MPIIOMigrationBECoProcessor::write3DArray(int step, Arrays arrayType, std::
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
@@ -494,25 +537,30 @@ void MPIIOMigrationBECoProcessor::write3DArray(int step, Arrays arrayType, std::
     int ic                 = 0;
     SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> ___Array;
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::write3DArray start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
         {
-            switch (arrayType) {
+            switch (arrayType) 
+            {
                 case RelaxationFactor:
                     ___Array = block->getKernel()->getDataSet()->getRelaxationFactor();
                     break;
-                case PhaseField:
+                case PhaseField1:
                     ___Array = block->getKernel()->getDataSet()->getPhaseField();
                     break;
+                case PhaseField2:
+                    ___Array = block->getKernel()->getDataSet()->getPhaseField2();
+                    break;
                 default:
                     UB_THROW(UbException(UB_EXARGS,
-                                         "MPIIOMigrationBECoProcessor::write3DArray : 3D array type does not exist!"));
+                    "MPIIOMigrationBECoProcessor::write3DArray : 3D array type does not exist!"));
                     break;
             }
 
@@ -531,17 +579,16 @@ void MPIIOMigrationBECoProcessor::write3DArray(int step, Arrays arrayType, std::
             }
 
             if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), ___Array->getDataVector().begin(),
-                                         ___Array->getDataVector().end());
+                doubleValuesArray.insert(doubleValuesArray.end(), ___Array->getDataVector().begin(), ___Array->getDataVector().end());
 
             ic++;
         }
     }
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::write3DArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     // register new MPI-type depending on the block-specific information
@@ -560,37 +607,33 @@ void MPIIOMigrationBECoProcessor::write3DArray(int step, Arrays arrayType, std::
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-    MPI_Offset write_offset = (MPI_Offset)(sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) *
-                                                                       (MPI_Offset)(doubleCountInBlock) *
-                                                                       (MPI_Offset)(sizeof(double));
+    MPI_Offset write_offset = (MPI_Offset)(sizeof(dataSetParam)) + (MPI_Offset)(firstGlobalID) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
 
     // each process writes common parameters of a dataSet
     MPI_File_write_at(file_handler, 0, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    MPI_File_write_at(file_handler, write_offset, &doubleValuesArray[0], blocksCount, dataSetDoubleType,
-                      MPI_STATUS_IGNORE);
+    MPI_File_write_at(file_handler, write_offset, &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
 
     MPI_File_sync(file_handler);
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::write3DArray time: " << finish - start << " s");
     }
 }
 
-//---------------------------------------------------------------------------------
-
 void MPIIOMigrationBECoProcessor::writeBoundaryConds(int step)
 {
     int rank, size;
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::writeBoundaryConds start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     int blocksCount          = 0; // quantity of blocks, that belong to this process
@@ -601,7 +644,8 @@ void MPIIOMigrationBECoProcessor::writeBoundaryConds(int step)
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
@@ -616,24 +660,26 @@ void MPIIOMigrationBECoProcessor::writeBoundaryConds(int step)
     int ic                         = 0;
     SPtr<BCArray3D> bcArr;
 
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         for (SPtr<Block3D> block : blocksVector[level]) // all the blocks of the current level
         {
             bcArr = block->getKernel()->getBCProcessor()->getBCArray();
 
-            bcAddArray[ic].globalID =
-                block->getGlobalID();                // id of the block needed to find it while regenerating the grid
+            bcAddArray[ic].globalID = block->getGlobalID();                // id of the block needed to find it while regenerating the grid
             bcAddArray[ic].boundCond_count      = 0; // how many BoundaryConditions in this block
             bcAddArray[ic].indexContainer_count = 0; // how many indexContainer-values in this block
             bytesCount[ic]                      = sizeof(BCAddMigration);
             bcVector[ic].resize(0);
             indexContainerVector[ic].resize(0);
 
-            for (std::size_t bc = 0; bc < bcArr->getBCVectorSize(); bc++) {
+            for (std::size_t bc = 0; bc < bcArr->getBCVectorSize(); bc++) 
+            {
                 BoundaryCondition *bouCond = new BoundaryCondition();
                 if (bcArr->bcvector[bc] == NULL)
                     memset(bouCond, 0, sizeof(BoundaryCondition));
-                else {
+                else 
+                {
                     bouCond->noslipBoundaryFlags    = bcArr->bcvector[bc]->getNoSlipBoundary();
                     bouCond->slipBoundaryFlags      = bcArr->bcvector[bc]->getSlipBoundary();
                     bouCond->velocityBoundaryFlags  = bcArr->bcvector[bc]->getVelocityBoundary();
@@ -662,7 +708,8 @@ void MPIIOMigrationBECoProcessor::writeBoundaryConds(int step)
                 bytesCount[ic] += sizeof(BoundaryCondition);
             }
 
-            if (bcindexmatrixCountNotInit) {
+            if (bcindexmatrixCountNotInit)
+            {
                 boundCondParamStr.nx1                = static_cast<int>(bcArr->bcindexmatrix.getNX1());
                 boundCondParamStr.nx2                = static_cast<int>(bcArr->bcindexmatrix.getNX2());
                 boundCondParamStr.nx3                = static_cast<int>(bcArr->bcindexmatrix.getNX3());
@@ -670,11 +717,9 @@ void MPIIOMigrationBECoProcessor::writeBoundaryConds(int step)
                 bcindexmatrixCountNotInit            = false;
             }
 
-            bcindexmatrixVector.insert(bcindexmatrixVector.end(), bcArr->bcindexmatrix.getDataVector().begin(),
-                                       bcArr->bcindexmatrix.getDataVector().end());
+            bcindexmatrixVector.insert(bcindexmatrixVector.end(), bcArr->bcindexmatrix.getDataVector().begin(), bcArr->bcindexmatrix.getDataVector().end());
 
-            indexContainerVector[ic].insert(indexContainerVector[ic].begin(), bcArr->indexContainer.begin(),
-                                            bcArr->indexContainer.end());
+            indexContainerVector[ic].insert(indexContainerVector[ic].begin(), bcArr->indexContainer.begin(), bcArr->indexContainer.end());
             bcAddArray[ic].indexContainer_count = static_cast<int>(bcArr->indexContainer.size());
             count_indexContainer += bcAddArray[ic].indexContainer_count;
             bytesCount[ic] += bcAddArray[ic].indexContainer_count * sizeof(int);
@@ -688,10 +733,10 @@ void MPIIOMigrationBECoProcessor::writeBoundaryConds(int step)
     MPI_Type_contiguous(boundCondParamStr.bcindexmatrixCount, MPI_INT, &bcindexmatrixType);
     MPI_Type_commit(&bcindexmatrixType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::writeBoundaryConds start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     double start, finish;
@@ -712,13 +757,10 @@ void MPIIOMigrationBECoProcessor::writeBoundaryConds(int step)
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
-    MPI_Offset write_offset = (MPI_Offset)(sizeof(int)) + (MPI_Offset)(bcAddArray[0].globalID) *
-                                                              (MPI_Offset)(boundCondParamStr.bcindexmatrixCount) *
-                                                              (MPI_Offset)(sizeof(int));
+    MPI_Offset write_offset = (MPI_Offset)(sizeof(int)) + (MPI_Offset)(bcAddArray[0].globalID) * (MPI_Offset)(boundCondParamStr.bcindexmatrixCount) * (MPI_Offset)(sizeof(int));
 
     MPI_File_write_at(file_handler, 0, &boundCondParamStr.bcindexmatrixCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_write_at(file_handler, write_offset, &bcindexmatrixVector[0], blocksCount, bcindexmatrixType,
-                      MPI_STATUS_IGNORE);
+    MPI_File_write_at(file_handler, write_offset, &bcindexmatrixVector[0], blocksCount, bcindexmatrixType, MPI_STATUS_IGNORE);
 
     MPI_File_sync(file_handler);
     MPI_File_close(&file_handler);
@@ -732,14 +774,17 @@ void MPIIOMigrationBECoProcessor::writeBoundaryConds(int step)
 
     MPI_File_write_at(file_handler, 0, &boundCondParamStr, 4, MPI_INT, MPI_STATUS_IGNORE);
 
-    write_offset =
-        (MPI_Offset)(sizeof(boundCondParam)) + (MPI_Offset)(grid->getNumberOfBlocks()) * (MPI_Offset)(sizeof(size_t));
+    write_offset = (MPI_Offset)(sizeof(boundCondParam)) + (MPI_Offset)(grid->getNumberOfBlocks()) * (MPI_Offset)(sizeof(size_t));
     size_t next_file_offset = 0;
-    if (size > 1) {
-        if (rank == 0) {
+    if (size > 1) 
+    {
+        if (rank == 0) 
+        {
             next_file_offset = write_offset + allBytesCount;
             MPI_Send(&next_file_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        } 
+        else 
+        {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
             next_file_offset = write_offset + allBytesCount;
             if (rank < size - 1)
@@ -749,21 +794,17 @@ void MPIIOMigrationBECoProcessor::writeBoundaryConds(int step)
 
     MPI_Offset write_offsetIndex;
 
-    for (int nb = 0; nb < blocksCount; nb++) {
-        write_offsetIndex =
-            (MPI_Offset)(sizeof(boundCondParam)) + (MPI_Offset)(bcAddArray[nb].globalID) * (MPI_Offset)(sizeof(size_t));
+    for (int nb = 0; nb < blocksCount; nb++) 
+    {
+        write_offsetIndex = (MPI_Offset)(sizeof(boundCondParam)) + (MPI_Offset)(bcAddArray[nb].globalID) * (MPI_Offset)(sizeof(size_t));
         MPI_File_write_at(file_handler, write_offsetIndex, &write_offset, 1, MPI_LONG_LONG_INT, MPI_STATUS_IGNORE);
 
         MPI_File_write_at(file_handler, write_offset, &bcAddArray[nb], 3, MPI_INT, MPI_STATUS_IGNORE);
         if (bcVector[nb].size() > 0)
-            MPI_File_write_at(file_handler, write_offset + (MPI_Offset)(sizeof(BCAddMigration)), &bcVector[nb][0],
-                              bcAddArray[nb].boundCond_count, boundCondType, MPI_STATUS_IGNORE);
+            MPI_File_write_at(file_handler, write_offset + (MPI_Offset)(sizeof(BCAddMigration)), &bcVector[nb][0], bcAddArray[nb].boundCond_count, boundCondType, MPI_STATUS_IGNORE);
 
         if (indexContainerVector[nb].size() > 0)
-            MPI_File_write_at(
-                file_handler,
-                write_offset + (MPI_Offset)(sizeof(BCAddMigration)) +
-                    (MPI_Offset)(bcAddArray[nb].boundCond_count) * (MPI_Offset)(sizeof(BoundaryCondition)),
+            MPI_File_write_at(file_handler, write_offset + (MPI_Offset)(sizeof(BCAddMigration)) + (MPI_Offset)(bcAddArray[nb].boundCond_count) * (MPI_Offset)(sizeof(BoundaryCondition)),
                 &indexContainerVector[nb][0], bcAddArray[nb].indexContainer_count, MPI_INT, MPI_STATUS_IGNORE);
 
         write_offset += bytesCount[nb];
@@ -772,7 +813,8 @@ void MPIIOMigrationBECoProcessor::writeBoundaryConds(int step)
     MPI_File_sync(file_handler);
     MPI_File_close(&file_handler);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::writeBoundaryConds time: " << finish - start << " s");
     }
@@ -792,8 +834,7 @@ void MPIIOMigrationBECoProcessor::restart(int step)
         UBLOG(logINFO, "Load check point - start");
 
     readBlocks(step);
-    SPtr<Grid3DVisitor> newMetisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased,
-                                                                         D3Q27System::BSW, MetisPartitioner::KWAY));
+    SPtr<Grid3DVisitor> newMetisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::BSW, MetisPartitioner::KWAY));
     grid->accept(newMetisVisitor);
 
     readDataSet(step);
@@ -806,8 +847,7 @@ void MPIIOMigrationBECoProcessor::restart(int step)
 
 void MPIIOMigrationBECoProcessor::readBlocks(int step) { MPIIOCoProcessor::readBlocks(step); }
 
-void MPIIOMigrationBECoProcessor::blocksExchange(int tagN, int ind1, int ind2, int doubleCountInBlock,
-                                                 std::vector<double> &pV, std::vector<double> *rawDataReceive)
+void MPIIOMigrationBECoProcessor::blocksExchange(int tagN, int ind1, int ind2, int doubleCountInBlock, std::vector<double> &pV, std::vector<double> *rawDataReceive)
 {
     int rank, size;
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -821,7 +861,8 @@ void MPIIOMigrationBECoProcessor::blocksExchange(int tagN, int ind1, int ind2, i
     int *blocksCounterRec  = new int[size];
 
     std::vector<double> *rawDataSend = new std::vector<double>[size];
-    for (int r = 0; r < size; r++) {
+    for (int r = 0; r < size; r++) 
+    {
         rawDataSend[r].resize(0);
         blocksCounterSend[r] = 0;
         blocksCounterRec[r]  = 0;
@@ -842,8 +883,7 @@ void MPIIOMigrationBECoProcessor::blocksExchange(int tagN, int ind1, int ind2, i
         {
             blocksCounterRec[tempRank]++;
             rawDataReceive[tempRank].push_back(double(indexB + ind));
-            rawDataReceive[tempRank].insert(rawDataReceive[tempRank].end(),
-                                            pV.begin() + ind * size_t(doubleCountInBlock),
+            rawDataReceive[tempRank].insert(rawDataReceive[tempRank].end(), pV.begin() + ind * size_t(doubleCountInBlock),
                                             pV.begin() + ind * size_t(doubleCountInBlock) + size_t(doubleCountInBlock));
         } else // we must send data to other processes
         {
@@ -856,17 +896,20 @@ void MPIIOMigrationBECoProcessor::blocksExchange(int tagN, int ind1, int ind2, i
 
     MPI_Request *requests = new MPI_Request[size * 2]; // send + receive
     int requestCount      = 0;
-    //   MPI_Status status;
 
-    for (int r = 0; r < size; r++) {
-        if (r != rank) {
+    for (int r = 0; r < size; r++) 
+    {
+        if (r != rank) 
+        {
             MPI_Irecv(&blocksCounterRec[r], 1, MPI_INT, r, tagN, MPI_COMM_WORLD, &requests[requestCount]);
             requestCount++;
         }
     }
 
-    for (int r = 0; r < size; r++) {
-        if (r != rank) {
+    for (int r = 0; r < size; r++) 
+    {
+        if (r != rank) 
+        {
             MPI_Isend(&blocksCounterSend[r], 1, MPI_INT, r, tagN, MPI_COMM_WORLD, &requests[requestCount]);
             requestCount++;
         }
@@ -877,7 +920,8 @@ void MPIIOMigrationBECoProcessor::blocksExchange(int tagN, int ind1, int ind2, i
     MPI_Type_contiguous(doubleCountInBlock + 1, MPI_DOUBLE, &sendBlockDoubleType);
     MPI_Type_commit(&sendBlockDoubleType);
 
-    for (int r = 0; r < size; r++) {
+    for (int r = 0; r < size; r++) 
+    {
         if (r != rank)
             rawDataReceive[r].resize(size_t(blocksCounterRec[r]) * size_t(doubleCountInBlock + 1));
     }
@@ -888,35 +932,39 @@ void MPIIOMigrationBECoProcessor::blocksExchange(int tagN, int ind1, int ind2, i
     const int maxQuant   = 400;
     int restQuant;
 
-    for (int r = 0; r < size; r++) {
-        if (r != rank) {
+    for (int r = 0; r < size; r++) 
+    {
+        if (r != rank) 
+        {
             sendRecCount = int(blocksCounterRec[r] / maxQuant);
             if (sendRecCount * maxQuant < blocksCounterRec[r])
                 sendRecCount++;
             requests = (MPI_Request *)realloc(requests, (requestCount + sendRecCount) * sizeof(MPI_Request));
 
-            for (int sc = 0; sc < sendRecCount; sc++) {
+            for (int sc = 0; sc < sendRecCount; sc++)
+            {
                 restQuant     = (sc < sendRecCount - 1) ? maxQuant : blocksCounterRec[r] - sc * maxQuant;
                 sendRecOffset = size_t(sc) * size_t(maxQuant) * size_t((doubleCountInBlock + 1));
-                MPI_Irecv(&rawDataReceive[r][sendRecOffset], restQuant, sendBlockDoubleType, r, tagN, MPI_COMM_WORLD,
-                          &requests[requestCount]);
+                MPI_Irecv(&rawDataReceive[r][sendRecOffset], restQuant, sendBlockDoubleType, r, tagN, MPI_COMM_WORLD, &requests[requestCount]);
                 requestCount++;
             }
         }
     }
 
-    for (int r = 0; r < size; r++) {
-        if (r != rank) {
+    for (int r = 0; r < size; r++) 
+    {
+        if (r != rank) 
+        {
             sendRecCount = int(blocksCounterSend[r] / maxQuant);
             if (sendRecCount * maxQuant < blocksCounterSend[r])
                 sendRecCount++;
             requests = (MPI_Request *)realloc(requests, (requestCount + sendRecCount) * sizeof(MPI_Request));
 
-            for (int sc = 0; sc < sendRecCount; sc++) {
+            for (int sc = 0; sc < sendRecCount; sc++) 
+            {
                 restQuant     = (sc < sendRecCount - 1) ? maxQuant : blocksCounterSend[r] - sc * maxQuant;
                 sendRecOffset = size_t(sc) * size_t(maxQuant) * size_t((doubleCountInBlock + 1));
-                MPI_Isend(&rawDataSend[r][sendRecOffset], restQuant, sendBlockDoubleType, r, tagN, MPI_COMM_WORLD,
-                          &requests[requestCount]);
+                MPI_Isend(&rawDataSend[r][sendRecOffset], restQuant, sendBlockDoubleType, r, tagN, MPI_COMM_WORLD,  &requests[requestCount]);
                 requestCount++;
             }
         }
@@ -942,13 +990,20 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
         UB_THROW(UbException(UB_EXARGS, "bcProcessor does not exist!"));
     if (nue == -999.999)
         UB_THROW(UbException(UB_EXARGS, "nue is not initialised!"));
+    if (nuL == -999.999 )
+        UB_THROW(UbException(UB_EXARGS, "nuL is not initialised!"));
+    if (nuG == -999.999)
+        UB_THROW(UbException(UB_EXARGS, "nuG is not initialised!"));
+    if (densityRatio == -999.999)
+        UB_THROW(UbException(UB_EXARGS, "densityRatio is not initialised!"));
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readDataSet start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
+    bool multiPhase = false;
     dataSetParam dataSetParamStr1, dataSetParamStr2, dataSetParamStr3;
 
     int blocksCountAll   = grid->getNumberOfBlocks(); // quantity of all blocks in the grid
@@ -968,150 +1023,183 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
         start = MPI_Wtime();
 
     MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSet.bin";
+    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetF.bin";
     int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
     MPI_File_read_at(file_handler, (MPI_Offset)0, &dataSetParamStr1, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(sizeof(dataSetParam)), &dataSetParamStr2, 1, dataSetParamType,
-                     MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType,
-                     MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(sizeof(dataSetParam)), &dataSetParamStr2, 1, dataSetParamType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
-    size_t doubleCountInBlock =
-        (dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
+    size_t doubleCountInBlock = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
         dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
-        dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3]) * 2;
-    std::vector<double> doubleValuesArray(myBlocksCount * doubleCountInBlock); // double-values in all blocks
+        dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
+    std::vector<double> doubleValuesArrayF(size_t(myBlocksCount * doubleCountInBlock)); // double-values in all blocks  Fdistributions
+    std::vector<double> doubleValuesArrayH1; // double-values in all blocks  H1distributions
+    //std::vector<double> doubleValuesArrayH2; // double-values in all blocks  H2distributions
 
     MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    MPI_Offset read_offset =
-        (MPI_Offset)(3 * sizeof(dataSetParam)) + (MPI_Offset)(indexB * doubleCountInBlock * sizeof(double));
-    MPI_File_read_at(file_handler, read_offset, &doubleValuesArray[0], int(myBlocksCount), dataSetDoubleType,
-                     MPI_STATUS_IGNORE);
+    //--------------------------------- F ---------------------------------------------------------
+    MPI_Offset read_offset = (MPI_Offset)(3 * sizeof(dataSetParam)) + (MPI_Offset)(indexB * doubleCountInBlock * sizeof(double));
+    MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayF[0], int(myBlocksCount), dataSetDoubleType, MPI_STATUS_IGNORE);
+
+    MPI_File_close(&file_handler);
+
+    //--------------------------------- H1 ---------------------------------------------------------
+    MPI_Offset fsize;
+    filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH1.bin";
+    rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+    if (rc != MPI_SUCCESS)
+        throw UbException(UB_EXARGS, "couldn't open file " + filename);
+    MPI_File_get_size(file_handler, &fsize);
+    if (fsize > 0)
+    {
+        multiPhase = true;
+        doubleValuesArrayH1.resize(myBlocksCount * doubleCountInBlock);
 
+        read_offset = (MPI_Offset)(indexB * doubleCountInBlock * sizeof(double)) ;
+        MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayH1[0], int(myBlocksCount), dataSetDoubleType, MPI_STATUS_IGNORE);
+    }
     MPI_File_close(&file_handler);
+
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readDataSet time: " << finish - start << " s");
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readDataSet start of exchange of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
-    std::vector<double> *rawDataReceive = new std::vector<double>[size];
+    std::vector<double>* rawDataReceiveF = new std::vector<double>[size];
     for (int r = 0; r < size; r++)
-        rawDataReceive[r].resize(0);
+        rawDataReceiveF[r].resize(0);
+    blocksExchange(MESSAGE_TAG, indexB, indexE, int(doubleCountInBlock), doubleValuesArrayF, rawDataReceiveF);
+
+    std::vector<double>* rawDataReceiveH1 = new std::vector<double>[size];
+    for (int r = 0; r < size; r++)
+        rawDataReceiveH1[r].resize(0);
+    blocksExchange(MESSAGE_TAG, indexB, indexE, int(doubleCountInBlock), doubleValuesArrayH1, rawDataReceiveH1);
 
-    blocksExchange(MESSAGE_TAG, indexB, indexE, int(doubleCountInBlock), doubleValuesArray, rawDataReceive);
+    /*    std::vector<double>* rawDataReceiveH2 = new std::vector<double>[size];
+        for (int r = 0; r < size; r++)
+            rawDataReceiveH2[r].resize(0);
+        blocksExchange(MESSAGE_TAG, indexB, indexE, int(doubleCountInBlock), doubleValuesArrayH2, rawDataReceiveH2);*/
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readDataSet time: " << finish - start << " s");
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readDataSet start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
-
+    
     //-------------------------------------- restore blocks ---------------------------------
     int blockID;
-    std::vector<double> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
-    std::vector<double> vectorsOfValuesH1, vectorsOfValuesH2, vectorsOfValuesH3;
-
-    size_t vectorSize1 =
-        dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3];
-    size_t vectorSize2 =
-        dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3];
-    size_t vectorSize3 =
-        dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
+    std::vector<LBMReal> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
+    std::vector<LBMReal> vectorsOfValuesH11, vectorsOfValuesH12, vectorsOfValuesH13;
+    //std::vector<LBMReal> vectorsOfValuesH21, vectorsOfValuesH22, vectorsOfValuesH23;
+
+    size_t vectorSize1 = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3];
+    size_t vectorSize2 = dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3];
+    size_t vectorSize3 = dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
 
     size_t index;
-    for (int r = 0; r < size; r++) {
+    for (int r = 0; r < size; r++) 
+    {
         index = 0;
-        for (int ii = 0; ii < int(rawDataReceive[r].size() / doubleCountInBlock); ii++) {
-            blockID = (int)(rawDataReceive[r][index]);
+        for (int ii = 0; ii < int(rawDataReceiveF[r].size() / doubleCountInBlock); ii++) 
+        {
+            blockID = (int)(rawDataReceiveF[r][index]);
             index += 1;
 
-            vectorsOfValuesF1.assign(rawDataReceive[r].data() + index, rawDataReceive[r].data() + index + vectorSize1);
-            index += vectorSize1;
-
-            vectorsOfValuesF2.assign(rawDataReceive[r].data() + index, rawDataReceive[r].data() + index + vectorSize2);
-            index += vectorSize2;
-
-            vectorsOfValuesF3.assign(rawDataReceive[r].data() + index, rawDataReceive[r].data() + index + vectorSize3);
-            index += vectorSize3;
-
-            vectorsOfValuesH1.assign(rawDataReceive[r].data() + index, rawDataReceive[r].data() + index + vectorSize1);
+            vectorsOfValuesF1.assign(rawDataReceiveF[r].data() + index, rawDataReceiveF[r].data() + index + vectorSize1);
+            if(multiPhase)
+                vectorsOfValuesH11.assign(rawDataReceiveH1[r].data() + index, rawDataReceiveH1[r].data() + index + vectorSize1);
+            //vectorsOfValuesH21.assign(rawDataReceiveH2[r].data() + index, rawDataReceiveH2[r].data() + index + vectorSize1);
             index += vectorSize1;
 
-            vectorsOfValuesH2.assign(rawDataReceive[r].data() + index, rawDataReceive[r].data() + index + vectorSize2);
+            vectorsOfValuesF2.assign(rawDataReceiveF[r].data() + index, rawDataReceiveF[r].data() + index + vectorSize2);
+            if (multiPhase)
+                vectorsOfValuesH12.assign(rawDataReceiveH1[r].data() + index, rawDataReceiveH1[r].data() + index + vectorSize1);
+            //vectorsOfValuesH22.assign(rawDataReceiveH2[r].data() + index, rawDataReceiveH2[r].data() + index + vectorSize1);
             index += vectorSize2;
 
-            vectorsOfValuesH3.assign(rawDataReceive[r].data() + index, rawDataReceive[r].data() + index + vectorSize3);
+            vectorsOfValuesF3.assign(rawDataReceiveF[r].data() + index, rawDataReceiveF[r].data() + index + vectorSize3);
+            if (multiPhase)
+                vectorsOfValuesH13.assign(rawDataReceiveH1[r].data() + index, rawDataReceiveH1[r].data() + index + vectorSize1);
+                //vectorsOfValuesH23.assign(rawDataReceiveH2[r].data() + index, rawDataReceiveH2[r].data() + index + vectorSize1);
             index += vectorSize3;
 
             SPtr<DistributionArray3D> mFdistributions(new D3Q27EsoTwist3DSplittedVector());
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)
-                ->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0],
-                                                            dataSetParamStr1.nx[1], dataSetParamStr1.nx[2],
-                                                            dataSetParamStr1.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)
-                ->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0],
-                                                            dataSetParamStr2.nx[1], dataSetParamStr2.nx[2],
-                                                            dataSetParamStr2.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)
-                ->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
                         vectorsOfValuesF3, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX1(dataSetParamStr1.nx1);
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX2(dataSetParamStr1.nx2);
             dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX3(dataSetParamStr1.nx3);
 
-            SPtr<DistributionArray3D> mHdistributions(new D3Q27EsoTwist3DSplittedVector());
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)
-                ->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH1, dataSetParamStr1.nx[0],
-                                                            dataSetParamStr1.nx[1], dataSetParamStr1.nx[2],
-                                                            dataSetParamStr1.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)
-                ->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH2, dataSetParamStr2.nx[0],
-                                                            dataSetParamStr2.nx[1], dataSetParamStr2.nx[2],
-                                                            dataSetParamStr2.nx[3])));
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)
-                ->setZeroDistributions( CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-                        vectorsOfValuesH3, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
-
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)->setNX1(dataSetParamStr1.nx1);
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)->setNX2(dataSetParamStr1.nx2);
-            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)->setNX3(dataSetParamStr1.nx3);
+            SPtr<DistributionArray3D> mH1distributions(new D3Q27EsoTwist3DSplittedVector());
+            if (multiPhase)
+            {
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH11, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH12, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                    vectorsOfValuesH13, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
+
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX1(dataSetParamStr1.nx1);
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX2(dataSetParamStr1.nx2);
+                dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX3(dataSetParamStr1.nx3);
+            }
+
+            /*SPtr<DistributionArray3D> mH2distributions(new D3Q27EsoTwist3DSplittedVector());
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH21, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                    new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH22, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                    vectorsOfValuesH23, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
+
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX1(dataSetParamStr1.nx1);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX2(dataSetParamStr1.nx2);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX3(dataSetParamStr1.nx3);*/
+
 
             // find the nesessary block and fill it
             SPtr<Block3D> block = grid->getBlock(blockID);
             this->lbmKernel->setBlock(block);
             SPtr<LBMKernel> kernel = this->lbmKernel->clone();
-            LBMReal collFactor     = LBMSystem::calcCollisionFactor(this->nue, block->getLevel());
+            LBMReal collFactor = LBMSystem::calcCollisionFactor(this->nue, block->getLevel());
+            LBMReal collFactorL = LBMSystem::calcCollisionFactor(this->nuL, block->getLevel());
+            LBMReal collFactorG = LBMSystem::calcCollisionFactor(this->nuG, block->getLevel());
             kernel->setCollisionFactor(collFactor);
             kernel->setIndex(block->getX1(), block->getX2(), block->getX3());
             kernel->setDeltaT(LBMSystem::getDeltaT(block->getLevel()));
+            kernel->setCollisionFactorMultiphase(collFactorL, collFactorG);
+            kernel->setDensityRatio(this->densityRatio);
             SPtr<DataSet3D> dataSetPtr = SPtr<DataSet3D>(new DataSet3D());
             dataSetPtr->setFdistributions(mFdistributions);
-            dataSetPtr->setHdistributions(mHdistributions);
+            if (multiPhase)
+                dataSetPtr->setHdistributions(mH1distributions);
+//            dataSetPtr->setHdistributions(mH2distributions);
             kernel->setDataSet(dataSetPtr);
             block->setKernel(kernel);
         }
     }
-    if (comm->isRoot()) {
+    //if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readDataSet end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     //-------------------------------------------------------------
@@ -1149,11 +1237,16 @@ void MPIIOMigrationBECoProcessor::readDataSet(int step)
     if (arrPresence.isRelaxationFactorPresent)
         readArray(step, RelaxationFactor, std::string("/cpRelaxationFactor.bin"));
     //   readRelaxationFactor(step);
+ 
+    if (arrPresence.isPhaseField1Present)
+        readArray(step, PhaseField1, std::string("/cpPhaseField1.bin"));
 
-    if (arrPresence.isPhaseFieldPresent)
-        readArray(step, PhaseField, std::string("/cpPhaseField.bin"));
+    if (arrPresence.isPhaseField2Present)
+        readArray(step, PhaseField2, std::string("/cpPhaseField2.bin"));
 
-    delete[] rawDataReceive;
+    delete[] rawDataReceiveF;
+//    delete[] rawDataReceiveH1;
+//    delete[] rawDataReceiveH2;
 }
 
 void MPIIOMigrationBECoProcessor::readArray(int step, Arrays arrType, std::string fname)
@@ -1162,11 +1255,12 @@ void MPIIOMigrationBECoProcessor::readArray(int step, Arrays arrType, std::strin
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
+
     double start, finish;
     if (comm->isRoot())
         start = MPI_Wtime();
@@ -1194,56 +1288,53 @@ void MPIIOMigrationBECoProcessor::readArray(int step, Arrays arrType, std::strin
 
     MPI_File_read_at(file_handler, (MPI_Offset)0, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
-    size_t doubleCountInBlock =
-        dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+    size_t doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
     std::vector<double> doubleValuesArray(myBlocksCount * doubleCountInBlock); // double-values in all blocks
 
     MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    MPI_Offset read_offset = (MPI_Offset)(sizeof(dataSetParam)) +
-                             (MPI_Offset)(indexB) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
-    MPI_File_read_at(file_handler, read_offset, &doubleValuesArray[0], int(myBlocksCount), dataSetDoubleType,
-                     MPI_STATUS_IGNORE);
+    MPI_Offset read_offset = (MPI_Offset)(sizeof(dataSetParam)) + (MPI_Offset)(indexB) * (MPI_Offset)(doubleCountInBlock) * (MPI_Offset)(sizeof(double));
+    MPI_File_read_at(file_handler, read_offset, &doubleValuesArray[0], int(myBlocksCount), dataSetDoubleType, MPI_STATUS_IGNORE);
 
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readArray time: " << finish - start << " s");
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readArray start of exchange of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     std::vector<double> *rawDataReceive = new std::vector<double>[size];
     for (int r = 0; r < size; r++)
         rawDataReceive[r].resize(0);
 
-    blocksExchange(MESSAGE_TAG + int(arrType), indexB, indexE, int(doubleCountInBlock), doubleValuesArray,
-                   rawDataReceive);
+    blocksExchange(MESSAGE_TAG + int(arrType), indexB, indexE, int(doubleCountInBlock), doubleValuesArray, rawDataReceive);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readArray end of exchange of data, rank = " << rank);
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readArray time: " << finish - start << " s");
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     //----------------------------- restore data ---------------------------------
     int blockID;
-    std::vector<double> vectorsOfValues;
+    std::vector<LBMReal> vectorsOfValues;
     size_t index;
-    size_t nextVectorSize =
-        dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
 
-    for (int r = 0; r < size; r++) {
+    for (int r = 0; r < size; r++) 
+    {
         index = 0;
-        for (int ii = 0; ii < int(rawDataReceive[r].size() / doubleCountInBlock); ii++) {
-            blockID             = (int)(rawDataReceive[r][index]);
+        for (int ii = 0; ii < int(rawDataReceive[r].size() / doubleCountInBlock); ii++) 
+        {
+            blockID = (int)(rawDataReceive[r][index]);
             SPtr<Block3D> block = grid->getBlock(blockID);
             index += 1;
 
@@ -1254,40 +1345,31 @@ void MPIIOMigrationBECoProcessor::readArray(int step, Arrays arrType, std::strin
             SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ___4DArray;
             SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> ___3DArray;
 
-            switch (arrType) {
+            switch (arrType) 
+            {
                 case AverageDensity:
-                    ___4DArray =
-                        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                            vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2],
-                            dataSetParamStr.nx[3]));
+                    ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                            vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                     block->getKernel()->getDataSet()->setAverageDensity(___4DArray);
                     break;
                 case AverageVelocity:
-                    ___4DArray =
-                        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                            vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2],
-                            dataSetParamStr.nx[3]));
+                    ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                            vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                     block->getKernel()->getDataSet()->setAverageVelocity(___4DArray);
                     break;
                 case AverageFluktuations:
-                    ___4DArray =
-                        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                            vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2],
-                            dataSetParamStr.nx[3]));
+                    ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                            vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                     block->getKernel()->getDataSet()->setAverageFluctuations(___4DArray);
                     break;
                 case AverageTriple:
-                    ___4DArray =
-                        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                            vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2],
-                            dataSetParamStr.nx[3]));
+                    ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                            vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                     block->getKernel()->getDataSet()->setAverageTriplecorrelations(___4DArray);
                     break;
                 case ShearStressVal:
-                    ___4DArray =
-                        CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                            vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2],
-                            dataSetParamStr.nx[3]));
+                    ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
+                            vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                     block->getKernel()->getDataSet()->setShearStressValues(___4DArray);
                     break;
                 case RelaxationFactor:
@@ -1295,14 +1377,18 @@ void MPIIOMigrationBECoProcessor::readArray(int step, Arrays arrType, std::strin
                         vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                     block->getKernel()->getDataSet()->setRelaxationFactor(___3DArray);
                     break;
-                case PhaseField:
+                case PhaseField1:
                     ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
                         vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                     block->getKernel()->getDataSet()->setPhaseField(___3DArray);
                     break;
+                case PhaseField2:
+                    ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                        vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
+                    block->getKernel()->getDataSet()->setPhaseField2(___3DArray);
+                    break;
                 default:
-                    UB_THROW(
-                        UbException(UB_EXARGS, "MPIIOMigrationBECoProcessor::readArray : array type does not exist!"));
+                    UB_THROW(UbException(UB_EXARGS, "MPIIOMigrationBECoProcessor::readArray : array type does not exist!"));
                     break;
             } 
         }
@@ -1310,10 +1396,10 @@ void MPIIOMigrationBECoProcessor::readArray(int step, Arrays arrType, std::strin
 
     delete[] rawDataReceive;
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 }
 
@@ -1323,10 +1409,10 @@ void MPIIOMigrationBECoProcessor::readBoundaryConds(int step)
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readBoundaryConds start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     double start, finish;
@@ -1360,25 +1446,24 @@ void MPIIOMigrationBECoProcessor::readBoundaryConds(int step)
     MPI_Type_contiguous(sizeOfBIM, MPI_INT, &bcindexmatrixType);
     MPI_Type_commit(&bcindexmatrixType);
 
-    MPI_Offset read_offset =
-        (MPI_Offset)(sizeof(int)) + (MPI_Offset)(indexB) * (MPI_Offset)(sizeOfBIM) * (MPI_Offset)(sizeof(int));
-    MPI_File_read_at(file_handler, read_offset, &bcindexmatrixVAll[0], int(myBlocksCount), bcindexmatrixType,
-                     MPI_STATUS_IGNORE);
+    MPI_Offset read_offset = (MPI_Offset)(sizeof(int)) + (MPI_Offset)(indexB) * (MPI_Offset)(sizeOfBIM) * (MPI_Offset)(sizeof(int));
+    MPI_File_read_at(file_handler, read_offset, &bcindexmatrixVAll[0], int(myBlocksCount), bcindexmatrixType, MPI_STATUS_IGNORE);
 
     MPI_File_close(&file_handler);
     MPI_Type_free(&bcindexmatrixType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readBoundaryConds time: " << finish - start << " s");
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readBoundaryConds start of exchange of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     std::vector<int> *rawDataReceive = new std::vector<int>[size];
     std::vector<int> *rawDataSend    = new std::vector<int>[size];
-    for (int r = 0; r < size; r++) {
+    for (int r = 0; r < size; r++) 
+    {
         rawDataReceive[r].resize(0);
         rawDataSend[r].resize(0);
         rawDataReceive[r].push_back(0);
@@ -1396,14 +1481,12 @@ void MPIIOMigrationBECoProcessor::readBoundaryConds(int step)
         {
             rawDataReceive[tempRank][0]++;
             rawDataReceive[tempRank].push_back(indexB + ind);
-            rawDataReceive[tempRank].insert(rawDataReceive[tempRank].end(), bcindexmatrixVAll.begin() + ind * sizeOfBIM,
-                                            bcindexmatrixVAll.begin() + ind * sizeOfBIM + sizeOfBIM);
+            rawDataReceive[tempRank].insert(rawDataReceive[tempRank].end(), bcindexmatrixVAll.begin() + ind * sizeOfBIM, bcindexmatrixVAll.begin() + ind * sizeOfBIM + sizeOfBIM);
         } else // we must send data to other processes
         {
             rawDataSend[tempRank][0]++;
             rawDataSend[tempRank].push_back(indexB + ind);
-            rawDataSend[tempRank].insert(rawDataSend[tempRank].end(), bcindexmatrixVAll.begin() + ind * sizeOfBIM,
-                                         bcindexmatrixVAll.begin() + ind * sizeOfBIM + sizeOfBIM);
+            rawDataSend[tempRank].insert(rawDataSend[tempRank].end(), bcindexmatrixVAll.begin() + ind * sizeOfBIM, bcindexmatrixVAll.begin() + ind * sizeOfBIM + sizeOfBIM);
         }
     }
 
@@ -1414,9 +1497,11 @@ void MPIIOMigrationBECoProcessor::readBoundaryConds(int step)
     int intBlockCount;
     int rds;
 
-    for (int r = 0; r < size; r++) {
-        if (r != rank) {
-            rds           = int(rawDataSend[r].size());
+    for (int r = 0; r < size; r++) 
+    {
+        if (r != rank) 
+        {
+            rds = int(rawDataSend[r].size());
             intBlockCount = (int)(rds / SEND_BLOCK_SIZE);
             if (intBlockCount * SEND_BLOCK_SIZE < rds)
                 intBlockCount += 1;
@@ -1424,21 +1509,21 @@ void MPIIOMigrationBECoProcessor::readBoundaryConds(int step)
             for (int i = rds; i < intBlockCount * SEND_BLOCK_SIZE; i++)
                 rawDataSend[r].push_back(0);
 
-            MPI_Isend(&rawDataSend[r][0], intBlockCount, sendBlockIntType, r, MESSAGE_TAG + 7, MPI_COMM_WORLD,
-                      &requests[requestCount]);
+            MPI_Isend(&rawDataSend[r][0], intBlockCount, sendBlockIntType, r, MESSAGE_TAG + 7, MPI_COMM_WORLD, &requests[requestCount]);
             // MPI_Isend(&rawDataSend[r][0], rawDataSend[r].size(), MPI_INT, r, MESSAGE_TAG + 7, MPI_COMM_WORLD,
             // &requests[requestCount]);
             requestCount++;
         }
     }
 
-    for (int r = 0; r < size; r++) {
-        if (r != rank) {
+    for (int r = 0; r < size; r++) 
+    {
+        if (r != rank) 
+        {
             MPI_Probe(r, MESSAGE_TAG + 7, MPI_COMM_WORLD, &status);
             MPI_Get_count(&status, sendBlockIntType, &quant);
             rawDataReceive[r].resize(quant * SEND_BLOCK_SIZE);
-            MPI_Irecv(&rawDataReceive[r][0], quant, sendBlockIntType, r, MESSAGE_TAG + 7, MPI_COMM_WORLD,
-                      &requests[requestCount]);
+            MPI_Irecv(&rawDataReceive[r][0], quant, sendBlockIntType, r, MESSAGE_TAG + 7, MPI_COMM_WORLD, &requests[requestCount]);
             requestCount++;
         }
     }
@@ -1447,17 +1532,17 @@ void MPIIOMigrationBECoProcessor::readBoundaryConds(int step)
 
     //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readBoundaryConds end of exchange of data, rank = " << rank);
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readBoundaryConds time: " << finish - start << " s");
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readBoundaryConds start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpBC2.bin";
-    rc       = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+    rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
@@ -1475,10 +1560,12 @@ void MPIIOMigrationBECoProcessor::readBoundaryConds(int step)
     std::vector<int> indexContainerV;
     std::vector<int> bcindexmatrixV;
 
-    for (int r = 0; r < size; r++) {
+    for (int r = 0; r < size; r++) 
+    {
         index = 1;
 
-        for (int ii = 0; ii < rawDataReceive[r][0]; ii++) {
+        for (int ii = 0; ii < rawDataReceive[r][0]; ii++) 
+        {
             blockID = (int)(rawDataReceive[r][index]);
             index += 1;
 
@@ -1498,18 +1585,19 @@ void MPIIOMigrationBECoProcessor::readBoundaryConds(int step)
                                  bcAddArray.boundCond_count, boundCondType, MPI_STATUS_IGNORE);
 
             if (bcAddArray.indexContainer_count > 0)
-                MPI_File_read_at(file_handler,
-                                 read_offset2 + (MPI_Offset)(sizeof(BCAddMigration)) +
+                MPI_File_read_at(file_handler, read_offset2 + (MPI_Offset)(sizeof(BCAddMigration)) +
                                      (MPI_Offset)(bcAddArray.boundCond_count) * (MPI_Offset)(sizeof(BoundaryCondition)),
                                  &indexContainerV[0], bcAddArray.indexContainer_count, MPI_INT, MPI_STATUS_IGNORE);
 
             bcVector.resize(0);
 
-            for (int ibc = 0; ibc < bcAddArray.boundCond_count; ibc++) {
+            for (int ibc = 0; ibc < bcAddArray.boundCond_count; ibc++) 
+            {
                 SPtr<BoundaryConditions> bc;
                 if (memcmp(&bcArray[ibc], nullBouCond, sizeof(BoundaryCondition)) == 0)
                     bc = SPtr<BoundaryConditions>();
-                else {
+                else 
+                {
                     bc                         = SPtr<BoundaryConditions>(new BoundaryConditions);
                     bc->noslipBoundaryFlags    = bcArray[ibc].noslipBoundaryFlags;
                     bc->slipBoundaryFlags      = bcArray[ibc].slipBoundaryFlags;
@@ -1537,8 +1625,7 @@ void MPIIOMigrationBECoProcessor::readBoundaryConds(int step)
                 bcVector.push_back(bc);
             }
 
-            CbArray3D<int, IndexerX3X2X1> bcim(bcindexmatrixV, boundCondParamStr.nx1, boundCondParamStr.nx2,
-                                               boundCondParamStr.nx3);
+            CbArray3D<int, IndexerX3X2X1> bcim(bcindexmatrixV, boundCondParamStr.nx1, boundCondParamStr.nx2, boundCondParamStr.nx3);
             SPtr<Block3D> block1 = grid->getBlock(blockID);
 
             SPtr<BCProcessor> bcProc = bcProcessor->clone(block1->getKernel());
@@ -1562,12 +1649,12 @@ void MPIIOMigrationBECoProcessor::readBoundaryConds(int step)
     delete[] rawDataSend;
     delete[] requests;
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readBoundaryConds end of restore of data, rank = " << rank);
         UBLOG(logINFO, "MPIIOMigrationBECoProcessor::readBoundaryConds time: " << finish - start << " s");
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 }
 
@@ -1577,3 +1664,8 @@ void MPIIOMigrationBECoProcessor::setLBMKernel(SPtr<LBMKernel> kernel) { this->l
 void MPIIOMigrationBECoProcessor::setBCProcessor(SPtr<BCProcessor> bcProcessor) { this->bcProcessor = bcProcessor; }
 //////////////////////////////////////////////////////////////////////////
 void MPIIOMigrationBECoProcessor::setNu(double nu) { this->nue = nu; }
+
+void MPIIOMigrationBECoProcessor::setNuLG(double cfL, double cfG) { this->nuL = cfL;  this->nuG = cfG; }
+
+void MPIIOMigrationBECoProcessor::setDensityRatio(double dr) { this->densityRatio = dr; }
+
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.h
index 4d825fde3956dcbe711f49b18b57cd929ba986d9..9a89ada1ae039d10cd53b06b189e5709398911c8 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationBECoProcessor.h
@@ -25,7 +25,8 @@ class MPIIOMigrationBECoProcessor : public MPIIOCoProcessor
         AverageTriple       = 4,
         ShearStressVal      = 5,
         RelaxationFactor    = 6,
-        PhaseField          = 7
+        PhaseField1         = 7,
+        PhaseField2 = 8
     };
 
 public:
@@ -71,6 +72,8 @@ public:
     //! The function truncates the data files
     void clearAllFiles(int step);
     void setNu(double nu);
+    void setNuLG(double cfL, double cfG);
+    void setDensityRatio(double dr);
 
     void blocksExchange(int tagN, int ind1, int ind2, int doubleCountInBlock, std::vector<double> &pV,
                         std::vector<double> *rawDataReceive);
@@ -91,6 +94,10 @@ private:
     SPtr<LBMKernel> lbmKernel;
     SPtr<BCProcessor> bcProcessor;
     double nue;
+    double nuL;
+    double nuG;
+    double densityRatio;
+
 };
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.cpp
index 69cc8795bb7fdb4791420473a242a0f1ff96a06d..fae3b48463a45499ea7c8d5b78d4bcfd01d1bb81 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.cpp
@@ -22,8 +22,7 @@
 
 using namespace MPIIODataStructures;
 
-MPIIOMigrationCoProcessor::MPIIOMigrationCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                     SPtr<Communicator> comm)
+MPIIOMigrationCoProcessor::MPIIOMigrationCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, SPtr<Communicator> comm)
     : MPIIOCoProcessor(grid, s, path, comm)
 {
     memset(&boundCondParamStr, 0, sizeof(boundCondParamStr));
@@ -31,7 +30,7 @@ MPIIOMigrationCoProcessor::MPIIOMigrationCoProcessor(SPtr<Grid3D> grid, SPtr<UbS
     //-------------------------   define MPI types  ---------------------------------
 
     MPI_Datatype typesDataSet[3] = { MPI_DOUBLE, MPI_INT, MPI_CHAR };
-    int blocksDataSet[3]         = { 2, 2, 2 };
+    int blocksDataSet[3]         = { 5, 2, 2 };
     MPI_Aint offsetsDatatSet[3], lbDataSet, extentDataSet;
 
     offsetsDatatSet[0] = 0;
@@ -71,7 +70,8 @@ MPIIOMigrationCoProcessor::~MPIIOMigrationCoProcessor()
 //////////////////////////////////////////////////////////////////////////
 void MPIIOMigrationCoProcessor::process(double step)
 {
-    if (scheduler->isDue(step)) {
+    if (scheduler->isDue(step)) 
+    {
         if (comm->isRoot())
             UBLOG(logINFO, "MPIIOMigrationCoProcessor save step: " << step);
         if (comm->isRoot())
@@ -100,8 +100,7 @@ void MPIIOMigrationCoProcessor::clearAllFiles(int step)
     UbSystem::makeDirectory(path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step));
 
     std::string filename10 = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpBC.bin";
-    int rc10 =
-        MPI_File_open(MPI_COMM_WORLD, filename10.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
+    int rc10 = MPI_File_open(MPI_COMM_WORLD, filename10.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
     if (rc10 != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename10);
     MPI_File_set_size(file_handler, new_size);
@@ -125,69 +124,93 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
 
     dataSetParam dataSetParamStr1, dataSetParamStr2, dataSetParamStr3;
     DataSetMigration *dataSetArray = new DataSetMigration[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
+    std::vector<double> doubleValuesArrayF; // double-values (arrays of f's) in all blocks  Fdistribution
+    std::vector<double> doubleValuesArrayH1; // double-values (arrays of f's) in all blocks  H1distribution
+    // std::vector<double> doubleValuesArrayH2; // double-values (arrays of f's) in all blocks  H2distribution
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::writeDataSet start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
+    bool multiPhase = false;
     DSArraysPresence arrPresence;
     bool firstBlock           = true;
     size_t doubleCountInBlock = 0;
     int ic                    = 0;
-    SPtr<D3Q27EsoTwist3DSplittedVector> D3Q27EsoTwist3DSplittedVectorPtrF, D3Q27EsoTwist3DSplittedVectorPtrH;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF, localDistributionsH;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF, nonLocalDistributionsH;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF, zeroDistributionsH;
+    SPtr<D3Q27EsoTwist3DSplittedVector> D3Q27EsoTwist3DSplittedVectorPtrF = 0, D3Q27EsoTwist3DSplittedVectorPtrH1 = 0, D3Q27EsoTwist3DSplittedVectorPtrH2 = 0;
+    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF = 0, localDistributionsH1 = 0, localDistributionsH2 = 0;
+    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF = 0, nonLocalDistributionsH1 = 0, nonLocalDistributionsH2 = 0;
+    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF = 0, zeroDistributionsH1 = 0, zeroDistributionsH2 = 0;
+
+    SPtr<LBMKernel> kernel;
 
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
         {
-            dataSetArray[ic].globalID =
-                block->getGlobalID(); // id of the block needed to find it while regenerating the grid
-            dataSetArray[ic].ghostLayerWidth = block->getKernel()->getGhostLayerWidth();
-            dataSetArray[ic].collFactor      = block->getKernel()->getCollisionFactor();
-            dataSetArray[ic].deltaT          = block->getKernel()->getDeltaT();
-            dataSetArray[ic].compressible    = block->getKernel()->getCompressible();
-            dataSetArray[ic].withForcing     = block->getKernel()->getWithForcing();
-
-            D3Q27EsoTwist3DSplittedVectorPtrF = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(
-                block->getKernel()->getDataSet()->getFdistributions());
+            kernel = dynamicPointerCast<LBMKernel>(block->getKernel());
+
+            dataSetArray[ic].globalID = block->getGlobalID(); // id of the block needed to find it while regenerating the grid
+            dataSetArray[ic].ghostLayerWidth = kernel->getGhostLayerWidth();
+            dataSetArray[ic].collFactor = kernel->getCollisionFactor();
+            dataSetArray[ic].deltaT = kernel->getDeltaT();
+            dataSetArray[ic].compressible = kernel->getCompressible();
+            dataSetArray[ic].withForcing = kernel->getWithForcing();
+            dataSetArray[ic].collFactorL = kernel->getCollisionFactorL();
+            dataSetArray[ic].collFactorG = kernel->getCollisionFactorG();
+            dataSetArray[ic].densityRatio = kernel->getDensityRatio();
+
+            D3Q27EsoTwist3DSplittedVectorPtrF = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(block->getKernel()->getDataSet()->getFdistributions());
             localDistributionsF = D3Q27EsoTwist3DSplittedVectorPtrF->getLocalDistributions();
             nonLocalDistributionsF = D3Q27EsoTwist3DSplittedVectorPtrF->getNonLocalDistributions();
             zeroDistributionsF = D3Q27EsoTwist3DSplittedVectorPtrF->getZeroDistributions();
 
-            D3Q27EsoTwist3DSplittedVectorPtrH = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(
-                block->getKernel()->getDataSet()->getHdistributions());
-            localDistributionsH = D3Q27EsoTwist3DSplittedVectorPtrH->getLocalDistributions();
-            nonLocalDistributionsH = D3Q27EsoTwist3DSplittedVectorPtrH->getNonLocalDistributions();
-            zeroDistributionsH = D3Q27EsoTwist3DSplittedVectorPtrH->getZeroDistributions();
+            D3Q27EsoTwist3DSplittedVectorPtrH1 = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(block->getKernel()->getDataSet()->getHdistributions());
+            if (D3Q27EsoTwist3DSplittedVectorPtrH1 != 0)
+            {
+                multiPhase = true;
+                localDistributionsH1 = D3Q27EsoTwist3DSplittedVectorPtrH1->getLocalDistributions();
+                nonLocalDistributionsH1 = D3Q27EsoTwist3DSplittedVectorPtrH1->getNonLocalDistributions();
+                zeroDistributionsH1 = D3Q27EsoTwist3DSplittedVectorPtrH1->getZeroDistributions();
+            }
+
+            /*D3Q27EsoTwist3DSplittedVectorPtrH2 = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(block->getKernel()->getDataSet()->getH2distributions());
+            if (D3Q27EsoTwist3DSplittedVectorPtrH2 != 0)
+            {
+                localDistributionsH2 = D3Q27EsoTwist3DSplittedVectorPtrH2->getLocalDistributions();
+                nonLocalDistributionsH2 = D3Q27EsoTwist3DSplittedVectorPtrH2->getNonLocalDistributions();
+                zeroDistributionsH2 = D3Q27EsoTwist3DSplittedVectorPtrH2->getZeroDistributions();
+            }*/
 
             if (firstBlock) // && block->getKernel()) // when first (any) valid block...
             {
-                if (localDistributionsF) {
+                if (localDistributionsF)
+                {
                     dataSetParamStr1.nx[0] = static_cast<int>(localDistributionsF->getNX1());
                     dataSetParamStr1.nx[1] = static_cast<int>(localDistributionsF->getNX2());
                     dataSetParamStr1.nx[2] = static_cast<int>(localDistributionsF->getNX3());
                     dataSetParamStr1.nx[3] = static_cast<int>(localDistributionsF->getNX4());
                 }
 
-                if (nonLocalDistributionsF) {
+                if (nonLocalDistributionsF)
+                {
                     dataSetParamStr2.nx[0] = static_cast<int>(nonLocalDistributionsF->getNX1());
                     dataSetParamStr2.nx[1] = static_cast<int>(nonLocalDistributionsF->getNX2());
                     dataSetParamStr2.nx[2] = static_cast<int>(nonLocalDistributionsF->getNX3());
                     dataSetParamStr2.nx[3] = static_cast<int>(nonLocalDistributionsF->getNX4());
                 }
-                if (zeroDistributionsF) {
+                if (zeroDistributionsF)
+                {
                     dataSetParamStr3.nx[0] = static_cast<int>(zeroDistributionsF->getNX1());
                     dataSetParamStr3.nx[1] = static_cast<int>(zeroDistributionsF->getNX2());
                     dataSetParamStr3.nx[2] = static_cast<int>(zeroDistributionsF->getNX3());
@@ -195,94 +218,91 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
                 }
 
                 // ... than save some parameters that are equal in all blocks
-                dataSetParamStr1.nx1 = dataSetParamStr2.nx1 = dataSetParamStr3.nx1 =
-                    static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX1());
-                dataSetParamStr1.nx2 = dataSetParamStr2.nx2 = dataSetParamStr3.nx2 =
-                    static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX2());
-                dataSetParamStr1.nx3 = dataSetParamStr2.nx3 = dataSetParamStr3.nx3 =
-                    static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX3());
-
-                //  Fdistributions + Hdistributions
-                doubleCountInBlock =
-                    (dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
-                        dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
-                        dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3]) * 2;
-
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> averageDensityArray =
-                    block->getKernel()->getDataSet()->getAverageDensity();
+                dataSetParamStr1.nx1 = dataSetParamStr2.nx1 = dataSetParamStr3.nx1 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX1());
+                dataSetParamStr1.nx2 = dataSetParamStr2.nx2 = dataSetParamStr3.nx2 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX2());
+                dataSetParamStr1.nx3 = dataSetParamStr2.nx3 = dataSetParamStr3.nx3 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX3());
+
+                doubleCountInBlock = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
+                    dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
+                    dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
+
+                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> averageDensityArray = block->getKernel()->getDataSet()->getAverageDensity();
                 if (averageDensityArray)
                     arrPresence.isAverageDensityArrayPresent = true;
                 else
                     arrPresence.isAverageDensityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageVelocityArray3DPtr =
-                    block->getKernel()->getDataSet()->getAverageVelocity();
+                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
                 if (AverageVelocityArray3DPtr)
                     arrPresence.isAverageVelocityArrayPresent = true;
                 else
                     arrPresence.isAverageVelocityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageFluctArray3DPtr =
-                    block->getKernel()->getDataSet()->getAverageFluctuations();
+                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageFluctArray3DPtr = block->getKernel()->getDataSet()->getAverageFluctuations();
                 if (AverageFluctArray3DPtr)
                     arrPresence.isAverageFluktuationsArrayPresent = true;
                 else
                     arrPresence.isAverageFluktuationsArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageTripleArray3DPtr =
-                    block->getKernel()->getDataSet()->getAverageTriplecorrelations();
+                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageTripleArray3DPtr = block->getKernel()->getDataSet()->getAverageTriplecorrelations();
                 if (AverageTripleArray3DPtr)
                     arrPresence.isAverageTripleArrayPresent = true;
                 else
                     arrPresence.isAverageTripleArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ShearStressValArray3DPtr =
-                    block->getKernel()->getDataSet()->getShearStressValues();
+                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ShearStressValArray3DPtr = block->getKernel()->getDataSet()->getShearStressValues();
                 if (ShearStressValArray3DPtr)
                     arrPresence.isShearStressValArrayPresent = true;
                 else
                     arrPresence.isShearStressValArrayPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> relaxationFactor3DPtr =
-                    block->getKernel()->getDataSet()->getRelaxationFactor();
+                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> relaxationFactor3DPtr = block->getKernel()->getDataSet()->getRelaxationFactor();
                 if (relaxationFactor3DPtr)
                     arrPresence.isRelaxationFactorPresent = true;
                 else
                     arrPresence.isRelaxationFactorPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr =
-                    block->getKernel()->getDataSet()->getPhaseField();
-                if (phaseField3DPtr)
-                    arrPresence.isPhaseFieldPresent = true;
+                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr1 = block->getKernel()->getDataSet()->getPhaseField();
+                if (phaseField3DPtr1)
+                    arrPresence.isPhaseField1Present = true;
+                else
+                    arrPresence.isPhaseField1Present = false;
+
+                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr2 = block->getKernel()->getDataSet()->getPhaseField2();
+                if (phaseField3DPtr2)
+                    arrPresence.isPhaseField2Present = true;
                 else
-                    arrPresence.isPhaseFieldPresent = false;
+                    arrPresence.isPhaseField2Present = false;
 
                 firstBlock = false;
             }
 
-            if (localDistributionsF && (dataSetParamStr1.nx[0] > 0) && (dataSetParamStr1.nx[1] > 0) &&
-                (dataSetParamStr1.nx[2] > 0) && (dataSetParamStr1.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), localDistributionsF->getDataVector().begin(),
-                    localDistributionsF->getDataVector().end());
-            if (nonLocalDistributionsF && (dataSetParamStr2.nx[0] > 0) && (dataSetParamStr2.nx[1] > 0) &&
-                (dataSetParamStr2.nx[2] > 0) && (dataSetParamStr2.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), nonLocalDistributionsF->getDataVector().begin(),
-                    nonLocalDistributionsF->getDataVector().end());
+            if (localDistributionsF && (dataSetParamStr1.nx[0] > 0) && (dataSetParamStr1.nx[1] > 0) && (dataSetParamStr1.nx[2] > 0) && (dataSetParamStr1.nx[3] > 0))
+                doubleValuesArrayF.insert(doubleValuesArrayF.end(), localDistributionsF->getDataVector().begin(), localDistributionsF->getDataVector().end());
+            if (nonLocalDistributionsF && (dataSetParamStr2.nx[0] > 0) && (dataSetParamStr2.nx[1] > 0) && (dataSetParamStr2.nx[2] > 0) && (dataSetParamStr2.nx[3] > 0))
+                doubleValuesArrayF.insert(doubleValuesArrayF.end(), nonLocalDistributionsF->getDataVector().begin(), nonLocalDistributionsF->getDataVector().end());
             if (zeroDistributionsF && (dataSetParamStr3.nx[0] > 0) && (dataSetParamStr3.nx[1] > 0) && (dataSetParamStr3.nx[2] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), zeroDistributionsF->getDataVector().begin(),
-                    zeroDistributionsF->getDataVector().end());
-
-            if (localDistributionsH && (dataSetParamStr1.nx[0] > 0) && (dataSetParamStr1.nx[1] > 0) &&
-                (dataSetParamStr1.nx[2] > 0) && (dataSetParamStr1.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), localDistributionsH->getDataVector().begin(),
-                    localDistributionsH->getDataVector().end());
-            if (nonLocalDistributionsH && (dataSetParamStr2.nx[0] > 0) && (dataSetParamStr2.nx[1] > 0) &&
-                (dataSetParamStr2.nx[2] > 0) && (dataSetParamStr2.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), nonLocalDistributionsH->getDataVector().begin(),
-                    nonLocalDistributionsH->getDataVector().end());
-            if (zeroDistributionsH && (dataSetParamStr3.nx[0] > 0) && (dataSetParamStr3.nx[1] > 0) && (dataSetParamStr3.nx[2] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), zeroDistributionsH->getDataVector().begin(),
-                    zeroDistributionsH->getDataVector().end());
+                doubleValuesArrayF.insert(doubleValuesArrayF.end(), zeroDistributionsF->getDataVector().begin(), zeroDistributionsF->getDataVector().end());
+
+            if (multiPhase)
+            {
+                if (localDistributionsH1 && (dataSetParamStr1.nx[0] > 0) && (dataSetParamStr1.nx[1] > 0) && (dataSetParamStr1.nx[2] > 0) && (dataSetParamStr1.nx[3] > 0))
+                    doubleValuesArrayH1.insert(doubleValuesArrayH1.end(), localDistributionsH1->getDataVector().begin(), localDistributionsH1->getDataVector().end());
+                if (nonLocalDistributionsH1 && (dataSetParamStr2.nx[0] > 0) && (dataSetParamStr2.nx[1] > 0) && (dataSetParamStr2.nx[2] > 0) && (dataSetParamStr2.nx[3] > 0))
+                    doubleValuesArrayH1.insert(doubleValuesArrayH1.end(), nonLocalDistributionsH1->getDataVector().begin(), nonLocalDistributionsH1->getDataVector().end());
+                if (zeroDistributionsH1 && (dataSetParamStr3.nx[0] > 0) && (dataSetParamStr3.nx[1] > 0) && (dataSetParamStr3.nx[2] > 0))
+                    doubleValuesArrayH1.insert(doubleValuesArrayH1.end(), zeroDistributionsH1->getDataVector().begin(), zeroDistributionsH1->getDataVector().end());
+            }
+
+            /*if (D3Q27EsoTwist3DSplittedVectorPtrH2 != 0)
+            {
+                if (localDistributionsH2 && (dataSetParamStr1.nx[0] > 0) && (dataSetParamStr1.nx[1] > 0) && (dataSetParamStr1.nx[2] > 0) && (dataSetParamStr1.nx[3] > 0))
+                    doubleValuesArrayH2.insert(doubleValuesArrayH2.end(), localDistributionsH2->getDataVector().begin(), localDistributionsH2->getDataVector().end());
+                if (nonLocalDistributionsH2 && (dataSetParamStr2.nx[0] > 0) && (dataSetParamStr2.nx[1] > 0) && (dataSetParamStr2.nx[2] > 0) && (dataSetParamStr2.nx[3] > 0))
+                    doubleValuesArrayH2.insert(doubleValuesArrayH2.end(), nonLocalDistributionsH2->getDataVector().begin(), nonLocalDistributionsH2->getDataVector().end());
+                if (zeroDistributionsH2 && (dataSetParamStr3.nx[0] > 0) && (dataSetParamStr3.nx[1] > 0) && (dataSetParamStr3.nx[2] > 0))
+                    doubleValuesArrayH2.insert(doubleValuesArrayH2.end(), zeroDistributionsH2->getDataVector().begin(), zeroDistributionsH2->getDataVector().end());
+            }*/
 
             ic++;
         }
@@ -292,10 +312,10 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
     MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::writeDataSet start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     double start, finish;
@@ -311,32 +331,73 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
 
     // write to the file
     MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSet.bin";
+    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetF.bin";
     int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
     MPI_File_write_at(file_handler, (MPI_Offset)0, &dataSetParamStr1, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    MPI_File_write_at(file_handler, (MPI_Offset)(sizeof(dataSetParam)), &dataSetParamStr2, 1, dataSetParamType,
-                      MPI_STATUS_IGNORE);
-    MPI_File_write_at(file_handler, (MPI_Offset)(2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType,
-                      MPI_STATUS_IGNORE);
-
+    MPI_File_write_at(file_handler, (MPI_Offset)(sizeof(dataSetParam)), &dataSetParamStr2, 1, dataSetParamType, MPI_STATUS_IGNORE);
+    MPI_File_write_at(file_handler, (MPI_Offset)(2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType, MPI_STATUS_IGNORE);
+    
     MPI_Offset write_offset;
     size_t sizeofOneDataSet = sizeof(DataSetMigration) + doubleCountInBlock * sizeof(double);
 
-    for (int nb = 0; nb < blocksCount; nb++) {
+    for (int nb = 0; nb < blocksCount; nb++) 
+    {
         write_offset = (MPI_Offset)(3 * sizeof(dataSetParam) + dataSetArray[nb].globalID * sizeofOneDataSet);
         MPI_File_write_at(file_handler, write_offset, &dataSetArray[nb], 1, dataSetType, MPI_STATUS_IGNORE);
-        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(DataSetMigration)),
-                          &doubleValuesArray[nb * doubleCountInBlock], 1, dataSetDoubleType, MPI_STATUS_IGNORE);
+        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(DataSetMigration)), &doubleValuesArrayF[nb * doubleCountInBlock], 1, dataSetDoubleType, MPI_STATUS_IGNORE);
     }
 
     MPI_File_sync(file_handler);
     MPI_File_close(&file_handler);
+
+    //-------------------------------- H1 ----------------------------------------------------
+    if (multiPhase)
+    {
+        filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH1.bin";
+        rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
+        if (rc != MPI_SUCCESS)
+            throw UbException(UB_EXARGS, "couldn't open file " + filename);
+
+        sizeofOneDataSet = doubleCountInBlock * sizeof(double);
+
+        for (int nb = 0; nb < blocksCount; nb++) 
+        {
+            write_offset = (MPI_Offset)(dataSetArray[nb].globalID * sizeofOneDataSet);
+            MPI_File_write_at(file_handler, write_offset, &doubleValuesArrayH1[nb * doubleCountInBlock], 1, dataSetDoubleType, MPI_STATUS_IGNORE);
+        }
+
+        MPI_File_sync(file_handler);
+        MPI_File_close(&file_handler);
+    }
+
+    //-------------------------------- H2 ----------------------------------------------------
+    /*if (D3Q27EsoTwist3DSplittedVectorPtrH2 != 0)
+    {
+        filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH2.bin";
+        rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
+        if (rc != MPI_SUCCESS)
+            throw UbException(UB_EXARGS, "couldn't open file " + filename);
+
+        sizeofOneDataSet = doubleCountInBlock * sizeof(double);
+
+        for (int nb = 0; nb < blocksCount; nb++) 
+        {
+            write_offset = (MPI_Offset)(dataSetArray[nb].globalID * sizeofOneDataSet);
+            MPI_File_write_at(file_handler, write_offset, &doubleValuesArrayH2[nb * doubleCountInBlock], 1, dataSetDoubleType, MPI_STATUS_IGNORE);
+        }
+
+        MPI_File_sync(file_handler);
+        MPI_File_close(&file_handler);
+    }*/
+    //--------------------------------
+
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::writeDataSet time: " << finish - start << " s");
     }
@@ -375,8 +436,11 @@ void MPIIOMigrationCoProcessor::writeDataSet(int step)
     if (arrPresence.isRelaxationFactorPresent)
         write3DArray(step, RelaxationFactor, std::string("/cpRelaxationFactor.bin"));
 
-    if (arrPresence.isPhaseFieldPresent)
-        write3DArray(step, PhaseField, std::string("/cpPhaseField.bin"));
+    if (arrPresence.isPhaseField1Present)
+        write3DArray(step, PhaseField1, std::string("/cpPhaseField1.bin"));
+
+    if (arrPresence.isPhaseField2Present)
+        write3DArray(step, PhaseField2, std::string("/cpPhaseField2.bin"));
 
 }
 
@@ -391,7 +455,8 @@ void MPIIOMigrationCoProcessor::write4DArray(int step, Arrays arrayType, std::st
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
@@ -400,10 +465,10 @@ void MPIIOMigrationCoProcessor::write4DArray(int step, Arrays arrayType, std::st
     std::vector<double> doubleValuesArray; // double-values of the AverageDensityArray in all blocks
     dataSetParam dataSetParamStr;
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::writeAverageDensityArray start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     bool firstBlock           = true;
@@ -411,13 +476,14 @@ void MPIIOMigrationCoProcessor::write4DArray(int step, Arrays arrayType, std::st
     int ic                    = 0;
     SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ___Array;
 
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
         {
-            dataSetSmallArray[ic].globalID =
-                block->getGlobalID(); // id of the block needed to find it while regenerating the grid
+            dataSetSmallArray[ic].globalID = block->getGlobalID(); // id of the block needed to find it while regenerating the grid
 
-            switch (arrayType) {
+            switch (arrayType) 
+            {
                 case AverageDensity:
                     ___Array = block->getKernel()->getDataSet()->getAverageDensity();
                     break;
@@ -434,28 +500,24 @@ void MPIIOMigrationCoProcessor::write4DArray(int step, Arrays arrayType, std::st
                     ___Array = block->getKernel()->getDataSet()->getShearStressValues();
                     break;
                 default:
-                    UB_THROW(UbException(UB_EXARGS,
-                                         "MPIIOMigrationCoProcessor::write4DArray : 4D array type does not exist!"));
+                    UB_THROW(UbException(UB_EXARGS, "MPIIOMigrationCoProcessor::write4DArray : 4D array type does not exist!"));
                     break;
             }
 
             if (firstBlock) // when first (any) valid block...
             {
                 dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0]                                           = static_cast<int>(___Array->getNX1());
-                dataSetParamStr.nx[1]                                           = static_cast<int>(___Array->getNX2());
-                dataSetParamStr.nx[2]                                           = static_cast<int>(___Array->getNX3());
-                dataSetParamStr.nx[3]                                           = static_cast<int>(___Array->getNX4());
-                doubleCountInBlock =
-                    dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+                dataSetParamStr.nx[0] = static_cast<int>(___Array->getNX1());
+                dataSetParamStr.nx[1] = static_cast<int>(___Array->getNX2());
+                dataSetParamStr.nx[2] = static_cast<int>(___Array->getNX3());
+                dataSetParamStr.nx[3] = static_cast<int>(___Array->getNX4());
+                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
 
                 firstBlock = false;
             }
 
-            if (___Array && (dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) &&
-                (dataSetParamStr.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), ___Array->getDataVector().begin(),
-                                         ___Array->getDataVector().end());
+            if (___Array && (dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) && (dataSetParamStr.nx[3] > 0))
+                doubleValuesArray.insert(doubleValuesArray.end(), ___Array->getDataVector().begin(), ___Array->getDataVector().end());
 
             ic++;
         }
@@ -465,10 +527,10 @@ void MPIIOMigrationCoProcessor::write4DArray(int step, Arrays arrayType, std::st
     MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::write4DArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     double start, finish;
@@ -489,7 +551,8 @@ void MPIIOMigrationCoProcessor::write4DArray(int step, Arrays arrayType, std::st
     MPI_Offset write_offset;
     size_t sizeofOneDataSet = sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(double);
 
-    for (int nb = 0; nb < blocksCount; nb++) {
+    for (int nb = 0; nb < blocksCount; nb++) 
+    {
         write_offset = (MPI_Offset)(sizeof(dataSetParam) + dataSetSmallArray[nb].globalID * sizeofOneDataSet);
         MPI_File_write_at(file_handler, write_offset, &dataSetSmallArray[nb], 1, dataSetSmallType, MPI_STATUS_IGNORE);
         MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(DataSetSmallMigration)),
@@ -500,7 +563,8 @@ void MPIIOMigrationCoProcessor::write4DArray(int step, Arrays arrayType, std::st
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::write4DArray time: " << finish - start << " s");
     }
@@ -519,7 +583,8 @@ void MPIIOMigrationCoProcessor::write3DArray(int step, Arrays arrayType, std::st
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
@@ -528,10 +593,10 @@ void MPIIOMigrationCoProcessor::write3DArray(int step, Arrays arrayType, std::st
     std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
     dataSetParam dataSetParamStr;
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::write3DArray start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     bool firstBlock           = true;
@@ -539,41 +604,42 @@ void MPIIOMigrationCoProcessor::write3DArray(int step, Arrays arrayType, std::st
     int ic                    = 0;
     SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> ___Array;
 
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
         {
-            dataSetSmallArray[ic].globalID =
-                block->getGlobalID(); // id of the block needed to find it while regenerating the grid
+            dataSetSmallArray[ic].globalID = block->getGlobalID(); // id of the block needed to find it while regenerating the grid
 
-            switch (arrayType) {
+            switch (arrayType) 
+            {
                 case RelaxationFactor:
                     ___Array = block->getKernel()->getDataSet()->getRelaxationFactor();
                     break;
-                case PhaseField:
+                case PhaseField1:
                     ___Array = block->getKernel()->getDataSet()->getPhaseField();
                     break;
+                case PhaseField2:
+                    ___Array = block->getKernel()->getDataSet()->getPhaseField2();
+                    break;
                 default:
-                    UB_THROW(UbException(UB_EXARGS,
-                                         "MPIIOMigrationCoProcessor::write3DArray : 3D array type does not exist!"));
+                    UB_THROW(UbException(UB_EXARGS, "MPIIOMigrationCoProcessor::write3DArray : 3D array type does not exist!"));
                     break;
             }
 
             if (firstBlock) // when first (any) valid block...
             {
                 dataSetParamStr.nx1 = dataSetParamStr.nx2 = dataSetParamStr.nx3 = 0;
-                dataSetParamStr.nx[0]                                           = static_cast<int>(___Array->getNX1());
-                dataSetParamStr.nx[1]                                           = static_cast<int>(___Array->getNX2());
-                dataSetParamStr.nx[2]                                           = static_cast<int>(___Array->getNX3());
-                dataSetParamStr.nx[3]                                           = 1;
-                doubleCountInBlock =
-                    dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+                dataSetParamStr.nx[0] = static_cast<int>(___Array->getNX1());
+                dataSetParamStr.nx[1] = static_cast<int>(___Array->getNX2());
+                dataSetParamStr.nx[2] = static_cast<int>(___Array->getNX3());
+                dataSetParamStr.nx[3] = 1;
+                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
 
                 firstBlock = false;
             }
 
             if (___Array && (dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), ___Array->getDataVector().begin(),
-                                         ___Array->getDataVector().end());
+                doubleValuesArray.insert(doubleValuesArray.end(), ___Array->getDataVector().begin(), ___Array->getDataVector().end());
 
             ic++;
         }
@@ -583,10 +649,10 @@ void MPIIOMigrationCoProcessor::write3DArray(int step, Arrays arrayType, std::st
     MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::write3DArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     double start, finish;
@@ -613,7 +679,8 @@ void MPIIOMigrationCoProcessor::write3DArray(int step, Arrays arrayType, std::st
     size_t sizeofOneDataSet = sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(double);
 
     MPI_Offset write_offset;
-    for (int nb = 0; nb < blocksCount; nb++) {
+    for (int nb = 0; nb < blocksCount; nb++) 
+    {
         write_offset = (MPI_Offset)(sizeof(dataSetParam) + dataSetSmallArray[nb].globalID * sizeofOneDataSet);
         MPI_File_write_at(file_handler, write_offset, &dataSetSmallArray[nb], 1, dataSetSmallType, MPI_STATUS_IGNORE);
         MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(DataSetSmallMigration)),
@@ -624,7 +691,8 @@ void MPIIOMigrationCoProcessor::write3DArray(int step, Arrays arrayType, std::st
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::write3DArray time: " << finish - start << " s");
     }
@@ -1352,10 +1420,10 @@ void MPIIOMigrationCoProcessor::writeBoundaryConds(int step)
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::writeBoundaryConds start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     int blocksCount          = 0; // quantity of blocks, that belong to this process
@@ -1366,7 +1434,8 @@ void MPIIOMigrationCoProcessor::writeBoundaryConds(int step)
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
@@ -1381,25 +1450,27 @@ void MPIIOMigrationCoProcessor::writeBoundaryConds(int step)
     int ic                         = 0;
     SPtr<BCArray3D> bcArr;
 
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         for (SPtr<Block3D> block : blocksVector[level]) // all the blocks of the current level
         {
             bcArr = block->getKernel()->getBCProcessor()->getBCArray();
 
-            bcAddArray[ic].globalID =
-                block->getGlobalID();                // id of the block needed to find it while regenerating the grid
+            bcAddArray[ic].globalID = block->getGlobalID();                // id of the block needed to find it while regenerating the grid
             bcAddArray[ic].boundCond_count      = 0; // how many BoundaryConditions in this block
             bcAddArray[ic].indexContainer_count = 0; // how many indexContainer-values in this block
-            bytesCount[ic]                      = sizeof(BCAddMigration);
+            bytesCount[ic] = sizeof(BCAddMigration);
             bcVector[ic].resize(0);
             bcindexmatrixVector[ic].resize(0);
             indexContainerVector[ic].resize(0);
 
-            for (std::size_t bc = 0; bc < bcArr->getBCVectorSize(); bc++) {
+            for (std::size_t bc = 0; bc < bcArr->getBCVectorSize(); bc++) 
+            {
                 BoundaryCondition *bouCond = new BoundaryCondition();
-                if (bcArr->bcvector[bc] == NULL) {
+                if (bcArr->bcvector[bc] == NULL) 
                     memset(bouCond, 0, sizeof(BoundaryCondition));
-                } else {
+                else 
+                {
                     bouCond->noslipBoundaryFlags    = bcArr->bcvector[bc]->getNoSlipBoundary();
                     bouCond->slipBoundaryFlags      = bcArr->bcvector[bc]->getSlipBoundary();
                     bouCond->velocityBoundaryFlags  = bcArr->bcvector[bc]->getVelocityBoundary();
@@ -1428,20 +1499,18 @@ void MPIIOMigrationCoProcessor::writeBoundaryConds(int step)
                 bytesCount[ic] += sizeof(BoundaryCondition);
             }
 
-            if (bcindexmatrixCountNotInit) {
+            if (bcindexmatrixCountNotInit) 
+            {
                 boundCondParamStr.nx1                = static_cast<int>(bcArr->bcindexmatrix.getNX1());
                 boundCondParamStr.nx2                = static_cast<int>(bcArr->bcindexmatrix.getNX2());
                 boundCondParamStr.nx3                = static_cast<int>(bcArr->bcindexmatrix.getNX3());
                 boundCondParamStr.bcindexmatrixCount = static_cast<int>(bcArr->bcindexmatrix.getDataVector().size());
                 bcindexmatrixCountNotInit            = false;
             }
-            bcindexmatrixVector[ic].insert(bcindexmatrixVector[ic].begin(),
-                                           bcArr->bcindexmatrix.getDataVector().begin(),
-                                           bcArr->bcindexmatrix.getDataVector().end());
+            bcindexmatrixVector[ic].insert(bcindexmatrixVector[ic].begin(), bcArr->bcindexmatrix.getDataVector().begin(), bcArr->bcindexmatrix.getDataVector().end());
             bytesCount[ic] += boundCondParamStr.bcindexmatrixCount * sizeof(int);
 
-            indexContainerVector[ic].insert(indexContainerVector[ic].begin(), bcArr->indexContainer.begin(),
-                                            bcArr->indexContainer.end());
+            indexContainerVector[ic].insert(indexContainerVector[ic].begin(), bcArr->indexContainer.begin(), bcArr->indexContainer.end());
             bcAddArray[ic].indexContainer_count = static_cast<int>(bcArr->indexContainer.size());
             count_indexContainer += bcAddArray[ic].indexContainer_count;
             bytesCount[ic] += bcAddArray[ic].indexContainer_count * sizeof(int);
@@ -1455,10 +1524,10 @@ void MPIIOMigrationCoProcessor::writeBoundaryConds(int step)
     MPI_Type_contiguous(boundCondParamStr.bcindexmatrixCount, MPI_INT, &bcindexmatrixType);
     MPI_Type_commit(&bcindexmatrixType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::writeBoundaryConds start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: "<< Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     double start, finish;
@@ -1479,11 +1548,15 @@ void MPIIOMigrationCoProcessor::writeBoundaryConds(int step)
 
     MPI_Offset write_offset = (MPI_Offset)(sizeof(boundCondParam) + grid->getNumberOfBlocks() * sizeof(size_t));
     size_t next_file_offset = 0;
-    if (size > 1) {
-        if (rank == 0) {
+    if (size > 1) 
+    {
+        if (rank == 0) 
+        {
             next_file_offset = write_offset + allBytesCount;
             MPI_Send(&next_file_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        } 
+        else 
+        {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
             next_file_offset = write_offset + allBytesCount;
             if (rank < size - 1)
@@ -1495,7 +1568,8 @@ void MPIIOMigrationCoProcessor::writeBoundaryConds(int step)
 
     MPI_Offset write_offsetIndex;
 
-    for (int nb = 0; nb < blocksCount; nb++) {
+    for (int nb = 0; nb < blocksCount; nb++) 
+    {
         write_offsetIndex = (MPI_Offset)(sizeof(boundCondParam) + bcAddArray[nb].globalID * sizeof(size_t));
         MPI_File_write_at(file_handler, write_offsetIndex, &write_offset, 1, MPI_LONG_LONG_INT, MPI_STATUS_IGNORE);
 
@@ -1505,17 +1579,12 @@ void MPIIOMigrationCoProcessor::writeBoundaryConds(int step)
                               bcAddArray[nb].boundCond_count, boundCondType, MPI_STATUS_IGNORE);
 
         if (bcindexmatrixVector[nb].size() > 0)
-            MPI_File_write_at(file_handler,
-                              (MPI_Offset)(write_offset + sizeof(BCAddMigration) +
-                                           bcAddArray[nb].boundCond_count * sizeof(BoundaryCondition)),
+            MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(BCAddMigration) + bcAddArray[nb].boundCond_count * sizeof(BoundaryCondition)),
                               &bcindexmatrixVector[nb][0], 1, bcindexmatrixType, MPI_STATUS_IGNORE);
 
         if (indexContainerVector[nb].size() > 0)
-            MPI_File_write_at(file_handler,
-                              (MPI_Offset)(write_offset + sizeof(BCAddMigration) +
-                                           bcAddArray[nb].boundCond_count * sizeof(BoundaryCondition) +
-                                           boundCondParamStr.bcindexmatrixCount * sizeof(int)),
-                              &indexContainerVector[nb][0], bcAddArray[nb].indexContainer_count, MPI_INT,
+            MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(BCAddMigration) + bcAddArray[nb].boundCond_count * sizeof(BoundaryCondition) +
+                              boundCondParamStr.bcindexmatrixCount * sizeof(int)), &indexContainerVector[nb][0], bcAddArray[nb].indexContainer_count, MPI_INT,
                               MPI_STATUS_IGNORE);
 
         write_offset += bytesCount[nb];
@@ -1525,7 +1594,8 @@ void MPIIOMigrationCoProcessor::writeBoundaryConds(int step)
     MPI_File_close(&file_handler);
     MPI_Type_free(&bcindexmatrixType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::writeBoundaryConds time: " << finish - start << " s");
     }
@@ -1547,8 +1617,7 @@ void MPIIOMigrationCoProcessor::restart(int step)
 
     readBlocks(step);
 
-    SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased,
-                                                                      D3Q27System::BSW, MetisPartitioner::KWAY));
+    SPtr<Grid3DVisitor> metisVisitor(new MetisPartitioningGridVisitor(comm, MetisPartitioningGridVisitor::LevelBased, D3Q27System::BSW, MetisPartitioner::KWAY));
     grid->accept(metisVisitor);
 
     readDataSet(step);
@@ -1568,15 +1637,16 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::readDataSet start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     double start, finish;
     if (comm->isRoot())
         start = MPI_Wtime();
 
+    bool multiPhase = false;
     size_t blocksCount = 0; // quantity of the blocks, that belong to this process
     dataSetParam dataSetParamStr1, dataSetParamStr2, dataSetParamStr3;
 
@@ -1584,7 +1654,8 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
@@ -1592,22 +1663,21 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
     DataSetMigration *dataSetArray = new DataSetMigration[blocksCount];
 
     MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSet.bin";
+    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetF.bin";
     int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
 
     MPI_File_read_at(file_handler, (MPI_Offset)0, &dataSetParamStr1, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(sizeof(dataSetParam)), &dataSetParamStr2, 1, dataSetParamType,
-                     MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType,
-                     MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(sizeof(dataSetParam)), &dataSetParamStr2, 1, dataSetParamType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
-    size_t doubleCountInBlock =
-        (dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
+    size_t doubleCountInBlock = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
         dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
-        dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3] * 2);
-    std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
+        dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
+    std::vector<double> doubleValuesArrayF(size_t(blocksCount * doubleCountInBlock)); // double-values in all blocks  Fdistributions
+    std::vector<double> doubleValuesArrayH1; // double-values in all blocks  H1distributions
+    //std::vector<double> doubleValuesArrayH2; // double-values in all blocks  H2distributions
 
     // define MPI_types depending on the block-specific information
     MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
@@ -1617,95 +1687,145 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
     MPI_Offset read_offset;
     size_t sizeofOneDataSet = size_t(sizeof(DataSetMigration) + doubleCountInBlock * sizeof(double));
 
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
         {
             read_offset = (MPI_Offset)(3 * sizeof(dataSetParam) + block->getGlobalID() * sizeofOneDataSet);
             MPI_File_read_at(file_handler, read_offset, &dataSetArray[ic], 1, dataSetType, MPI_STATUS_IGNORE);
             MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(DataSetMigration)),
-                             &doubleValuesArray[ic * doubleCountInBlock], 1, dataSetDoubleType, MPI_STATUS_IGNORE);
+                             &doubleValuesArrayF[ic * doubleCountInBlock], 1, dataSetDoubleType, MPI_STATUS_IGNORE);
             ic++;
         }
     }
 
     MPI_File_close(&file_handler);
+
+    //------------- H1 ------------- NOTE(review): 'ic' still holds blocksCount from the F-distributions loop above; it must be reset to 0 before the H1 read loop below, otherwise doubleValuesArrayH1[ic * doubleCountInBlock] indexes past the end of the vector — confirm and add 'ic = 0;'
+    MPI_Offset fsize;
+    filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH1.bin";
+    rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+    if (rc != MPI_SUCCESS)
+        throw UbException(UB_EXARGS, "couldn't open file " + filename);
+    MPI_File_get_size(file_handler, &fsize);
+    if (fsize > 0)
+    {
+        multiPhase = true;
+        doubleValuesArrayH1.resize(blocksCount * doubleCountInBlock);
+
+        sizeofOneDataSet = size_t(doubleCountInBlock * sizeof(double));
+
+        for (int level = minInitLevel; level <= maxInitLevel; level++)
+        {
+            for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
+            {
+                read_offset = (MPI_Offset)(block->getGlobalID() * sizeofOneDataSet);
+                MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayH1[ic * doubleCountInBlock], 1, dataSetDoubleType, MPI_STATUS_IGNORE);
+                ic++;
+            }
+        }
+
+    }
+    MPI_File_close(&file_handler);
+
+    //----------------------------------------- H2 ----------------------------------------------------
+  /*filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH2.bin";
+    rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+    if (rc != MPI_SUCCESS)
+        throw UbException(UB_EXARGS, "couldn't open file " + filename);
+
+    sizeofOneDataSet = size_t(doubleCountInBlock * sizeof(double));
+    doubleValuesArrayH2.resize(blocksCount * doubleCountInBlock);
+
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
+        for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
+        {
+            read_offset = (MPI_Offset)(block->getGlobalID() * sizeofOneDataSet);
+            MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayH2[ic * doubleCountInBlock], 1, dataSetDoubleType, MPI_STATUS_IGNORE);
+            ic++;
+        }
+    }
+
+    MPI_File_close(&file_handler);*/
+
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::readDataSet time: " << finish - start << " s");
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::readDataSet start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     size_t index = 0;
-    std::vector<double> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
-    std::vector<double> vectorsOfValuesH1, vectorsOfValuesH2, vectorsOfValuesH3;
-
-    size_t vectorSize1 =
-        dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3];
-    size_t vectorSize2 =
-        dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3];
-    size_t vectorSize3 =
-        dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
-
-    for (std::size_t n = 0; n < blocksCount; n++) {
-        vectorsOfValuesF1.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + vectorSize1);
+    std::vector<LBMReal> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
+    std::vector<LBMReal> vectorsOfValuesH11, vectorsOfValuesH12, vectorsOfValuesH13;
+    //std::vector<LBMReal> vectorsOfValuesH21, vectorsOfValuesH22, vectorsOfValuesH23;
+
+    size_t vectorSize1 = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3];
+    size_t vectorSize2 = dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3];
+    size_t vectorSize3 = dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
+
+    for (std::size_t n = 0; n < blocksCount; n++) 
+    {
+        vectorsOfValuesF1.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize1);
+        if(multiPhase)
+            vectorsOfValuesH11.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize1);
+        //vectorsOfValuesH21.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize1);
         index += vectorSize1;
 
-        vectorsOfValuesF2.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + vectorSize2);
+        vectorsOfValuesF2.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize2);
+        if (multiPhase)
+            vectorsOfValuesH12.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize2);
+        //vectorsOfValuesH22.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize2);
         index += vectorSize2;
 
-        vectorsOfValuesF3.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + vectorSize3);
-        index += vectorSize3;
-
-        vectorsOfValuesH1.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + vectorSize1);
-        index += vectorSize1;
-
-        vectorsOfValuesH2.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + vectorSize2);
-        index += vectorSize2;
-
-        vectorsOfValuesH3.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + vectorSize3);
+        vectorsOfValuesF3.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize3);
+        if (multiPhase)
+            vectorsOfValuesH13.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize3);
+        //vectorsOfValuesH23.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize3);
         index += vectorSize3;
 
         SPtr<DistributionArray3D> mFdistributions(new D3Q27EsoTwist3DSplittedVector());
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)
-            ->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0],
-                                                        dataSetParamStr1.nx[1], dataSetParamStr1.nx[2],
-                                                        dataSetParamStr1.nx[3])));
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)
-            ->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0],
-                                                        dataSetParamStr2.nx[1], dataSetParamStr2.nx[2],
-                                                        dataSetParamStr2.nx[3])));
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)
-            ->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
                     vectorsOfValuesF3, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
         dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX1(dataSetParamStr1.nx1);
         dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX2(dataSetParamStr1.nx2);
         dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX3(dataSetParamStr1.nx3);
 
-        SPtr<DistributionArray3D> mHdistributions(new D3Q27EsoTwist3DSplittedVector());
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)
-            ->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH1, dataSetParamStr1.nx[0],
-                    dataSetParamStr1.nx[1], dataSetParamStr1.nx[2],
-                    dataSetParamStr1.nx[3])));
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)
-            ->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH2, dataSetParamStr2.nx[0],
-                    dataSetParamStr2.nx[1], dataSetParamStr2.nx[2],
-                    dataSetParamStr2.nx[3])));
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)
-            ->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-                vectorsOfValuesH3, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
-
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)->setNX1(dataSetParamStr1.nx1);
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)->setNX2(dataSetParamStr1.nx2);
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)->setNX3(dataSetParamStr1.nx3);
+       SPtr<DistributionArray3D> mH1distributions(new D3Q27EsoTwist3DSplittedVector());
+       if (multiPhase)
+        {
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH11, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH12, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                vectorsOfValuesH13, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
+
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX1(dataSetParamStr1.nx1);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX2(dataSetParamStr1.nx2);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX3(dataSetParamStr1.nx3);
+         }
+
+        /*SPtr<DistributionArray3D> mH2distributions(new D3Q27EsoTwist3DSplittedVector());
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH21, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH22, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                vectorsOfValuesH23, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
+
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX1(dataSetParamStr1.nx1);
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX2(dataSetParamStr1.nx2);
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX3(dataSetParamStr1.nx3);*/
 
         // find the nesessary block and fill it
         SPtr<Block3D> block = grid->getBlock(dataSetArray[n].globalID);
@@ -1716,17 +1836,22 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
         kernel->setDeltaT(dataSetArray[n].deltaT);
         kernel->setCompressible(dataSetArray[n].compressible);
         kernel->setWithForcing(dataSetArray[n].withForcing);
+        kernel->setCollisionFactorMultiphase(dataSetArray[n].collFactorL, dataSetArray[n].collFactorG);
+        kernel->setDensityRatio(dataSetArray[n].densityRatio);
+
         SPtr<DataSet3D> dataSetPtr = SPtr<DataSet3D>(new DataSet3D());
         dataSetPtr->setFdistributions(mFdistributions);
-        dataSetPtr->setHdistributions(mHdistributions);
+        if (multiPhase)
+            dataSetPtr->setHdistributions(mH1distributions);
+        //dataSetPtr->setH2distributions(mH2distributions);
         kernel->setDataSet(dataSetPtr);
         block->setKernel(kernel);
     }
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::readDataSet end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     delete[] dataSetArray;
@@ -1766,8 +1891,12 @@ void MPIIOMigrationCoProcessor::readDataSet(int step)
         readArray(step, RelaxationFactor, std::string("/cpRelaxationFactor.bin"));
     //   readRelaxationFactor(step);
  
-    if (arrPresence.isPhaseFieldPresent)
-        readArray(step, PhaseField, std::string("/cpPhaseField.bin"));
+    if (arrPresence.isPhaseField1Present)
+        readArray(step, PhaseField1, std::string("/cpPhaseField1.bin"));
+
+    if (arrPresence.isPhaseField2Present)
+        readArray(step, PhaseField2, std::string("/cpPhaseField2.bin"));
+
 }
 
 void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string fname)
@@ -1776,10 +1905,10 @@ void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::readArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     double start, finish;
     if (comm->isRoot())
@@ -1799,7 +1928,8 @@ void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
@@ -1807,8 +1937,7 @@ void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string
     MPI_File_read_at(file_handler, (MPI_Offset)0, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
     DataSetSmallMigration *dataSetSmallArray = new DataSetSmallMigration[blocksCount];
-    size_t doubleCountInBlock =
-        dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+    size_t doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
     std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
 
     // define MPI_types depending on the block-specific information
@@ -1819,7 +1948,8 @@ void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string
     MPI_Offset read_offset;
     size_t sizeofOneDataSet = size_t(sizeof(DataSetSmallMigration) + doubleCountInBlock * sizeof(double));
 
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
         {
             read_offset = (MPI_Offset)(sizeof(dataSetParam) + block->getGlobalID() * sizeofOneDataSet);
@@ -1833,19 +1963,19 @@ void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::readArray readArray: " << finish - start << " s");
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::readArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     //----------------------------- restore data ---------------------------------
     size_t index = 0;
     size_t nextVectorSize =
         dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
+    std::vector<LBMReal> vectorsOfValues;
     for (std::size_t n = 0; n < blocksCount; n++) {
         SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].globalID);
 
@@ -1856,35 +1986,31 @@ void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string
         SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ___4DArray;
         SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> ___3DArray;
 
-        switch (arrType) {
+        switch (arrType) 
+        {
             case AverageDensity:
                 ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                    vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2],
-                    dataSetParamStr.nx[3]));
+                    vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                 block->getKernel()->getDataSet()->setAverageDensity(___4DArray);
                 break;
             case AverageVelocity:
                 ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                    vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2],
-                    dataSetParamStr.nx[3]));
+                    vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                 block->getKernel()->getDataSet()->setAverageVelocity(___4DArray);
                 break;
             case AverageFluktuations:
                 ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                    vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2],
-                    dataSetParamStr.nx[3]));
+                    vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                 block->getKernel()->getDataSet()->setAverageFluctuations(___4DArray);
                 break;
             case AverageTriple:
                 ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                    vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2],
-                    dataSetParamStr.nx[3]));
+                    vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                 block->getKernel()->getDataSet()->setAverageTriplecorrelations(___4DArray);
                 break;
             case ShearStressVal:
                 ___4DArray = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(
-                    vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2],
-                    dataSetParamStr.nx[3]));
+                    vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
                 block->getKernel()->getDataSet()->setShearStressValues(___4DArray);
                 break;
             case RelaxationFactor:
@@ -1892,21 +2018,26 @@ void MPIIOMigrationCoProcessor::readArray(int step, Arrays arrType, std::string
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                 block->getKernel()->getDataSet()->setRelaxationFactor(___3DArray);
                 break;
-            case PhaseField:
+            case PhaseField1:
                 ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
                     vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
                 block->getKernel()->getDataSet()->setPhaseField(___3DArray);
                 break;
+            case PhaseField2:
+                ___3DArray = CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                    vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
+                block->getKernel()->getDataSet()->setPhaseField2(___3DArray);
+                break;
             default:
                 UB_THROW(UbException(UB_EXARGS, "MPIIOMigrationCoProcessor::readArray : array type does not exist!"));
                 break;
         }
     }
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::readArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     delete[] dataSetSmallArray;
@@ -2548,10 +2679,10 @@ void MPIIOMigrationCoProcessor::readBoundaryConds(int step)
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::readBoundaryConds start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     double start, finish;
@@ -2570,7 +2701,8 @@ void MPIIOMigrationCoProcessor::readBoundaryConds(int step)
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
@@ -2585,12 +2717,12 @@ void MPIIOMigrationCoProcessor::readBoundaryConds(int step)
     std::vector<int> bcindexmatrixV;
     std::vector<int> indexContainerV;
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::readBoundaryConds time: " << finish - start << " s");
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::readBoundaryConds start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     MPI_File_read_at(file_handler, (MPI_Offset)0, &boundCondParamStr, 1, boundCondParamType, MPI_STATUS_IGNORE);
@@ -2599,7 +2731,8 @@ void MPIIOMigrationCoProcessor::readBoundaryConds(int step)
 
     int ic = 0;
     MPI_Offset read_offset1, read_offset2;
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
         {
             read_offset1 = (MPI_Offset)(sizeof(boundCondParam) + block->getGlobalID() * sizeof(size_t));
@@ -2611,31 +2744,30 @@ void MPIIOMigrationCoProcessor::readBoundaryConds(int step)
             intArray1 = new int[boundCondParamStr.bcindexmatrixCount];
             intArray2 = new int[bcAddArray[ic].indexContainer_count];
 
-            if (bcAddArray[ic].boundCond_count > 0) {
+            if (bcAddArray[ic].boundCond_count > 0) 
+            {
                 MPI_File_read_at(file_handler, (MPI_Offset)(read_offset2 + sizeof(BCAddMigration)), &bcArray[0],
                                  bcAddArray[ic].boundCond_count, boundCondType, MPI_STATUS_IGNORE);
             }
-            MPI_File_read_at(file_handler,
-                             (MPI_Offset)(read_offset2 + sizeof(BCAddMigration) +
-                                          bcAddArray[ic].boundCond_count * sizeof(BoundaryCondition)),
+            MPI_File_read_at(file_handler, (MPI_Offset)(read_offset2 + sizeof(BCAddMigration) + bcAddArray[ic].boundCond_count * sizeof(BoundaryCondition)),
                              &intArray1[0], 1, bcindexmatrixType, MPI_STATUS_IGNORE);
-            if (bcAddArray[ic].indexContainer_count > 0) {
-                MPI_File_read_at(file_handler,
-                                 (MPI_Offset)(read_offset2 + sizeof(BCAddMigration) +
-                                              bcAddArray[ic].boundCond_count * sizeof(BoundaryCondition) +
-                                              boundCondParamStr.bcindexmatrixCount * sizeof(int)),
-                                 &intArray2[0], bcAddArray[ic].indexContainer_count, MPI_INT, MPI_STATUS_IGNORE);
+            if (bcAddArray[ic].indexContainer_count > 0) 
+            {
+                MPI_File_read_at(file_handler, (MPI_Offset)(read_offset2 + sizeof(BCAddMigration) + bcAddArray[ic].boundCond_count * sizeof(BoundaryCondition) +
+                                 boundCondParamStr.bcindexmatrixCount * sizeof(int)), &intArray2[0], bcAddArray[ic].indexContainer_count, MPI_INT, MPI_STATUS_IGNORE);
             }
 
             bcindexmatrixV.resize(0);
             indexContainerV.resize(0);
             bcVector.resize(0);
 
-            for (int ibc = 0; ibc < bcAddArray[ic].boundCond_count; ibc++) {
+            for (int ibc = 0; ibc < bcAddArray[ic].boundCond_count; ibc++) 
+            {
                 SPtr<BoundaryConditions> bc;
                 if (memcmp(&bcArray[ibc], nullBouCond, sizeof(BoundaryCondition)) == 0)
                     bc = SPtr<BoundaryConditions>();
-                else {
+                else 
+                {
                     bc                         = SPtr<BoundaryConditions>(new BoundaryConditions);
                     bc->noslipBoundaryFlags    = bcArray[ibc].noslipBoundaryFlags;
                     bc->slipBoundaryFlags      = bcArray[ibc].slipBoundaryFlags;
@@ -2669,8 +2801,7 @@ void MPIIOMigrationCoProcessor::readBoundaryConds(int step)
             for (int b2 = 0; b2 < bcAddArray[ic].indexContainer_count; b2++)
                 indexContainerV.push_back(intArray2[b2]);
 
-            CbArray3D<int, IndexerX3X2X1> bcim(bcindexmatrixV, boundCondParamStr.nx1, boundCondParamStr.nx2,
-                                               boundCondParamStr.nx3);
+            CbArray3D<int, IndexerX3X2X1> bcim(bcindexmatrixV, boundCondParamStr.nx1, boundCondParamStr.nx2, boundCondParamStr.nx3);
             SPtr<Block3D> block1 = grid->getBlock(bcAddArray[ic].globalID);
 
             SPtr<BCProcessor> bcProc = bcProcessor->clone(block1->getKernel());
@@ -2694,10 +2825,10 @@ void MPIIOMigrationCoProcessor::readBoundaryConds(int step)
 
     delete nullBouCond;
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIOMigrationCoProcessor::readBoundaryConds end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 }
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.h
index ad7a93086afa379822fc7909a68fd39748dd607f..ca0de8f3e7ba315bc8a870f89063ea9f38d7b59f 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIOMigrationCoProcessor.h
@@ -25,7 +25,8 @@ public:
         AverageTriple       = 4,
         ShearStressVal      = 5,
         RelaxationFactor = 6,
-        PhaseField = 7
+        PhaseField1 = 7,
+        PhaseField2 = 8
     };
 
     MPIIOMigrationCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, SPtr<Communicator> comm);
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp
index ed5d3b275c006700d29c43f16928d2ddc08827f0..02e471c4d652a88335c29f481b6c08b15dcc49e8 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.cpp
@@ -25,8 +25,7 @@
 
 using namespace MPIIODataStructures;
 
-MPIIORestartCoProcessor::MPIIORestartCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path,
-                                                 SPtr<Communicator> comm)
+MPIIORestartCoProcessor::MPIIORestartCoProcessor(SPtr<Grid3D> grid, SPtr<UbScheduler> s, const std::string &path, SPtr<Communicator> comm)
     : MPIIOCoProcessor(grid, s, path, comm)
 {
     memset(&boundCondParamStr, 0, sizeof(boundCondParamStr));
@@ -34,7 +33,7 @@ MPIIORestartCoProcessor::MPIIORestartCoProcessor(SPtr<Grid3D> grid, SPtr<UbSched
     //-------------------------   define MPI types  ---------------------------------
 
     MPI_Datatype typesDataSet[3] = { MPI_DOUBLE, MPI_INT, MPI_CHAR };
-    int blocksDataSet[3]         = { 2, 5, 2 };
+    int blocksDataSet[3]         = { 5, 5, 2 };
     MPI_Aint offsetsDatatSet[3], lbDataSet, extentDataSet;
 
     offsetsDatatSet[0] = 0;
@@ -80,7 +79,8 @@ MPIIORestartCoProcessor::~MPIIORestartCoProcessor()
 //////////////////////////////////////////////////////////////////////////
 void MPIIORestartCoProcessor::process(double step)
 {
-    if (scheduler->isDue(step)) {
+    if (scheduler->isDue(step)) 
+    {
         if (comm->isRoot())
             UBLOG(logINFO, "MPIIORestartCoProcessor save step: " << step);
         if (comm->isRoot())
@@ -130,56 +130,77 @@ void MPIIORestartCoProcessor::writeDataSet(int step)
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
 
     dataSetParam dataSetParamStr1, dataSetParamStr2, dataSetParamStr3;
     DataSetRestart *dataSetArray = new DataSetRestart[blocksCount];
-    std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
+    std::vector<double> doubleValuesArrayF; // double-values (arrays of f's) in all blocks  Fdistribution
+    std::vector<double> doubleValuesArrayH1; // double-values (arrays of f's) in all blocks  H1distribution
+    // std::vector<double> doubleValuesArrayH2; // double-values (arrays of f's) in all blocks  H2distribution
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeDataSet start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
+    bool multiPhase = false;
     DSArraysPresence arrPresence;
     bool firstBlock        = true;
     int doubleCountInBlock = 0;
     int ic                 = 0;
 
-    SPtr<D3Q27EsoTwist3DSplittedVector> D3Q27EsoTwist3DSplittedVectorPtrF, D3Q27EsoTwist3DSplittedVectorPtrH;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF, localDistributionsH;
-    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF, nonLocalDistributionsH;
-    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF, zeroDistributionsH;
+    SPtr<D3Q27EsoTwist3DSplittedVector> D3Q27EsoTwist3DSplittedVectorPtrF, D3Q27EsoTwist3DSplittedVectorPtrH1, D3Q27EsoTwist3DSplittedVectorPtrH2;
+    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributionsF, localDistributionsH1, localDistributionsH2;
+    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributionsF, nonLocalDistributionsH1, nonLocalDistributionsH2;
+    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr zeroDistributionsF, zeroDistributionsH1, zeroDistributionsH2;
  
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    SPtr<LBMKernel> kernel;
+
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
         {
-            dataSetArray[ic].x1 =
-                block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetArray[ic].x2              = block->getX2();
-            dataSetArray[ic].x3              = block->getX3();
-            dataSetArray[ic].level           = block->getLevel();
-            dataSetArray[ic].ghostLayerWidth = block->getKernel()->getGhostLayerWidth();
-            dataSetArray[ic].collFactor      = block->getKernel()->getCollisionFactor();
-            dataSetArray[ic].deltaT          = block->getKernel()->getDeltaT();
-            dataSetArray[ic].compressible    = block->getKernel()->getCompressible();
-            dataSetArray[ic].withForcing     = block->getKernel()->getWithForcing();
-
-            D3Q27EsoTwist3DSplittedVectorPtrF = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(
-                block->getKernel()->getDataSet()->getFdistributions());
+            kernel = dynamicPointerCast<LBMKernel>(block->getKernel());
+
+            dataSetArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
+            dataSetArray[ic].x2 = block->getX2();
+            dataSetArray[ic].x3 = block->getX3();
+            dataSetArray[ic].level = block->getLevel();
+            dataSetArray[ic].ghostLayerWidth = kernel->getGhostLayerWidth();
+            dataSetArray[ic].collFactor = kernel->getCollisionFactor();
+            dataSetArray[ic].deltaT = kernel->getDeltaT();
+            dataSetArray[ic].compressible = kernel->getCompressible();
+            dataSetArray[ic].withForcing = kernel->getWithForcing();
+            dataSetArray[ic].collFactorL = kernel->getCollisionFactorL();
+            dataSetArray[ic].collFactorG = kernel->getCollisionFactorG();
+            dataSetArray[ic].densityRatio = kernel->getDensityRatio();
+
+            D3Q27EsoTwist3DSplittedVectorPtrF = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(block->getKernel()->getDataSet()->getFdistributions());
             localDistributionsF    = D3Q27EsoTwist3DSplittedVectorPtrF->getLocalDistributions();
             nonLocalDistributionsF = D3Q27EsoTwist3DSplittedVectorPtrF->getNonLocalDistributions();
             zeroDistributionsF     = D3Q27EsoTwist3DSplittedVectorPtrF->getZeroDistributions();
 
-            D3Q27EsoTwist3DSplittedVectorPtrH = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(
-                block->getKernel()->getDataSet()->getHdistributions());
-            localDistributionsH    = D3Q27EsoTwist3DSplittedVectorPtrH->getLocalDistributions();
-            nonLocalDistributionsH = D3Q27EsoTwist3DSplittedVectorPtrH->getNonLocalDistributions();
-            zeroDistributionsH     = D3Q27EsoTwist3DSplittedVectorPtrH->getZeroDistributions();
+            D3Q27EsoTwist3DSplittedVectorPtrH1 = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(block->getKernel()->getDataSet()->getHdistributions());
+            if (D3Q27EsoTwist3DSplittedVectorPtrH1 != 0)
+            {
+                multiPhase = true;
+                localDistributionsH1 = D3Q27EsoTwist3DSplittedVectorPtrH1->getLocalDistributions();
+                nonLocalDistributionsH1 = D3Q27EsoTwist3DSplittedVectorPtrH1->getNonLocalDistributions();
+                zeroDistributionsH1 = D3Q27EsoTwist3DSplittedVectorPtrH1->getZeroDistributions();
+            }
+
+            /*D3Q27EsoTwist3DSplittedVectorPtrH2 = dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(block->getKernel()->getDataSet()->getH2distributions());
+            if (D3Q27EsoTwist3DSplittedVectorPtrH2 != 0)
+            {
+                localDistributionsH2 = D3Q27EsoTwist3DSplittedVectorPtrH2->getLocalDistributions();
+                nonLocalDistributionsH2 = D3Q27EsoTwist3DSplittedVectorPtrH2->getNonLocalDistributions();
+                zeroDistributionsH2 = D3Q27EsoTwist3DSplittedVectorPtrH2->getZeroDistributions();
+            }*/
 
             if (firstBlock) // when first (any) valid block...
             {
@@ -204,96 +225,88 @@ void MPIIORestartCoProcessor::writeDataSet(int step)
                 }
 
                 // ... than save some parameters that are equal in all dataSets
-                dataSetParamStr1.nx1 = dataSetParamStr2.nx1 = dataSetParamStr3.nx1 =
-                    static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX1());
-                dataSetParamStr1.nx2 = dataSetParamStr2.nx2 = dataSetParamStr3.nx2 =
-                    static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX2());
-                dataSetParamStr1.nx3 = dataSetParamStr2.nx3 = dataSetParamStr3.nx3 =
-                    static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX3());
-
-             //  Fdistributions + Hdistributions
-                doubleCountInBlock =
-                    (dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
+                dataSetParamStr1.nx1 = dataSetParamStr2.nx1 = dataSetParamStr3.nx1 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX1());
+                dataSetParamStr1.nx2 = dataSetParamStr2.nx2 = dataSetParamStr3.nx2 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX2());
+                dataSetParamStr1.nx3 = dataSetParamStr2.nx3 = dataSetParamStr3.nx3 = static_cast<int>(block->getKernel()->getDataSet()->getFdistributions()->getNX3());
+
+                doubleCountInBlock = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
                      dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
-                     dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3]) * 2;
+                     dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> averageDensityArray =
-                    block->getKernel()->getDataSet()->getAverageDensity();
+                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> averageDensityArray = kernel->getDataSet()->getAverageDensity();
                 if (averageDensityArray)
                     arrPresence.isAverageDensityArrayPresent = true;
                 else
                     arrPresence.isAverageDensityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageVelocityArray3DPtr =
-                    block->getKernel()->getDataSet()->getAverageVelocity();
+                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageVelocityArray3DPtr = kernel->getDataSet()->getAverageVelocity();
                 if (AverageVelocityArray3DPtr)
                     arrPresence.isAverageVelocityArrayPresent = true;
                 else
                     arrPresence.isAverageVelocityArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageFluctArray3DPtr =
-                    block->getKernel()->getDataSet()->getAverageFluctuations();
+                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageFluctArray3DPtr = kernel->getDataSet()->getAverageFluctuations();
                 if (AverageFluctArray3DPtr)
                     arrPresence.isAverageFluktuationsArrayPresent = true;
                 else
                     arrPresence.isAverageFluktuationsArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageTripleArray3DPtr =
-                    block->getKernel()->getDataSet()->getAverageTriplecorrelations();
+                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageTripleArray3DPtr = kernel->getDataSet()->getAverageTriplecorrelations();
                 if (AverageTripleArray3DPtr)
                     arrPresence.isAverageTripleArrayPresent = true;
                 else
                     arrPresence.isAverageTripleArrayPresent = false;
 
-                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ShearStressValArray3DPtr =
-                    block->getKernel()->getDataSet()->getShearStressValues();
+                SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ShearStressValArray3DPtr = kernel->getDataSet()->getShearStressValues();
                 if (ShearStressValArray3DPtr)
                     arrPresence.isShearStressValArrayPresent = true;
                 else
                     arrPresence.isShearStressValArrayPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> relaxationFactor3DPtr =
-                    block->getKernel()->getDataSet()->getRelaxationFactor();
+                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> relaxationFactor3DPtr = kernel->getDataSet()->getRelaxationFactor();
                 if (relaxationFactor3DPtr)
                     arrPresence.isRelaxationFactorPresent = true;
                 else
                     arrPresence.isRelaxationFactorPresent = false;
 
-                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr =
-                    block->getKernel()->getDataSet()->getPhaseField();
-                if (phaseField3DPtr)
-                    arrPresence.isPhaseFieldPresent = true;
+                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr1 = kernel->getDataSet()->getPhaseField();
+                if (phaseField3DPtr1)
+                    arrPresence.isPhaseField1Present = true;
+                else
+                    arrPresence.isPhaseField1Present = false;
+
+                SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> phaseField3DPtr2 = kernel->getDataSet()->getPhaseField2();
+                if (phaseField3DPtr2)
+                    arrPresence.isPhaseField2Present = true;
                 else
-                    arrPresence.isPhaseFieldPresent = false;
+                    arrPresence.isPhaseField2Present = false;
 
                 firstBlock = false;
             }
 
-            if (localDistributionsF && (dataSetParamStr1.nx[0] > 0) && (dataSetParamStr1.nx[1] > 0) &&
-                (dataSetParamStr1.nx[2] > 0) && (dataSetParamStr1.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), localDistributionsF->getDataVector().begin(),
-                                         localDistributionsF->getDataVector().end());
-            if (nonLocalDistributionsF && (dataSetParamStr2.nx[0] > 0) && (dataSetParamStr2.nx[1] > 0) &&
-                (dataSetParamStr2.nx[2] > 0) && (dataSetParamStr2.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), nonLocalDistributionsF->getDataVector().begin(),
-                                         nonLocalDistributionsF->getDataVector().end());
-            if (zeroDistributionsF && (dataSetParamStr3.nx[0] > 0) && (dataSetParamStr3.nx[1] > 0) &&
-                (dataSetParamStr3.nx[2] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), zeroDistributionsF->getDataVector().begin(),
-                                         zeroDistributionsF->getDataVector().end());
-
-            if (localDistributionsH && (dataSetParamStr1.nx[0] > 0) && (dataSetParamStr1.nx[1] > 0) &&
-                (dataSetParamStr1.nx[2] > 0) && (dataSetParamStr1.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), localDistributionsH->getDataVector().begin(),
-                                         localDistributionsH->getDataVector().end());
-            if (nonLocalDistributionsH && (dataSetParamStr2.nx[0] > 0) && (dataSetParamStr2.nx[1] > 0) &&
-                (dataSetParamStr2.nx[2] > 0) && (dataSetParamStr2.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), nonLocalDistributionsH->getDataVector().begin(),
-                                         nonLocalDistributionsH->getDataVector().end());
-            if (zeroDistributionsH && (dataSetParamStr3.nx[0] > 0) && (dataSetParamStr3.nx[1] > 0) &&
-                (dataSetParamStr3.nx[2] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), zeroDistributionsH->getDataVector().begin(),
-                                         zeroDistributionsH->getDataVector().end());
+            if (localDistributionsF && (dataSetParamStr1.nx[0] > 0) && (dataSetParamStr1.nx[1] > 0) && (dataSetParamStr1.nx[2] > 0) && (dataSetParamStr1.nx[3] > 0))
+                doubleValuesArrayF.insert(doubleValuesArrayF.end(), localDistributionsF->getDataVector().begin(), localDistributionsF->getDataVector().end());
+            if (nonLocalDistributionsF && (dataSetParamStr2.nx[0] > 0) && (dataSetParamStr2.nx[1] > 0) && (dataSetParamStr2.nx[2] > 0) && (dataSetParamStr2.nx[3] > 0))
+                doubleValuesArrayF.insert(doubleValuesArrayF.end(), nonLocalDistributionsF->getDataVector().begin(), nonLocalDistributionsF->getDataVector().end());
+            if (zeroDistributionsF && (dataSetParamStr3.nx[0] > 0) && (dataSetParamStr3.nx[1] > 0) && (dataSetParamStr3.nx[2] > 0))
+                doubleValuesArrayF.insert(doubleValuesArrayF.end(), zeroDistributionsF->getDataVector().begin(), zeroDistributionsF->getDataVector().end());
+
+            if (multiPhase)
+            {
+                if (localDistributionsH1 && (dataSetParamStr1.nx[0] > 0) && (dataSetParamStr1.nx[1] > 0) && (dataSetParamStr1.nx[2] > 0) && (dataSetParamStr1.nx[3] > 0))
+                    doubleValuesArrayH1.insert(doubleValuesArrayH1.end(), localDistributionsH1->getDataVector().begin(), localDistributionsH1->getDataVector().end());
+                if (nonLocalDistributionsH1 && (dataSetParamStr2.nx[0] > 0) && (dataSetParamStr2.nx[1] > 0) && (dataSetParamStr2.nx[2] > 0) && (dataSetParamStr2.nx[3] > 0))
+                    doubleValuesArrayH1.insert(doubleValuesArrayH1.end(), nonLocalDistributionsH1->getDataVector().begin(), nonLocalDistributionsH1->getDataVector().end());
+                if (zeroDistributionsH1 && (dataSetParamStr3.nx[0] > 0) && (dataSetParamStr3.nx[1] > 0) && (dataSetParamStr3.nx[2] > 0))
+                    doubleValuesArrayH1.insert(doubleValuesArrayH1.end(), zeroDistributionsH1->getDataVector().begin(), zeroDistributionsH1->getDataVector().end());
+            }
+
+            /*if (localDistributionsH2 && (dataSetParamStr1.nx[0] > 0) && (dataSetParamStr1.nx[1] > 0) && (dataSetParamStr1.nx[2] > 0) && (dataSetParamStr1.nx[3] > 0))
+                doubleValuesArrayH2.insert(doubleValuesArrayH2.end(), localDistributionsH2->getDataVector().begin(), localDistributionsH2->getDataVector().end());
+            if (nonLocalDistributionsH2 && (dataSetParamStr2.nx[0] > 0) && (dataSetParamStr2.nx[1] > 0) && (dataSetParamStr2.nx[2] > 0) && (dataSetParamStr2.nx[3] > 0))
+                doubleValuesArrayH2.insert(doubleValuesArrayH2.end(), nonLocalDistributionsH2->getDataVector().begin(), nonLocalDistributionsH2->getDataVector().end());
+            if (zeroDistributionsH2 && (dataSetParamStr3.nx[0] > 0) && (dataSetParamStr3.nx[1] > 0) && (dataSetParamStr3.nx[2] > 0))
+                doubleValuesArrayH2.insert(doubleValuesArrayH2.end(), zeroDistributionsH2->getDataVector().begin(), zeroDistributionsH2->getDataVector().end());*/
 
             ic++;
         }
@@ -303,10 +316,10 @@ void MPIIORestartCoProcessor::writeDataSet(int step)
     MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeDataSet start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     // write to the file
@@ -315,15 +328,17 @@ void MPIIORestartCoProcessor::writeDataSet(int step)
     MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
     size_t next_write_offset = 0;
 
-    if (size > 1) {
-        if (rank == 0) {
-            next_write_offset = write_offset + 3 * sizeof(dataSetParam) +
-                                blocksCount * (sizeof(DataSetRestart) + doubleCountInBlock * sizeof(double));
+    if (size > 1) 
+    {
+        if (rank == 0) 
+        {
+            next_write_offset = write_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + doubleCountInBlock * sizeof(double));
             MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        } 
+        else 
+        {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + 3 * sizeof(dataSetParam) +
-                                blocksCount * (sizeof(DataSetRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + doubleCountInBlock * sizeof(double));
             if (rank < size - 1)
                 MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
@@ -342,7 +357,7 @@ void MPIIORestartCoProcessor::writeDataSet(int step)
 #endif
 
     MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSet.bin";
+    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetF.bin";
     int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
@@ -351,27 +366,54 @@ void MPIIORestartCoProcessor::writeDataSet(int step)
     MPI_File_write_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
     // each process writes common parameters of a dataSet
     MPI_File_write_at(file_handler, write_offset, &dataSetParamStr1, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), &dataSetParamStr2, 1,
-                      dataSetParamType, MPI_STATUS_IGNORE);
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + 2 * sizeof(dataSetParam)), &dataSetParamStr3, 1,
-                      dataSetParamType, MPI_STATUS_IGNORE);
+    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), &dataSetParamStr2, 1, dataSetParamType, MPI_STATUS_IGNORE);
+    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + 2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType, MPI_STATUS_IGNORE);
     // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + 3 * sizeof(dataSetParam)), dataSetArray, blocksCount,
-                      dataSetType, MPI_STATUS_IGNORE);
+    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + 3 * sizeof(dataSetParam)), dataSetArray, blocksCount, dataSetType, MPI_STATUS_IGNORE);
     // each process writes the dataSet arrays
-    if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler,
-                          (MPI_Offset)(write_offset + 3 * sizeof(dataSetParam) + blocksCount * sizeof(DataSetRestart)),
-                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+    if (doubleValuesArrayF.size() > 0)
+        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + 3 * sizeof(dataSetParam) + blocksCount * sizeof(DataSetRestart)),
+                          &doubleValuesArrayF[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
 
     MPI_File_sync(file_handler);
     MPI_File_close(&file_handler);
 
+    //------------------------------------------------------------------------------------------------------------------
+    if (multiPhase)
+    {
+        filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH1.bin";
+        rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
+        if (rc != MPI_SUCCESS)
+            throw UbException(UB_EXARGS, "couldn't open file " + filename);
+
+        // each process writes the dataSet arrays
+        if (doubleValuesArrayH1.size() > 0)
+            MPI_File_write_at(file_handler, write_offset, &doubleValuesArrayH1[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+
+        MPI_File_sync(file_handler);
+        MPI_File_close(&file_handler);
+    }
+
+    //--------------------------------------------------------------------------------------------------------------------
+    /*filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH2.bin";
+    rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
+    if (rc != MPI_SUCCESS)
+        throw UbException(UB_EXARGS, "couldn't open file " + filename);
+
+    // each process writes the dataSet arrays
+    if (doubleValuesArrayH1.size() > 0)
+        MPI_File_write_at(file_handler, write_offset, &doubleValuesArrayH2[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+
+    MPI_File_sync(file_handler);
+    MPI_File_close(&file_handler);*/
+
+    //--------------------------------
     MPI_Type_free(&dataSetDoubleType);
 
     delete[] dataSetArray;
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeDataSet time: " << finish - start << " s");
     }
@@ -403,8 +445,11 @@ void MPIIORestartCoProcessor::writeDataSet(int step)
     if (arrPresence.isRelaxationFactorPresent)
         writeRelaxationFactor(step);
 
-    if (arrPresence.isPhaseFieldPresent)
-        writePhaseField(step);
+    if (arrPresence.isPhaseField1Present)
+        writePhaseField(step, 1);
+
+    if (arrPresence.isPhaseField2Present)
+        writePhaseField(step, 2);
 }
 
 void MPIIORestartCoProcessor::writeAverageDensityArray(int step)
@@ -418,7 +463,8 @@ void MPIIORestartCoProcessor::writeAverageDensityArray(int step)
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
@@ -427,10 +473,10 @@ void MPIIORestartCoProcessor::writeAverageDensityArray(int step)
     std::vector<double> doubleValuesArray; // double-values of the AverageDensityArray in all blocks
     dataSetParam dataSetParamStr;
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageDensityArray start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     bool firstBlock        = true;
@@ -438,13 +484,13 @@ void MPIIORestartCoProcessor::writeAverageDensityArray(int step)
     int ic                 = 0;
     SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> averageDensityArray;
 
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
         {
-            dataSetSmallArray[ic].x1 =
-                block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2    = block->getX2();
-            dataSetSmallArray[ic].x3    = block->getX3();
+            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
+            dataSetSmallArray[ic].x2 = block->getX2();
+            dataSetSmallArray[ic].x3 = block->getX3();
             dataSetSmallArray[ic].level = block->getLevel();
 
             averageDensityArray = block->getKernel()->getDataSet()->getAverageDensity();
@@ -462,10 +508,8 @@ void MPIIORestartCoProcessor::writeAverageDensityArray(int step)
                 firstBlock = false;
             }
 
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) &&
-                (dataSetParamStr.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), averageDensityArray->getDataVector().begin(),
-                                         averageDensityArray->getDataVector().end());
+            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) && (dataSetParamStr.nx[3] > 0))
+                doubleValuesArray.insert(doubleValuesArray.end(), averageDensityArray->getDataVector().begin(), averageDensityArray->getDataVector().end());
 
             ic++;
         }
@@ -475,10 +519,10 @@ void MPIIORestartCoProcessor::writeAverageDensityArray(int step)
     MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageDensityArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     // write to the file
@@ -487,15 +531,17 @@ void MPIIORestartCoProcessor::writeAverageDensityArray(int step)
     MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
     size_t next_write_offset = 0;
 
-    if (size > 1) {
-        if (rank == 0) {
-            next_write_offset = write_offset + sizeof(dataSetParam) +
-                                blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+    if (size > 1) 
+    {
+        if (rank == 0) 
+        {
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        } 
+        else 
+        {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) +
-                                blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             if (rank < size - 1)
                 MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
@@ -524,19 +570,18 @@ void MPIIORestartCoProcessor::writeAverageDensityArray(int step)
     // each process writes common parameters of a dataSet
     MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
     // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                      dataSetSmallType, MPI_STATUS_IGNORE);
+    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
     // each process writes the dataSet arrays
     if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler,
-                          (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
+        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
                           &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
 
     MPI_File_sync(file_handler);
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageDensityArray time: " << finish - start << " s");
     }
@@ -555,7 +600,8 @@ void MPIIORestartCoProcessor::writeAverageVelocityArray(int step)
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++)
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
@@ -564,10 +610,10 @@ void MPIIORestartCoProcessor::writeAverageVelocityArray(int step)
     std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
     dataSetParam dataSetParamStr;
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageVelocityArray start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     bool firstBlock        = true;
@@ -575,13 +621,13 @@ void MPIIORestartCoProcessor::writeAverageVelocityArray(int step)
     int ic                 = 0;
     SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageVelocityArray3DPtr;
 
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++)
+    {
         for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
         {
-            dataSetSmallArray[ic].x1 =
-                block->getX1(); // coordinates of the block needed to find it while regenerating the grid
-            dataSetSmallArray[ic].x2    = block->getX2();
-            dataSetSmallArray[ic].x3    = block->getX3();
+            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
+            dataSetSmallArray[ic].x2 = block->getX2();
+            dataSetSmallArray[ic].x3 = block->getX3();
             dataSetSmallArray[ic].level = block->getLevel();
 
             AverageVelocityArray3DPtr = block->getKernel()->getDataSet()->getAverageVelocity();
@@ -593,16 +639,13 @@ void MPIIORestartCoProcessor::writeAverageVelocityArray(int step)
                 dataSetParamStr.nx[1] = static_cast<int>(AverageVelocityArray3DPtr->getNX2());
                 dataSetParamStr.nx[2] = static_cast<int>(AverageVelocityArray3DPtr->getNX3());
                 dataSetParamStr.nx[3] = static_cast<int>(AverageVelocityArray3DPtr->getNX4());
-                doubleCountInBlock =
-                    dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
 
                 firstBlock = false;
             }
 
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) &&
-                (dataSetParamStr.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), AverageVelocityArray3DPtr->getDataVector().begin(),
-                                         AverageVelocityArray3DPtr->getDataVector().end());
+            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) && (dataSetParamStr.nx[3] > 0))
+                doubleValuesArray.insert(doubleValuesArray.end(), AverageVelocityArray3DPtr->getDataVector().begin(), AverageVelocityArray3DPtr->getDataVector().end());
 
             ic++;
         }
@@ -612,10 +655,10 @@ void MPIIORestartCoProcessor::writeAverageVelocityArray(int step)
     MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageVelocityArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     // write to the file
@@ -624,15 +667,16 @@ void MPIIORestartCoProcessor::writeAverageVelocityArray(int step)
     MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
     size_t next_write_offset = 0;
 
-    if (size > 1) {
-        if (rank == 0) {
-            next_write_offset = write_offset + sizeof(dataSetParam) +
-                                blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+    if (size > 1)
+    {
+        if (rank == 0)
+        {
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        } else
+        {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) +
-                                blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             if (rank < size - 1)
                 MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
@@ -661,19 +705,18 @@ void MPIIORestartCoProcessor::writeAverageVelocityArray(int step)
     // each process writes common parameters of a dataSet
     MPI_File_write_at(file_handler, write_offset, &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
     // each process writes data identifying blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                      dataSetSmallType, MPI_STATUS_IGNORE);
+    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
     // each process writes the dataSet arrays
     if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler,
-                          (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
+        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
                           &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
 
     MPI_File_sync(file_handler);
     MPI_File_close(&file_handler);
 
     MPI_Type_free(&dataSetDoubleType);
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageVelocityArray time: " << finish - start << " s");
     }
@@ -692,7 +735,8 @@ void MPIIORestartCoProcessor::writeAverageFluktuationsArray(int step)
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++)
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
@@ -701,10 +745,10 @@ void MPIIORestartCoProcessor::writeAverageFluktuationsArray(int step)
     std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
     dataSetParam dataSetParamStr;
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageFluktuationsArray start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     bool firstBlock        = true;
@@ -712,11 +756,11 @@ void MPIIORestartCoProcessor::writeAverageFluktuationsArray(int step)
     int ic                 = 0;
     SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageFluctArray3DPtr;
 
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++)
+    {
         for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
         {
-            dataSetSmallArray[ic].x1 =
-                block->getX1(); // coordinates of the block needed to find it while regenerating the grid
+            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
             dataSetSmallArray[ic].x2    = block->getX2();
             dataSetSmallArray[ic].x3    = block->getX3();
             dataSetSmallArray[ic].level = block->getLevel();
@@ -730,14 +774,12 @@ void MPIIORestartCoProcessor::writeAverageFluktuationsArray(int step)
                 dataSetParamStr.nx[1] = static_cast<int>(AverageFluctArray3DPtr->getNX2());
                 dataSetParamStr.nx[2] = static_cast<int>(AverageFluctArray3DPtr->getNX3());
                 dataSetParamStr.nx[3] = static_cast<int>(AverageFluctArray3DPtr->getNX4());
-                doubleCountInBlock =
-                    dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
 
                 firstBlock = false;
             }
 
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) &&
-                (dataSetParamStr.nx[3] > 0))
+            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) && (dataSetParamStr.nx[3] > 0))
                 doubleValuesArray.insert(doubleValuesArray.end(), AverageFluctArray3DPtr->getDataVector().begin(),
                                          AverageFluctArray3DPtr->getDataVector().end());
 
@@ -749,10 +791,10 @@ void MPIIORestartCoProcessor::writeAverageFluktuationsArray(int step)
     MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageFluktuationsArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     // write to the file
@@ -761,15 +803,16 @@ void MPIIORestartCoProcessor::writeAverageFluktuationsArray(int step)
     MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
     size_t next_write_offset = 0;
 
-    if (size > 1) {
-        if (rank == 0) {
-            next_write_offset = write_offset + sizeof(dataSetParam) +
-                                blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+    if (size > 1)
+    {
+        if (rank == 0)
+        {
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        } else
+        {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) +
-                                blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             if (rank < size - 1)
                 MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
@@ -788,8 +831,7 @@ void MPIIORestartCoProcessor::writeAverageFluktuationsArray(int step)
 #endif
 
     MPI_File file_handler;
-    std::string filename =
-        path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageFluktuationsArray.bin";
+    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpAverageFluktuationsArray.bin";
     int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
@@ -803,15 +845,15 @@ void MPIIORestartCoProcessor::writeAverageFluktuationsArray(int step)
                       dataSetSmallType, MPI_STATUS_IGNORE);
     // each process writes the dataSet arrays
     if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler,
-                          (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
+        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
                           &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
 
     MPI_File_sync(file_handler);
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageFluktuationsArray time: " << finish - start << " s");
     }
@@ -830,7 +872,8 @@ void MPIIORestartCoProcessor::writeAverageTripleArray(int step)
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++)
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
@@ -839,10 +882,10 @@ void MPIIORestartCoProcessor::writeAverageTripleArray(int step)
     std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
     dataSetParam dataSetParamStr;
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageTripleArray start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     bool firstBlock        = true;
@@ -850,11 +893,11 @@ void MPIIORestartCoProcessor::writeAverageTripleArray(int step)
     int ic                 = 0;
     SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> AverageTripleArray3DPtr;
 
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++)
+    {
         for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
         {
-            dataSetSmallArray[ic].x1 =
-                block->getX1(); // coordinates of the block needed to find it while regenerating the grid
+            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
             dataSetSmallArray[ic].x2    = block->getX2();
             dataSetSmallArray[ic].x3    = block->getX3();
             dataSetSmallArray[ic].level = block->getLevel();
@@ -868,16 +911,13 @@ void MPIIORestartCoProcessor::writeAverageTripleArray(int step)
                 dataSetParamStr.nx[1] = static_cast<int>(AverageTripleArray3DPtr->getNX2());
                 dataSetParamStr.nx[2] = static_cast<int>(AverageTripleArray3DPtr->getNX3());
                 dataSetParamStr.nx[3] = static_cast<int>(AverageTripleArray3DPtr->getNX4());
-                doubleCountInBlock =
-                    dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
 
                 firstBlock = false;
             }
 
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) &&
-                (dataSetParamStr.nx[3] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), AverageTripleArray3DPtr->getDataVector().begin(),
-                                         AverageTripleArray3DPtr->getDataVector().end());
+            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) && (dataSetParamStr.nx[3] > 0))
+                doubleValuesArray.insert(doubleValuesArray.end(), AverageTripleArray3DPtr->getDataVector().begin(), AverageTripleArray3DPtr->getDataVector().end());
 
             ic++;
         }
@@ -887,10 +927,10 @@ void MPIIORestartCoProcessor::writeAverageTripleArray(int step)
     MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeAverageTripleArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     // write to the file
@@ -899,15 +939,17 @@ void MPIIORestartCoProcessor::writeAverageTripleArray(int step)
     MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
     size_t next_write_offset = 0;
 
-    if (size > 1) {
-        if (rank == 0) {
-            next_write_offset = write_offset + sizeof(dataSetParam) +
-                                blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+    if (size > 1)
+    {
+        if (rank == 0)
+        {
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        }
+        else
+        {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) +
-                                blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             if (rank < size - 1)
                 MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
@@ -940,8 +982,7 @@ void MPIIORestartCoProcessor::writeAverageTripleArray(int step)
                       dataSetSmallType, MPI_STATUS_IGNORE);
     // each process writes the dataSet arrays
     if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler,
-                          (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
+        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
                           &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
 
     MPI_File_sync(file_handler);
@@ -967,7 +1008,8 @@ void MPIIORestartCoProcessor::writeShearStressValArray(int step)
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++)
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
@@ -976,10 +1018,10 @@ void MPIIORestartCoProcessor::writeShearStressValArray(int step)
     std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
     dataSetParam dataSetParamStr;
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeShearStressValArray start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     bool firstBlock        = true;
@@ -987,11 +1029,11 @@ void MPIIORestartCoProcessor::writeShearStressValArray(int step)
     int ic                 = 0;
     SPtr<CbArray4D<LBMReal, IndexerX4X3X2X1>> ShearStressValArray3DPtr;
 
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++)
+    {
         for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
         {
-            dataSetSmallArray[ic].x1 =
-                block->getX1(); // coordinates of the block needed to find it while regenerating the grid
+            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
             dataSetSmallArray[ic].x2    = block->getX2();
             dataSetSmallArray[ic].x3    = block->getX3();
             dataSetSmallArray[ic].level = block->getLevel();
@@ -1005,14 +1047,12 @@ void MPIIORestartCoProcessor::writeShearStressValArray(int step)
                 dataSetParamStr.nx[1] = static_cast<int>(ShearStressValArray3DPtr->getNX2());
                 dataSetParamStr.nx[2] = static_cast<int>(ShearStressValArray3DPtr->getNX3());
                 dataSetParamStr.nx[3] = static_cast<int>(ShearStressValArray3DPtr->getNX4());
-                doubleCountInBlock =
-                    dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
 
                 firstBlock = false;
             }
 
-            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) &&
-                (dataSetParamStr.nx[3] > 0))
+            if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0) && (dataSetParamStr.nx[3] > 0))
                 doubleValuesArray.insert(doubleValuesArray.end(), ShearStressValArray3DPtr->getDataVector().begin(),
                                          ShearStressValArray3DPtr->getDataVector().end());
 
@@ -1024,10 +1064,10 @@ void MPIIORestartCoProcessor::writeShearStressValArray(int step)
     MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeShearStressValArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     // write to the file
@@ -1036,15 +1076,17 @@ void MPIIORestartCoProcessor::writeShearStressValArray(int step)
     MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
     size_t next_write_offset = 0;
 
-    if (size > 1) {
-        if (rank == 0) {
-            next_write_offset = write_offset + sizeof(dataSetParam) +
-                                blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+    if (size > 1)
+    {
+        if (rank == 0)
+        {
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        }
+        else
+        {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) +
-                                blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             if (rank < size - 1)
                 MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
@@ -1077,15 +1119,15 @@ void MPIIORestartCoProcessor::writeShearStressValArray(int step)
                       dataSetSmallType, MPI_STATUS_IGNORE);
     // each process writes the dataSet arrays
     if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler,
-                          (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
+        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
                           &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
 
     MPI_File_sync(file_handler);
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeShearStressValArray time: " << finish - start << " s");
     }
@@ -1104,7 +1146,8 @@ void MPIIORestartCoProcessor::writeRelaxationFactor(int step)
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++)
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
@@ -1113,10 +1156,10 @@ void MPIIORestartCoProcessor::writeRelaxationFactor(int step)
     std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
     dataSetParam dataSetParamStr;
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeRelaxationFactor start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     bool firstBlock        = true;
@@ -1124,11 +1167,11 @@ void MPIIORestartCoProcessor::writeRelaxationFactor(int step)
     int ic                 = 0;
     SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> RelaxationFactor3DPtr;
 
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++)
+    {
         for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
         {
-            dataSetSmallArray[ic].x1 =
-                block->getX1(); // coordinates of the block needed to find it while regenerating the grid
+            dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
             dataSetSmallArray[ic].x2    = block->getX2();
             dataSetSmallArray[ic].x3    = block->getX3();
             dataSetSmallArray[ic].level = block->getLevel();
@@ -1142,8 +1185,7 @@ void MPIIORestartCoProcessor::writeRelaxationFactor(int step)
                 dataSetParamStr.nx[1] = static_cast<int>(RelaxationFactor3DPtr->getNX2());
                 dataSetParamStr.nx[2] = static_cast<int>(RelaxationFactor3DPtr->getNX3());
                 dataSetParamStr.nx[3] = 1;
-                doubleCountInBlock =
-                    dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
 
                 firstBlock = false;
             }
@@ -1160,10 +1202,10 @@ void MPIIORestartCoProcessor::writeRelaxationFactor(int step)
     MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeRelaxationFactor start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     // write to the file
@@ -1172,15 +1214,17 @@ void MPIIORestartCoProcessor::writeRelaxationFactor(int step)
     MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
     size_t next_write_offset = 0;
 
-    if (size > 1) {
-        if (rank == 0) {
-            next_write_offset = write_offset + sizeof(dataSetParam) +
-                                blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+    if (size > 1) 
+    {
+        if (rank == 0) 
+        {
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        } 
+        else 
+        {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) +
-                                blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             if (rank < size - 1)
                 MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
@@ -1213,15 +1257,15 @@ void MPIIORestartCoProcessor::writeRelaxationFactor(int step)
                       dataSetSmallType, MPI_STATUS_IGNORE);
     // each process writes the dataSet arrays
     if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler,
-                          (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
+        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
                           &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
 
     MPI_File_sync(file_handler);
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeRelaxationFactor time: " << finish - start << " s");
     }
@@ -1229,7 +1273,7 @@ void MPIIORestartCoProcessor::writeRelaxationFactor(int step)
     delete[] dataSetSmallArray;
 }
 
-void MPIIORestartCoProcessor::writePhaseField(int step)
+void MPIIORestartCoProcessor::writePhaseField(int step, int fieldN)
 {
     int rank, size;
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -1240,7 +1284,8 @@ void MPIIORestartCoProcessor::writePhaseField(int step)
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
@@ -1249,10 +1294,10 @@ void MPIIORestartCoProcessor::writePhaseField(int step)
     std::vector<double> doubleValuesArray; // double-values (arrays of f's) in all blocks
     dataSetParam dataSetParamStr;
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writePhaseField start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     bool firstBlock        = true;
@@ -1260,7 +1305,8 @@ void MPIIORestartCoProcessor::writePhaseField(int step)
     int ic                 = 0;
     SPtr<CbArray3D<LBMReal, IndexerX3X2X1>> PhaseField3DPtr;
 
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         for (SPtr<Block3D> block : blocksVector[level]) //	blocks of the current level
         {
             dataSetSmallArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
@@ -1268,7 +1314,10 @@ void MPIIORestartCoProcessor::writePhaseField(int step)
             dataSetSmallArray[ic].x3 = block->getX3();
             dataSetSmallArray[ic].level = block->getLevel();
 
-            PhaseField3DPtr = block->getKernel()->getDataSet()->getPhaseField();
+            if(fieldN == 1)
+                PhaseField3DPtr = block->getKernel()->getDataSet()->getPhaseField();
+            else
+                PhaseField3DPtr = block->getKernel()->getDataSet()->getPhaseField2();
 
             if (firstBlock) // when first (any) valid block...
             {
@@ -1277,28 +1326,25 @@ void MPIIORestartCoProcessor::writePhaseField(int step)
                 dataSetParamStr.nx[1] = static_cast<int>(PhaseField3DPtr->getNX2());
                 dataSetParamStr.nx[2] = static_cast<int>(PhaseField3DPtr->getNX3());
                 dataSetParamStr.nx[3] = 1;
-                doubleCountInBlock =
-                    dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+                doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
 
                 firstBlock = false;
             }
-
             if ((dataSetParamStr.nx[0] > 0) && (dataSetParamStr.nx[1] > 0) && (dataSetParamStr.nx[2] > 0))
-                doubleValuesArray.insert(doubleValuesArray.end(), PhaseField3DPtr->getDataVector().begin(),
-                                         PhaseField3DPtr->getDataVector().end());
+                doubleValuesArray.insert(doubleValuesArray.end(), PhaseField3DPtr->getDataVector().begin(), PhaseField3DPtr->getDataVector().end());
 
             ic++;
         }
     }
-
+        
     // register new MPI-types depending on the block-specific information
     MPI_Type_contiguous(doubleCountInBlock, MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writePhaseField start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     // write to the file
@@ -1307,15 +1353,17 @@ void MPIIORestartCoProcessor::writePhaseField(int step)
     MPI_Offset write_offset  = (MPI_Offset)(size * sizeof(int));
     size_t next_write_offset = 0;
 
-    if (size > 1) {
-        if (rank == 0) {
-            next_write_offset = write_offset + sizeof(dataSetParam) +
-                                blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+    if (size > 1) 
+    {
+        if (rank == 0) 
+        {
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        } 
+        else 
+        {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_write_offset = write_offset + sizeof(dataSetParam) +
-                                blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_write_offset = write_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             if (rank < size - 1)
                 MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
@@ -1334,7 +1382,9 @@ void MPIIORestartCoProcessor::writePhaseField(int step)
 #endif
 
     MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPhaseField.bin";
+    std::string filename;
+    if(fieldN == 1) filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPhaseField1.bin";
+    else filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPhaseField2.bin";
     int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, info, &file_handler);
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
@@ -1348,15 +1398,15 @@ void MPIIORestartCoProcessor::writePhaseField(int step)
                       dataSetSmallType, MPI_STATUS_IGNORE);
     // each process writes the dataSet arrays
     if (doubleValuesArray.size() > 0)
-        MPI_File_write_at(file_handler,
-                          (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
+        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
                           &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
 
     MPI_File_sync(file_handler);
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIORestartCoProcessor::writePhaseField time: " << finish - start << " s");
     }
@@ -1370,10 +1420,10 @@ void MPIIORestartCoProcessor::writeBoundaryConds(int step)
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds start collect data rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     int blocksCount          = 0; // quantity of blocks in the grid, max 2147483648 blocks!
@@ -1384,7 +1434,8 @@ void MPIIORestartCoProcessor::writeBoundaryConds(int step)
     std::vector<SPtr<Block3D>> blocksVector[25];
     int minInitLevel = this->grid->getCoarsestInitializedLevel();
     int maxInitLevel = this->grid->getFinestInitializedLevel();
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         grid->getBlocks(level, rank, blocksVector[level]);
         blocksCount += static_cast<int>(blocksVector[level].size());
     }
@@ -1394,27 +1445,31 @@ void MPIIORestartCoProcessor::writeBoundaryConds(int step)
     std::vector<int> bcindexmatrixV;
     std::vector<int> indexContainerV;
     bool bcindexmatrixCountNotInit = true;
-    int ic                         = 0;
+    int ic = 0;
     SPtr<BCArray3D> bcArr;
 
-    for (int level = minInitLevel; level <= maxInitLevel; level++) {
+    for (int level = minInitLevel; level <= maxInitLevel; level++) 
+    {
         for (SPtr<Block3D> block : blocksVector[level]) // all the blocks of the current level
         {
             bcArr = block->getKernel()->getBCProcessor()->getBCArray();
 
-            bcAddArray[ic].x1 =
-                block->getX1(); // coordinates of the block needed to find it while regenerating the grid
+            bcAddArray[ic].x1 = block->getX1(); // coordinates of the block needed to find it while regenerating the grid
             bcAddArray[ic].x2                   = block->getX2();
             bcAddArray[ic].x3                   = block->getX3();
             bcAddArray[ic].level                = block->getLevel();
             bcAddArray[ic].boundCond_count      = 0; // how many BoundaryConditions in this block
             bcAddArray[ic].indexContainer_count = 0; // how many indexContainer-values in this block
 
-            for (std::size_t bc = 0; bc < bcArr->getBCVectorSize(); bc++) {
+            for (std::size_t bc = 0; bc < bcArr->getBCVectorSize(); bc++) 
+            {
                 BoundaryCondition *bouCond = new BoundaryCondition();
-                if (bcArr->bcvector[bc] == NULL) {
+                if (bcArr->bcvector[bc] == NULL) 
+                {
                     memset(bouCond, 0, sizeof(BoundaryCondition));
-                } else {
+                } 
+                else 
+                {
                     bouCond->noslipBoundaryFlags    = bcArr->bcvector[bc]->getNoSlipBoundary();
                     bouCond->slipBoundaryFlags      = bcArr->bcvector[bc]->getSlipBoundary();
                     bouCond->velocityBoundaryFlags  = bcArr->bcvector[bc]->getVelocityBoundary();
@@ -1445,15 +1500,15 @@ void MPIIORestartCoProcessor::writeBoundaryConds(int step)
             // the quantity of elements in the bcindexmatrix array (CbArray3D<int, IndexerX3X2X1>) in bcArray(BCArray3D)
             // is always equal, this will be the size of the "write-read-block" in MPI_write_.../MPI_read-functions when
             // writing/reading BoundConds
-            if (bcindexmatrixCountNotInit) {
+            if (bcindexmatrixCountNotInit) 
+            {
                 boundCondParamStr.nx1                = static_cast<int>(bcArr->bcindexmatrix.getNX1());
                 boundCondParamStr.nx2                = static_cast<int>(bcArr->bcindexmatrix.getNX2());
                 boundCondParamStr.nx3                = static_cast<int>(bcArr->bcindexmatrix.getNX3());
                 boundCondParamStr.bcindexmatrixCount = static_cast<int>(bcArr->bcindexmatrix.getDataVector().size());
                 bcindexmatrixCountNotInit            = false;
             }
-            bcindexmatrixV.insert(bcindexmatrixV.end(), bcArr->bcindexmatrix.getDataVector().begin(),
-                                  bcArr->bcindexmatrix.getDataVector().end());
+            bcindexmatrixV.insert(bcindexmatrixV.end(), bcArr->bcindexmatrix.getDataVector().begin(), bcArr->bcindexmatrix.getDataVector().end());
 
             indexContainerV.insert(indexContainerV.end(), bcArr->indexContainer.begin(), bcArr->indexContainer.end());
             bcAddArray[ic].indexContainer_count = static_cast<int>(bcArr->indexContainer.size());
@@ -1470,7 +1525,8 @@ void MPIIORestartCoProcessor::writeBoundaryConds(int step)
     int bcBlockCount = (int)(count_boundCond / BLOCK_SIZE);
     if (bcBlockCount * BLOCK_SIZE < (int)count_boundCond)
         bcBlockCount += 1;
-    for (int i = (int)count_boundCond; i < bcBlockCount * BLOCK_SIZE; i++) {
+    for (int i = (int)count_boundCond; i < bcBlockCount * BLOCK_SIZE; i++) 
+    {
         BoundaryCondition *bouCond = new BoundaryCondition();
         memset(bouCond, 0, sizeof(BoundaryCondition));
         bcVector.push_back(*bouCond);
@@ -1485,11 +1541,15 @@ void MPIIORestartCoProcessor::writeBoundaryConds(int step)
     MPI_Offset write_offset  = (MPI_Offset)(size * (3 * sizeof(int) + sizeof(boundCondParam)));
     size_t next_write_offset = 0;
 
-    if (size > 1) {
-        if (rank == 0) {
+    if (size > 1) 
+    {
+        if (rank == 0) 
+        {
             next_write_offset = write_offset + byteCount;
             MPI_Send(&next_write_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        } 
+        else 
+        {
             MPI_Recv(&write_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
             next_write_offset = write_offset + byteCount;
             if (rank < size - 1)
@@ -1497,10 +1557,10 @@ void MPIIORestartCoProcessor::writeBoundaryConds(int step)
         }
     }
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     double start, finish;
@@ -1526,14 +1586,11 @@ void MPIIORestartCoProcessor::writeBoundaryConds(int step)
     // each process writes the quantity of it's blocks
     MPI_File_write_at(file_handler, write_offset1, &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
     // each process writes the quantity of "big blocks" of BLOCK_SIZE of boundary conditions
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + sizeof(int)), &bcBlockCount, 1, MPI_INT,
-                      MPI_STATUS_IGNORE);
+    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + sizeof(int)), &bcBlockCount, 1, MPI_INT, MPI_STATUS_IGNORE);
     // each process writes the quantity of indexContainer elements in all blocks
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + 2 * sizeof(int)), &count_indexContainer, 1, MPI_INT,
-                      MPI_STATUS_IGNORE);
+    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + 2 * sizeof(int)), &count_indexContainer, 1, MPI_INT,  MPI_STATUS_IGNORE);
     // each process writes the quantity of bcindexmatrix elements in every block
-    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + 3 * sizeof(int)), &boundCondParamStr, 1,
-                      boundCondParamType, MPI_STATUS_IGNORE);
+    MPI_File_write_at(file_handler, (MPI_Offset)(write_offset1 + 3 * sizeof(int)), &boundCondParamStr, 1, boundCondParamType, MPI_STATUS_IGNORE);
 
     // each process writes data identifying the blocks
     MPI_File_write_at(file_handler, write_offset, bcAddArray, blocksCount, boundCondTypeAdd, MPI_STATUS_IGNORE);
@@ -1543,23 +1600,19 @@ void MPIIORestartCoProcessor::writeBoundaryConds(int step)
                           bcBlockCount, boundCondType1000, MPI_STATUS_IGNORE);
     // each process writes bcindexmatrix values
     if (bcindexmatrixV.size() > 0)
-        MPI_File_write_at(file_handler,
-                          (MPI_Offset)(write_offset + blocksCount * sizeof(BCAddRestart) +
-                                       bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition)),
+        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + blocksCount * sizeof(BCAddRestart) + bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition)),
                           &bcindexmatrixV[0], blocksCount, bcindexmatrixType, MPI_STATUS_IGNORE);
     // each process writes indexContainer values
     if (indexContainerV.size() > 0)
-        MPI_File_write_at(file_handler,
-                          (MPI_Offset)(write_offset + blocksCount * sizeof(BCAddRestart) +
-                                       bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition) +
-                                       blocksCount * boundCondParamStr.bcindexmatrixCount * sizeof(int)),
-                          &indexContainerV[0], count_indexContainer, MPI_INT, MPI_STATUS_IGNORE);
+        MPI_File_write_at(file_handler, (MPI_Offset)(write_offset + blocksCount * sizeof(BCAddRestart) + bcBlockCount * BLOCK_SIZE * sizeof(BoundaryCondition) +
+                      blocksCount * boundCondParamStr.bcindexmatrixCount * sizeof(int)), &indexContainerV[0], count_indexContainer, MPI_INT, MPI_STATUS_IGNORE);
 
     MPI_File_sync(file_handler);
     MPI_File_close(&file_handler);
     MPI_Type_free(&bcindexmatrixType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIORestartCoProcessor::writeBoundaryConds time: " << finish - start << " s");
     }
@@ -1593,17 +1646,18 @@ void MPIIORestartCoProcessor::readDataSet(int step)
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
+
     double start, finish;
     if (comm->isRoot())
         start = MPI_Wtime();
 
     MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSet.bin";
+    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetF.bin";
     int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
@@ -1611,6 +1665,7 @@ void MPIIORestartCoProcessor::readDataSet(int step)
     // calculate the read offset
     MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
     size_t next_read_offset = 0;
+    bool multiPhase = false;
 
     // read count of blocks
     int blocksCount = 0;
@@ -1618,122 +1673,146 @@ void MPIIORestartCoProcessor::readDataSet(int step)
 
     MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
     MPI_File_read_at(file_handler, read_offset, &dataSetParamStr1, 1, dataSetParamType, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), &dataSetParamStr2, 1,
-                     dataSetParamType, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + 2 * sizeof(dataSetParam)), &dataSetParamStr3, 1,
-                     dataSetParamType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), &dataSetParamStr2, 1, dataSetParamType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + 2 * sizeof(dataSetParam)), &dataSetParamStr3, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
     DataSetRestart *dataSetArray = new DataSetRestart[blocksCount];
-    double doubleCountInBlock =
-        (dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
+    double doubleCountInBlock = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3] +
         dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3] +
-        dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3]) * 2;
-    std::vector<double> doubleValuesArray(size_t(blocksCount * doubleCountInBlock)); // double-values in all blocks
+        dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
+    std::vector<double> doubleValuesArrayF(size_t(blocksCount * doubleCountInBlock)); // double-values in all blocks  Fdistributions
+    std::vector<double> doubleValuesArrayH1; // double-values in all blocks  H1distributions
+    //std::vector<double> doubleValuesArrayH2; // double-values in all blocks  H2distributions
 
     //   define MPI_types depending on the block-specific information
     MPI_Type_contiguous(int(doubleCountInBlock), MPI_DOUBLE, &dataSetDoubleType);
     MPI_Type_commit(&dataSetDoubleType);
 
-    if (size > 1) {
-        if (rank == 0) {
-            next_read_offset = read_offset + 3 * sizeof(dataSetParam) +
-                               blocksCount * (sizeof(DataSetRestart) + size_t(doubleCountInBlock) * sizeof(double));
+    if (size > 1) 
+    {
+        if (rank == 0) 
+        {
+            next_read_offset = read_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + size_t(doubleCountInBlock) * sizeof(double));
             MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        } 
+        else 
+        {
             MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + 3 * sizeof(dataSetParam) +
-                               blocksCount * (sizeof(DataSetRestart) + size_t(doubleCountInBlock) * sizeof(double));
+            next_read_offset = read_offset + 3 * sizeof(dataSetParam) + blocksCount * (sizeof(DataSetRestart) + size_t(doubleCountInBlock) * sizeof(double));
             if (rank < size - 1)
                 MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + 3 * sizeof(dataSetParam)), dataSetArray, blocksCount,
-                     dataSetType, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler,
-                     (MPI_Offset)(read_offset + 3 * sizeof(dataSetParam) + blocksCount * sizeof(DataSetRestart)),
-                     &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + 3 * sizeof(dataSetParam)), dataSetArray, blocksCount, dataSetType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + 3 * sizeof(dataSetParam) + blocksCount * sizeof(DataSetRestart)),
+                     &doubleValuesArrayF[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+    MPI_File_close(&file_handler);
+
+ //-------------------------------------- H1 -----------------------------
+    MPI_Offset fsize;
+    filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH1.bin";
+    rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+    if (rc != MPI_SUCCESS)
+        throw UbException(UB_EXARGS, "couldn't open file " + filename);
+    MPI_File_get_size(file_handler, &fsize);
+    if (fsize > 0)
+    {
+        multiPhase = true;
+        doubleValuesArrayH1.resize(blocksCount * doubleCountInBlock);
+        MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayH1[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+    }
     MPI_File_close(&file_handler);
+
+    //-------------------------------------- H2 -----------------------------
+       /*filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpDataSetH2.bin";
+    rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
+    if (rc != MPI_SUCCESS)
+        throw UbException(UB_EXARGS, "couldn't open file " + filename);
+
+    doubleValuesArrayH2.resize(blocksCount * doubleCountInBlock);
+    MPI_File_read_at(file_handler, read_offset, &doubleValuesArrayH2[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
+    MPI_File_close(&file_handler);*/
+
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet time: " << finish - start << " s");
         UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
-
+    
     size_t index = 0;
-    std::vector<double> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
-    std::vector<double> vectorsOfValuesH1, vectorsOfValuesH2, vectorsOfValuesH3;
-    size_t vectorSize1 =
-        dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3];
-    size_t vectorSize2 =
-        dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3];
-    size_t vectorSize3 =
-        dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
-
-    for (int n = 0; n < blocksCount; n++) {
-        vectorsOfValuesF1.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + vectorSize1);
-        index += vectorSize1;
-
-        vectorsOfValuesF2.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + vectorSize2);
-        index += vectorSize2;
-
-        vectorsOfValuesF3.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + vectorSize3);
-        index += vectorSize3;
-
-        vectorsOfValuesH1.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + vectorSize1);
+    std::vector<LBMReal> vectorsOfValuesF1, vectorsOfValuesF2, vectorsOfValuesF3;
+    std::vector<LBMReal> vectorsOfValuesH11, vectorsOfValuesH12, vectorsOfValuesH13;
+    //std::vector<LBMReal> vectorsOfValuesH21, vectorsOfValuesH22, vectorsOfValuesH23;
+    size_t vectorSize1 = dataSetParamStr1.nx[0] * dataSetParamStr1.nx[1] * dataSetParamStr1.nx[2] * dataSetParamStr1.nx[3];
+    size_t vectorSize2 = dataSetParamStr2.nx[0] * dataSetParamStr2.nx[1] * dataSetParamStr2.nx[2] * dataSetParamStr2.nx[3];
+    size_t vectorSize3 = dataSetParamStr3.nx[0] * dataSetParamStr3.nx[1] * dataSetParamStr3.nx[2] * dataSetParamStr3.nx[3];
+
+    for (int n = 0; n < blocksCount; n++) 
+    {
+        vectorsOfValuesF1.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize1);
+        if (multiPhase)
+            vectorsOfValuesH11.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize1);
+        //vectorsOfValuesH21.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize1);
         index += vectorSize1;
 
-        vectorsOfValuesH2.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + vectorSize2);
+        vectorsOfValuesF2.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize2);
+        if (multiPhase)
+            vectorsOfValuesH12.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize2);
+        //vectorsOfValuesH22.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize2);
         index += vectorSize2;
 
-        vectorsOfValuesH3.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + vectorSize3);
+        vectorsOfValuesF3.assign(doubleValuesArrayF.data() + index, doubleValuesArrayF.data() + index + vectorSize3);
+        if (multiPhase)
+            vectorsOfValuesH13.assign(doubleValuesArrayH1.data() + index, doubleValuesArrayH1.data() + index + vectorSize3);
+        //vectorsOfValuesH23.assign(doubleValuesArrayH2.data() + index, doubleValuesArrayH2.data() + index + vectorSize3);
         index += vectorSize3;
 
         SPtr<DistributionArray3D> mFdistributions(new D3Q27EsoTwist3DSplittedVector());
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)
-            ->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0],
-                                                        dataSetParamStr1.nx[1], dataSetParamStr1.nx[2],
-                                                        dataSetParamStr1.nx[3])));
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)
-            ->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0],
-                                                        dataSetParamStr2.nx[1], dataSetParamStr2.nx[2],
-                                                        dataSetParamStr2.nx[3])));
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)
-            ->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF1, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesF2, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
                     vectorsOfValuesF3, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
 
         dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX1(dataSetParamStr1.nx1);
         dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX2(dataSetParamStr1.nx2);
         dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mFdistributions)->setNX3(dataSetParamStr1.nx3);
 
-        SPtr<DistributionArray3D> mHdistributions(new D3Q27EsoTwist3DSplittedVector());
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)
-            ->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH1, dataSetParamStr1.nx[0],
-                                                        dataSetParamStr1.nx[1], dataSetParamStr1.nx[2],
-                                                        dataSetParamStr1.nx[3])));
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)
-            ->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH2, dataSetParamStr2.nx[0],
-                                                        dataSetParamStr2.nx[1], dataSetParamStr2.nx[2],
-                                                        dataSetParamStr2.nx[3])));
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)
-            ->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
-                    vectorsOfValuesH3, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
-
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)->setNX1(dataSetParamStr1.nx1);
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)->setNX2(dataSetParamStr1.nx2);
-        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mHdistributions)->setNX3(dataSetParamStr1.nx3);
-
+        SPtr<DistributionArray3D> mH1distributions(new D3Q27EsoTwist3DSplittedVector());
+        if (multiPhase)
+        {
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH11, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH12, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                vectorsOfValuesH13, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
+
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX1(dataSetParamStr1.nx1);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX2(dataSetParamStr1.nx2);
+            dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH1distributions)->setNX3(dataSetParamStr1.nx3);
+        }
+        /*SPtr<DistributionArray3D> mH2distributions(new D3Q27EsoTwist3DSplittedVector());
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH21, dataSetParamStr1.nx[0], dataSetParamStr1.nx[1], dataSetParamStr1.nx[2], dataSetParamStr1.nx[3])));
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNonLocalDistributions(CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
+                new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValuesH22, dataSetParamStr2.nx[0], dataSetParamStr2.nx[1], dataSetParamStr2.nx[2], dataSetParamStr2.nx[3])));
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setZeroDistributions(CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr(new CbArray3D<LBMReal, IndexerX3X2X1>(
+                vectorsOfValuesH23, dataSetParamStr3.nx[0], dataSetParamStr3.nx[1], dataSetParamStr3.nx[2])));
+
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX1(dataSetParamStr1.nx1);
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX2(dataSetParamStr1.nx2);
+        dynamicPointerCast<D3Q27EsoTwist3DSplittedVector>(mH2distributions)->setNX3(dataSetParamStr1.nx3);*/
+
         // find the nesessary block and fill it
-        SPtr<Block3D> block =
-            grid->getBlock(dataSetArray[n].x1, dataSetArray[n].x2, dataSetArray[n].x3, dataSetArray[n].level);
+        SPtr<Block3D> block = grid->getBlock(dataSetArray[n].x1, dataSetArray[n].x2, dataSetArray[n].x3, dataSetArray[n].level);
+
         this->lbmKernel->setBlock(block);
         SPtr<LBMKernel> kernel = this->lbmKernel->clone();
         kernel->setGhostLayerWidth(dataSetArray[n].ghostLayerWidth);
@@ -1741,17 +1820,22 @@ void MPIIORestartCoProcessor::readDataSet(int step)
         kernel->setDeltaT(dataSetArray[n].deltaT);
         kernel->setCompressible(dataSetArray[n].compressible);
         kernel->setWithForcing(dataSetArray[n].withForcing);
+        kernel->setCollisionFactorMultiphase(dataSetArray[n].collFactorL, dataSetArray[n].collFactorG);
+        kernel->setDensityRatio(dataSetArray[n].densityRatio);
+
         SPtr<DataSet3D> dataSetPtr = SPtr<DataSet3D>(new DataSet3D());
         dataSetPtr->setFdistributions(mFdistributions);
-        dataSetPtr->setHdistributions(mHdistributions);
+        if (multiPhase)
+            dataSetPtr->setHdistributions(mH1distributions);
+        //dataSetPtr->setH2distributions(mH2distributions);
         kernel->setDataSet(dataSetPtr);
         block->setKernel(kernel);
     }
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::readDataSet end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     delete[] dataSetArray;
@@ -1785,8 +1869,11 @@ void MPIIORestartCoProcessor::readDataSet(int step)
     if (arrPresence.isRelaxationFactorPresent)
         readRelaxationFactor(step);
 
-    if (arrPresence.isPhaseFieldPresent)
-        readPhaseField(step);
+    if (arrPresence.isPhaseField1Present)
+        readPhaseField(step, 1);
+
+    if (arrPresence.isPhaseField2Present)
+        readPhaseField(step, 2);
 }
 
 void MPIIORestartCoProcessor::readAverageDensityArray(int step)
@@ -1795,10 +1882,10 @@ void MPIIORestartCoProcessor::readAverageDensityArray(int step)
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageDensityArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     double start, finish;
     if (comm->isRoot())
@@ -1816,12 +1903,10 @@ void MPIIORestartCoProcessor::readAverageDensityArray(int step)
     memset(&dataSetParamStr, 0, sizeof(dataSetParam));
 
     MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType,
-                     MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
     DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock =
-        dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
     std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
 
     // define MPI_types depending on the block-specific information
@@ -1832,61 +1917,59 @@ void MPIIORestartCoProcessor::readAverageDensityArray(int step)
     MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
     size_t next_read_offset = 0;
 
-    if (size > 1) {
-        if (rank == 0) {
-            next_read_offset = read_offset + sizeof(dataSetParam) +
-                               blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+    if (size > 1)
+    {
+        if (rank == 0)
+        {
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        }
+        else
+        {
             MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) +
-                               blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             if (rank < size - 1)
                 MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                     dataSetSmallType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
     if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler,
-                         (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
+        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageDensityArray time: " << finish - start << " s");
         UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageDensityArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     size_t index = 0;
     size_t nextVectorSize =
         dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
+    std::vector<LBMReal> vectorsOfValues;
     for (int n = 0; n < blocksCount; n++) {
         vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
         index += nextVectorSize;
 
         // fill mAverageDensity arrays
         SPtr<AverageValuesArray3D> mAverageDensity;
-        mAverageDensity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-            new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1],
-                                                    dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
+        mAverageDensity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, 
+            dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
 
         // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3,
-                                             dataSetSmallArray[n].level);
+        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
         block->getKernel()->getDataSet()->setAverageDensity(mAverageDensity);
     }
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageDensityArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     delete[] dataSetSmallArray;
@@ -1898,10 +1981,10 @@ void MPIIORestartCoProcessor::readAverageVelocityArray(int step)
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageVelocityArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     double start, finish;
     if (comm->isRoot())
@@ -1917,12 +2000,10 @@ void MPIIORestartCoProcessor::readAverageVelocityArray(int step)
     int blocksCount = 0;
     dataSetParam dataSetParamStr;
     MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType,
-                     MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
     DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock =
-        dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
     std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
 
     // define MPI_types depending on the block-specific information
@@ -1933,61 +2014,59 @@ void MPIIORestartCoProcessor::readAverageVelocityArray(int step)
     MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
     size_t next_read_offset = 0;
 
-    if (size > 1) {
-        if (rank == 0) {
-            next_read_offset = read_offset + sizeof(dataSetParam) +
-                               blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+    if (size > 1)
+    {
+        if (rank == 0)
+        {
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        }
+        else
+        {
             MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) +
-                               blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             if (rank < size - 1)
                 MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                     dataSetSmallType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
     if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler,
-                         (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
+        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageVelocityArray time: " << finish - start << " s");
         UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageVelocityArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     size_t index = 0;
     size_t nextVectorSize =
         dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
+    std::vector<LBMReal> vectorsOfValues;
     for (int n = 0; n < blocksCount; n++) {
         vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
         index += nextVectorSize;
 
         // fill mAverageVelocity array
         SPtr<AverageValuesArray3D> mAverageVelocity;
-        mAverageVelocity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-            new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1],
-                                                    dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
+        mAverageVelocity = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, dataSetParamStr.nx[0], 
+            dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
 
         // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3,
-                                             dataSetSmallArray[n].level);
+        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
         block->getKernel()->getDataSet()->setAverageVelocity(mAverageVelocity);
     }
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageVelocityArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     delete[] dataSetSmallArray;
@@ -1999,10 +2078,10 @@ void MPIIORestartCoProcessor::readAverageFluktuationsArray(int step)
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageFluktuationsArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     double start, finish;
     if (comm->isRoot())
@@ -2019,12 +2098,10 @@ void MPIIORestartCoProcessor::readAverageFluktuationsArray(int step)
     int blocksCount = 0;
     dataSetParam dataSetParamStr;
     MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType,
-                     MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
     DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock =
-        dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
     std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
 
     // define MPI_types depending on the block-specific information
@@ -2035,62 +2112,59 @@ void MPIIORestartCoProcessor::readAverageFluktuationsArray(int step)
     MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
     size_t next_read_offset = 0;
 
-    if (size > 1) {
-        if (rank == 0) {
-            next_read_offset = read_offset + sizeof(dataSetParam) +
-                               blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+    if (size > 1)
+    {
+        if (rank == 0)
+        {
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        }
+        else
+        {
             MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) +
-                               blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             if (rank < size - 1)
                 MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                     dataSetSmallType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
     if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler,
-                         (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
+        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageFluktuationsArray time: " << finish - start << " s");
-        UBLOG(logINFO,
-              "MPIIORestartCoProcessor::readAverageFluktuationsArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageFluktuationsArray start of restore of data, rank = " << rank);
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     size_t index = 0;
     size_t nextVectorSize =
         dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
+    std::vector<LBMReal> vectorsOfValues;
     for (int n = 0; n < blocksCount; n++) {
         vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
         index += nextVectorSize;
 
         // fill AverageFluktuations array
         SPtr<AverageValuesArray3D> mAverageFluktuations;
-        mAverageFluktuations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-            new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1],
-                                                    dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
+        mAverageFluktuations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, 
+                dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
 
         // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3,
-                                             dataSetSmallArray[n].level);
+        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
         block->getKernel()->getDataSet()->setAverageFluctuations(mAverageFluktuations);
     }
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageFluktuationsArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     delete[] dataSetSmallArray;
@@ -2102,10 +2176,10 @@ void MPIIORestartCoProcessor::readAverageTripleArray(int step)
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot())
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageTripleArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     double start, finish;
     if (comm->isRoot())
@@ -2121,12 +2195,10 @@ void MPIIORestartCoProcessor::readAverageTripleArray(int step)
     int blocksCount = 0;
     dataSetParam dataSetParamStr;
     MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType,
-                     MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
     DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock =
-        dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
     std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
 
     // define MPI_types depending on the block-specific information
@@ -2137,61 +2209,59 @@ void MPIIORestartCoProcessor::readAverageTripleArray(int step)
     MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
     size_t next_read_offset = 0;
 
-    if (size > 1) {
-        if (rank == 0) {
-            next_read_offset = read_offset + sizeof(dataSetParam) +
-                               blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+    if (size > 1) 
+    {
+        if (rank == 0) 
+        {
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        } 
+        else 
+        {
             MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) +
-                               blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             if (rank < size - 1)
                 MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                     dataSetSmallType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
     if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler,
-                         (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
+        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageTripleArray time: " << finish - start << " s");
         UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageTripleArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     size_t index = 0;
     size_t nextVectorSize =
         dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
+    std::vector<LBMReal> vectorsOfValues;
     for (int n = 0; n < blocksCount; n++) {
         vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
         index += nextVectorSize;
 
         // fill AverageTriplecorrelations array
         SPtr<AverageValuesArray3D> mAverageTriplecorrelations;
-        mAverageTriplecorrelations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-            new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1],
-                                                    dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
+        mAverageTriplecorrelations = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, 
+                dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
 
         // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3,
-                                             dataSetSmallArray[n].level);
+        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
         block->getKernel()->getDataSet()->setAverageTriplecorrelations(mAverageTriplecorrelations);
     }
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::readAverageTripleArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     delete[] dataSetSmallArray;
@@ -2203,10 +2273,10 @@ void MPIIORestartCoProcessor::readShearStressValArray(int step)
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::readShearStressValArray start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     double start, finish;
     if (comm->isRoot())
@@ -2222,12 +2292,10 @@ void MPIIORestartCoProcessor::readShearStressValArray(int step)
     int blocksCount = 0;
     dataSetParam dataSetParamStr;
     MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType,
-                     MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
     DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock =
-        dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
     std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
 
     // define MPI_types depending on the block-specific information
@@ -2238,61 +2306,59 @@ void MPIIORestartCoProcessor::readShearStressValArray(int step)
     MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
     size_t next_read_offset = 0;
 
-    if (size > 1) {
-        if (rank == 0) {
-            next_read_offset = read_offset + sizeof(dataSetParam) +
-                               blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+    if (size > 1) 
+    {
+        if (rank == 0) 
+        {
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        } 
+        else 
+        {
             MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) +
-                               blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             if (rank < size - 1)
                 MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                     dataSetSmallType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
     if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler,
-                         (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
+        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIORestartCoProcessor::readShearStressValArray time: " << finish - start << " s");
         UBLOG(logINFO, "MPIIORestartCoProcessor::readShearStressValArray start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     size_t index = 0;
     size_t nextVectorSize =
         dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
+    std::vector<LBMReal> vectorsOfValues;
     for (int n = 0; n < blocksCount; n++) {
         vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
         index += nextVectorSize;
 
         // fill ShearStressValuesArray array
         SPtr<ShearStressValuesArray3D> mShearStressValues;
-        mShearStressValues = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(
-            new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1],
-                                                    dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
+        mShearStressValues = CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr(new CbArray4D<LBMReal, IndexerX4X3X2X1>(vectorsOfValues, 
+                dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2], dataSetParamStr.nx[3]));
 
         // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3,
-                                             dataSetSmallArray[n].level);
+        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
         block->getKernel()->getDataSet()->setShearStressValues(mShearStressValues);
     }
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::readShearStressValArray end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     delete[] dataSetSmallArray;
@@ -2304,10 +2370,10 @@ void MPIIORestartCoProcessor::readRelaxationFactor(int step)
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::readRelaxationFactor start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     double start, finish;
     if (comm->isRoot())
@@ -2323,12 +2389,10 @@ void MPIIORestartCoProcessor::readRelaxationFactor(int step)
     int blocksCount = 0;
     dataSetParam dataSetParamStr;
     MPI_File_read_at(file_handler, (MPI_Offset)(rank * sizeof(int)), &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType,
-                     MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(size * sizeof(int)), &dataSetParamStr, 1, dataSetParamType, MPI_STATUS_IGNORE);
 
     DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock =
-        dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
     std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
 
     // define MPI_types depending on the block-specific information
@@ -2339,41 +2403,41 @@ void MPIIORestartCoProcessor::readRelaxationFactor(int step)
     MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
     size_t next_read_offset = 0;
 
-    if (size > 1) {
-        if (rank == 0) {
-            next_read_offset = read_offset + sizeof(dataSetParam) +
-                               blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+    if (size > 1) 
+    {
+        if (rank == 0) 
+        {
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        } 
+        else 
+        {
             MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) +
-                               blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             if (rank < size - 1)
                 MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                     dataSetSmallType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
     if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler,
-                         (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
+        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIORestartCoProcessor::readRelaxationFactor time: " << finish - start << " s");
         UBLOG(logINFO, "MPIIORestartCoProcessor::readRelaxationFactor start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     size_t index = 0;
     size_t nextVectorSize =
         dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
+    std::vector<LBMReal> vectorsOfValues;
     for (int n = 0; n < blocksCount; n++) {
         vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
         index += nextVectorSize;
@@ -2384,37 +2448,38 @@ void MPIIORestartCoProcessor::readRelaxationFactor(int step)
             vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
 
         // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3,
-                                             dataSetSmallArray[n].level);
+        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
         block->getKernel()->getDataSet()->setRelaxationFactor(mRelaxationFactor);
     }
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::readRelaxationFactor end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     delete[] dataSetSmallArray;
 }
 
-void MPIIORestartCoProcessor::readPhaseField(int step)
+void MPIIORestartCoProcessor::readPhaseField(int step, int fieldN)
 {
     int rank, size;
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::readPhaseField start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     double start, finish;
     if (comm->isRoot())
         start = MPI_Wtime();
 
     MPI_File file_handler;
-    std::string filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPhaseField.bin";
+    std::string filename;
+    if(fieldN == 1) filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPhaseField1.bin";
+    else filename = path + "/mpi_io_cp/mpi_io_cp_" + UbSystem::toString(step) + "/cpPhaseField2.bin";
     int rc = MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &file_handler);
     if (rc != MPI_SUCCESS)
         throw UbException(UB_EXARGS, "couldn't open file " + filename);
@@ -2427,8 +2492,7 @@ void MPIIORestartCoProcessor::readPhaseField(int step)
                      MPI_STATUS_IGNORE);
 
     DataSetSmallRestart *dataSetSmallArray = new DataSetSmallRestart[blocksCount];
-    int doubleCountInBlock =
-        dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+    int doubleCountInBlock = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
     std::vector<double> doubleValuesArray(blocksCount * doubleCountInBlock); // double-values in all blocks
 
     // define MPI_types depending on the block-specific information
@@ -2439,42 +2503,43 @@ void MPIIORestartCoProcessor::readPhaseField(int step)
     MPI_Offset read_offset  = (MPI_Offset)(size * sizeof(int));
     size_t next_read_offset = 0;
 
-    if (size > 1) {
-        if (rank == 0) {
-            next_read_offset = read_offset + sizeof(dataSetParam) +
-                               blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+    if (size > 1) 
+    {
+        if (rank == 0) 
+        {
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        } 
+        else 
+        {
             MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + sizeof(dataSetParam) +
-                               blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
+            next_read_offset = read_offset + sizeof(dataSetParam) + blocksCount * (sizeof(DataSetSmallRestart) + doubleCountInBlock * sizeof(double));
             if (rank < size - 1)
                 MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
         }
     }
 
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount,
-                     dataSetSmallType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam)), dataSetSmallArray, blocksCount, dataSetSmallType, MPI_STATUS_IGNORE);
     if (doubleCountInBlock > 0)
-        MPI_File_read_at(file_handler,
-                         (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
+        MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + sizeof(dataSetParam) + blocksCount * sizeof(DataSetSmallRestart)),
                          &doubleValuesArray[0], blocksCount, dataSetDoubleType, MPI_STATUS_IGNORE);
     MPI_File_close(&file_handler);
     MPI_Type_free(&dataSetDoubleType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIORestartCoProcessor::readPhaseField time: " << finish - start << " s");
         UBLOG(logINFO, "MPIIORestartCoProcessor::readPhaseField start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     size_t index = 0;
-    size_t nextVectorSize =
-        dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
-    std::vector<double> vectorsOfValues;
-    for (int n = 0; n < blocksCount; n++) {
+    size_t nextVectorSize = dataSetParamStr.nx[0] * dataSetParamStr.nx[1] * dataSetParamStr.nx[2] * dataSetParamStr.nx[3];
+    std::vector<LBMReal> vectorsOfValues;
+
+    for (int n = 0; n < blocksCount; n++)
+    {
         vectorsOfValues.assign(doubleValuesArray.data() + index, doubleValuesArray.data() + index + nextVectorSize);
         index += nextVectorSize;
 
@@ -2484,15 +2549,18 @@ void MPIIORestartCoProcessor::readPhaseField(int step)
             vectorsOfValues, dataSetParamStr.nx[0], dataSetParamStr.nx[1], dataSetParamStr.nx[2]));
 
         // find the nesessary block and fill it
-        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3,
-                                             dataSetSmallArray[n].level);
-        block->getKernel()->getDataSet()->setPhaseField(mPhaseField);
+        SPtr<Block3D> block = grid->getBlock(dataSetSmallArray[n].x1, dataSetSmallArray[n].x2, dataSetSmallArray[n].x3, dataSetSmallArray[n].level);
+        if(fieldN == 1)
+            block->getKernel()->getDataSet()->setPhaseField(mPhaseField);
+        else
+            block->getKernel()->getDataSet()->setPhaseField2(mPhaseField);
+
     }
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    { 
         UBLOG(logINFO, "MPIIORestartCoProcessor::readPhaseField end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     delete[] dataSetSmallArray;
@@ -2504,10 +2572,10 @@ void MPIIORestartCoProcessor::readBoundaryConds(int step)
     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
     MPI_Comm_size(MPI_COMM_WORLD, &size);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         UBLOG(logINFO, "MPIIORestartCoProcessor::readBoundaryConds start MPI IO rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
     double start, finish;
     if (comm->isRoot())
@@ -2527,14 +2595,11 @@ void MPIIORestartCoProcessor::readBoundaryConds(int step)
     // read count of blocks
     MPI_File_read_at(file_handler, read_offset1, &blocksCount, 1, MPI_INT, MPI_STATUS_IGNORE);
     // read count of big BoundaryCondition blocks
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset1 + sizeof(int)), &dataCount1000, 1, MPI_INT,
-                     MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset1 + sizeof(int)), &dataCount1000, 1, MPI_INT, MPI_STATUS_IGNORE);
     // read count of indexContainer values in all blocks
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset1 + 2 * sizeof(int)), &dataCount2, 1, MPI_INT,
-                     MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset1 + 2 * sizeof(int)), &dataCount2, 1, MPI_INT, MPI_STATUS_IGNORE);
     // read count of bcindexmatrix values in every block
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset1 + 3 * sizeof(int)), &boundCondParamStr, 1,
-                     boundCondParamType, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset1 + 3 * sizeof(int)), &boundCondParamStr, 1, boundCondParamType, MPI_STATUS_IGNORE);
 
     MPI_Type_contiguous(boundCondParamStr.bcindexmatrixCount, MPI_INT, &bcindexmatrixType);
     MPI_Type_commit(&bcindexmatrixType);
@@ -2550,16 +2615,18 @@ void MPIIORestartCoProcessor::readBoundaryConds(int step)
     MPI_Offset read_offset  = (MPI_Offset)(size * (3 * sizeof(int) + sizeof(boundCondParam)));
     size_t next_read_offset = 0;
 
-    if (size > 1) {
-        if (rank == 0) {
-            next_read_offset = read_offset + blocksCount * sizeof(BCAddRestart) +
-                               dataCount * sizeof(BoundaryCondition) +
+    if (size > 1) 
+    {
+        if (rank == 0) 
+        {
+            next_read_offset = read_offset + blocksCount * sizeof(BCAddRestart) + dataCount * sizeof(BoundaryCondition) +
                                (blocksCount * boundCondParamStr.bcindexmatrixCount + dataCount2) * sizeof(int);
             MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, 1, 5, MPI_COMM_WORLD);
-        } else {
+        } 
+        else 
+        {
             MPI_Recv(&read_offset, 1, MPI_LONG_LONG_INT, rank - 1, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            next_read_offset = read_offset + blocksCount * sizeof(BCAddRestart) +
-                               dataCount * sizeof(BoundaryCondition) +
+            next_read_offset = read_offset + blocksCount * sizeof(BCAddRestart) + dataCount * sizeof(BoundaryCondition) +
                                (blocksCount * boundCondParamStr.bcindexmatrixCount + dataCount2) * sizeof(int);
             if (rank < size - 1)
                 MPI_Send(&next_read_offset, 1, MPI_LONG_LONG_INT, rank + 1, 5, MPI_COMM_WORLD);
@@ -2567,27 +2634,21 @@ void MPIIORestartCoProcessor::readBoundaryConds(int step)
     }
 
     MPI_File_read_at(file_handler, read_offset, bcAddArray, blocksCount, boundCondTypeAdd, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + blocksCount * sizeof(BCAddRestart)), &bcArray[0],
-                     dataCount1000, boundCondType1000, MPI_STATUS_IGNORE);
-    MPI_File_read_at(
-        file_handler,
-        (MPI_Offset)(read_offset + blocksCount * sizeof(BCAddRestart) + dataCount * sizeof(BoundaryCondition)),
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + blocksCount * sizeof(BCAddRestart)), &bcArray[0], dataCount1000, boundCondType1000, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + blocksCount * sizeof(BCAddRestart) + dataCount * sizeof(BoundaryCondition)),
         &intArray1[0], blocksCount, bcindexmatrixType, MPI_STATUS_IGNORE);
-    MPI_File_read_at(file_handler,
-                     (MPI_Offset)(read_offset + blocksCount * sizeof(BCAddRestart) +
-                                  dataCount * sizeof(BoundaryCondition) +
-                                  blocksCount * boundCondParamStr.bcindexmatrixCount * sizeof(int)),
-                     &intArray2[0], dataCount2, MPI_INT, MPI_STATUS_IGNORE);
+    MPI_File_read_at(file_handler, (MPI_Offset)(read_offset + blocksCount * sizeof(BCAddRestart) + dataCount * sizeof(BoundaryCondition) +
+                                  blocksCount * boundCondParamStr.bcindexmatrixCount * sizeof(int)), &intArray2[0], dataCount2, MPI_INT, MPI_STATUS_IGNORE);
 
     MPI_File_close(&file_handler);
     MPI_Type_free(&bcindexmatrixType);
 
-    if (comm->isRoot()) {
+    if (comm->isRoot()) 
+    {
         finish = MPI_Wtime();
         UBLOG(logINFO, "MPIIORestartCoProcessor::readBoundaryConds time: " << finish - start << " s");
         UBLOG(logINFO, "MPIIORestartCoProcessor::readBoundaryConds start of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 
     int index = 0, index1 = 0, index2 = 0;
@@ -2595,16 +2656,19 @@ void MPIIORestartCoProcessor::readBoundaryConds(int step)
     std::vector<int> bcindexmatrixV;
     std::vector<int> indexContainerV;
 
-    for (int n = 0; n < blocksCount; n++) {
+    for (int n = 0; n < blocksCount; n++) 
+    {
         bcVector.resize(0);
         bcindexmatrixV.resize(0);
         indexContainerV.resize(0);
 
-        for (int ibc = 0; ibc < bcAddArray[n].boundCond_count; ibc++) {
+        for (int ibc = 0; ibc < bcAddArray[n].boundCond_count; ibc++) 
+        {
             SPtr<BoundaryConditions> bc;
             if (memcmp(&bcArray[index], nullBouCond, sizeof(BoundaryCondition)) == 0)
                 bc = SPtr<BoundaryConditions>();
-            else {
+            else 
+            {
                 bc                         = SPtr<BoundaryConditions>(new BoundaryConditions);
                 bc->noslipBoundaryFlags    = bcArray[index].noslipBoundaryFlags;
                 bc->slipBoundaryFlags      = bcArray[index].slipBoundaryFlags;
@@ -2639,8 +2703,7 @@ void MPIIORestartCoProcessor::readBoundaryConds(int step)
         for (int b2 = 0; b2 < bcAddArray[n].indexContainer_count; b2++)
             indexContainerV.push_back(intArray2[index2++]);
 
-        CbArray3D<int, IndexerX3X2X1> bcim(bcindexmatrixV, boundCondParamStr.nx1, boundCondParamStr.nx2,
-                                           boundCondParamStr.nx3);
+        CbArray3D<int, IndexerX3X2X1> bcim(bcindexmatrixV, boundCondParamStr.nx1, boundCondParamStr.nx2, boundCondParamStr.nx3);
 
         SPtr<Block3D> block = grid->getBlock(bcAddArray[n].x1, bcAddArray[n].x2, bcAddArray[n].x3, bcAddArray[n].level);
         SPtr<BCProcessor> bcProc = bcProcessor->clone(block->getKernel());
@@ -2661,8 +2724,7 @@ void MPIIORestartCoProcessor::readBoundaryConds(int step)
 
     if (comm->isRoot()) {
         UBLOG(logINFO, "MPIIORestartCoProcessor::readBoundaryConds end of restore of data, rank = " << rank);
-        UBLOG(logINFO, "Physical Memory currently used by current process: "
-                           << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
+        UBLOG(logINFO, "Physical Memory currently used by current process: " << Utilities::getPhysMemUsedByMe() / 1073741824.0 << " GB");
     }
 }
 //////////////////////////////////////////////////////////////////////////
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h
index cbcf8c553943aa325f415cd123ae1fbe0bf4dcf3..57f559769a06d9a87a968ada73fbaba712da789b 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/MPIIORestartCoProcessor.h
@@ -4,6 +4,7 @@
 #include <mpi.h>
 //#include <PointerDefinitions.h>
 #include <string>
+#include <vector>
 
 #include "MPIIOCoProcessor.h"
 #include "MPIIODataStructures.h"
@@ -35,8 +36,8 @@ public:
     void writeAverageTripleArray(int step);
     void writeShearStressValArray(int step);
     void writeRelaxationFactor(int step);
-    void writePhaseField(int step);
-   //! Writes the boundary conditions of the blocks into the file cpBC.bin
+    void writePhaseField(int step, int num);
+    //! Writes the boundary conditions of the blocks into the file cpBC.bin
     void writeBoundaryConds(int step);
 
     //! Reads the blocks of the grid from the file cpBlocks.bin
@@ -49,7 +50,7 @@ public:
     void readAverageTripleArray(int step);
     void readShearStressValArray(int step);
     void readRelaxationFactor(int step);
-    void readPhaseField(int step);
+    void readPhaseField(int step, int num);
     //! Reads the boundary conditions of the blocks from the file cpBC.bin
     void readBoundaryConds(int step);
     //! The function sets LBMKernel
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.cpp
index b350f441b3b237032ca24cd4a3743c3cf629a89e..1c7e47f52e721069b20a6b3c27d1e71857ef74ab 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/PressureCoefficientCoProcessor.cpp
@@ -42,8 +42,8 @@ void PressureCoefficientCoProcessor::collectData(double step)
 //////////////////////////////////////////////////////////////////////////
 void PressureCoefficientCoProcessor::calculateRho()
 {
-    double f[D3Q27System::ENDF + 1];
-    double vx1, vx2, vx3, rho;
+    LBMReal f[D3Q27System::ENDF + 1];
+    LBMReal vx1, vx2, vx3, rho;
     std::vector<double> values;
     std::vector<double> rvalues;
 
diff --git a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp
index 3519c83a529314e1135f3d76e21c3b2c3c3f8cba..e98d6ac874ace46659bc2903b3c67a0f9f93fa24 100644
--- a/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp
+++ b/src/cpu/VirtualFluidsCore/CoProcessors/WriteMacroscopicQuantitiesCoProcessor.cpp
@@ -211,33 +211,25 @@ void WriteMacroscopicQuantitiesCoProcessor::addDataMQ(SPtr<Block3D> block)
 
                     distributions->getDistribution(f, ix1, ix2, ix3);
                     calcMacros(f, rho, vx1, vx2, vx3);
-                    double press = D3Q27System::getPressure(f); // D3Q27System::calcPress(f,rho,vx1,vx2,vx3);
+                    //double press = D3Q27System::getPressure(f); // D3Q27System::calcPress(f,rho,vx1,vx2,vx3);
 
                     if (UbMath::isNaN(rho) || UbMath::isInfinity(rho))
-                        // UB_THROW( UbException(UB_EXARGS,"rho is not a number (nan or -1.#IND) or infinity number
-                        // -1.#INF in block="+block->toString()+",
-                        // node="+UbSystem::toString(ix1)+","+UbSystem::toString(ix2)+","+UbSystem::toString(ix3)));
-                        rho = 999.0;
-                    if (UbMath::isNaN(press) || UbMath::isInfinity(press))
+                         UB_THROW( UbException(UB_EXARGS,"rho is not a number (nan or -1.#IND) or infinity number -1.#INF in block="+block->toString()+",node="+UbSystem::toString(ix1)+","+UbSystem::toString(ix2)+","+UbSystem::toString(ix3)));
+                        //rho = 999.0;
+                    //if (UbMath::isNaN(press) || UbMath::isInfinity(press))
                         // UB_THROW( UbException(UB_EXARGS,"press is not a number (nan or -1.#IND) or infinity number
                         // -1.#INF in block="+block->toString()+",
                         // node="+UbSystem::toString(ix1)+","+UbSystem::toString(ix2)+","+UbSystem::toString(ix3)));
-                        press = 999.0;
+                        //press = 999.0;
                     if (UbMath::isNaN(vx1) || UbMath::isInfinity(vx1))
-                        // UB_THROW( UbException(UB_EXARGS,"vx1 is not a number (nan or -1.#IND) or infinity number
-                        // -1.#INF in block="+block->toString()+",
-                        // node="+UbSystem::toString(ix1)+","+UbSystem::toString(ix2)+","+UbSystem::toString(ix3)));
-                        vx1 = 999.0;
+                         UB_THROW( UbException(UB_EXARGS,"vx1 is not a number (nan or -1.#IND) or infinity number -1.#INF in block="+block->toString()+", node="+UbSystem::toString(ix1)+","+UbSystem::toString(ix2)+","+UbSystem::toString(ix3)));
+                        //vx1 = 999.0;
                     if (UbMath::isNaN(vx2) || UbMath::isInfinity(vx2))
-                        // UB_THROW( UbException(UB_EXARGS,"vx2 is not a number (nan or -1.#IND) or infinity number
-                        // -1.#INF in block="+block->toString()+",
-                        // node="+UbSystem::toString(ix1)+","+UbSystem::toString(ix2)+","+UbSystem::toString(ix3)));
-                        vx2 = 999.0;
+                         UB_THROW( UbException(UB_EXARGS,"vx2 is not a number (nan or -1.#IND) or infinity number -1.#INF in block="+block->toString()+", node="+UbSystem::toString(ix1)+","+UbSystem::toString(ix2)+","+UbSystem::toString(ix3)));
+                        //vx2 = 999.0;
                     if (UbMath::isNaN(vx3) || UbMath::isInfinity(vx3))
-                        // UB_THROW( UbException(UB_EXARGS,"vx3 is not a number (nan or -1.#IND) or infinity number
-                        // -1.#INF in block="+block->toString()+",
-                        // node="+UbSystem::toString(ix1)+","+UbSystem::toString(ix2)+","+UbSystem::toString(ix3)));
-                        vx3 = 999.0;
+                         UB_THROW( UbException(UB_EXARGS,"vx3 is not a number (nan or -1.#IND) or infinity number -1.#INF in block="+block->toString()+", node="+UbSystem::toString(ix1)+","+UbSystem::toString(ix2)+","+UbSystem::toString(ix3)));
+                        //vx3 = 999.0;
 
                     data[index++].push_back(rho);
                     data[index++].push_back(vx1);
diff --git a/src/cpu/VirtualFluidsCore/Connectors/FullDirectConnector.h b/src/cpu/VirtualFluidsCore/Connectors/FullDirectConnector.h
index 6d8877ee909183dcb4088ccb77f6726e83447ba8..fb04de68aaa4ab360e38ae83c9d47d077c05e59f 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/FullDirectConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/FullDirectConnector.h
@@ -48,9 +48,9 @@ public:
     void sendVectors() override;
 
 protected:
-    virtual inline void updatePointers() = 0;
+    virtual void updatePointers() = 0;
     void exchangeData();
-    virtual inline void exchangeData(int x1From, int x2From, int x3From, int x1To, int x2To, int x3To) = 0;
+    virtual void exchangeData(int x1From, int x2From, int x3From, int x1To, int x2To, int x3To) = 0;
 
     int maxX1;
     int maxX2;
diff --git a/src/cpu/VirtualFluidsCore/Connectors/FullVectorConnector.h b/src/cpu/VirtualFluidsCore/Connectors/FullVectorConnector.h
index c2853d4a81bdb3497e235a9115f7cf5260425117..39fc3d1afa3fb958b09d128bd67a5aca42acbc03 100644
--- a/src/cpu/VirtualFluidsCore/Connectors/FullVectorConnector.h
+++ b/src/cpu/VirtualFluidsCore/Connectors/FullVectorConnector.h
@@ -55,11 +55,11 @@ public:
     void distributeReceiveVectors() override;
 
 protected:
-    virtual inline void updatePointers() = 0;
+    virtual void updatePointers() = 0;
     void fillData();
     void distributeData();
-    virtual inline void fillData(vector_type &sdata, int &index, int x1, int x2, int x3) = 0;
-    virtual inline void distributeData(vector_type &rdata, int &index, int x1, int x2, int x3) = 0;
+    virtual void fillData(vector_type &sdata, int &index, int x1, int x2, int x3) = 0;
+    virtual void distributeData(vector_type &rdata, int &index, int x1, int x2, int x3) = 0;
     
     int maxX1;
     int maxX2;
diff --git a/src/cpu/VirtualFluidsCore/Data/DataSet3D.h b/src/cpu/VirtualFluidsCore/Data/DataSet3D.h
index b8b541546f3c2cccb49ff09c859b8c97c2e22f63..e53e38a74daea2a2a40ca53eff1aa1f4febcc27a 100644
--- a/src/cpu/VirtualFluidsCore/Data/DataSet3D.h
+++ b/src/cpu/VirtualFluidsCore/Data/DataSet3D.h
@@ -91,23 +91,18 @@ public:
 protected:
 private:
     SPtr<DistributionArray3D> fdistributions;
-
     SPtr<DistributionArray3D> hdistributions;
-
     //SPtr<DistributionArray3D> h1distributions;
     SPtr<DistributionArray3D> h2distributions;
-
+ 
     SPtr<AverageValuesArray3D> averageValues;
-
     SPtr<AverageValuesArray3D> averageDensity;
     SPtr<AverageValuesArray3D> averageVelocity;
     SPtr<AverageValuesArray3D> averageFluktuations;
     SPtr<AverageValuesArray3D> averageTriplecorrelations;
-
     SPtr<ShearStressValuesArray3D> shearStressValues;
 
     SPtr<RelaxationFactorArray3D> relaxationFactor;
-
     SPtr<PhaseFieldArray3D> phaseField;
     SPtr<PhaseFieldArray3D> phaseField2;
 };
diff --git a/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.cpp b/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.cpp
index e7144a7c6f67d1a9feb8a5883a046420750f1c1b..37c6c47f05215b1d210ef943e639b1cb957ca63d 100644
--- a/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.cpp
+++ b/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.cpp
@@ -58,7 +58,7 @@ void D3Q27TriFaceMeshInteractor::initInteractor(const double &timeStep)
     setQs(timeStep);
 }
 //////////////////////////////////////////////////////////////////////////
-bool D3Q27TriFaceMeshInteractor::setDifferencesToGbObject3D(const SPtr<Block3D> block/*,const double& orgX1,const double& orgX2,const double& orgX3,const double& blockLengthX1,const double& blockLengthX2,const double& blockLengthX3, const double& timestep*/)
+bool D3Q27TriFaceMeshInteractor::setDifferencesToGbObject3D(const SPtr<Block3D> block)
 {
     if (!block)
         return false;
diff --git a/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.h b/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.h
index ebd707a41ca454cb1e8b22f0020cfd98cadfab36..9ac8bfc48a4fda3612b0781d93496cce723d2cd8 100644
--- a/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.h
+++ b/src/cpu/VirtualFluidsCore/Interactors/D3Q27TriFaceMeshInteractor.h
@@ -42,7 +42,7 @@ public:
     void setQs(const double &timeStep);
     void refineBlockGridToLevel(int level, double startDistance, double stopDistance);
 
-    bool setDifferencesToGbObject3D(const SPtr<Block3D> block/*,const double& orgX1,const double& orgX2,const double& orgX3,const double& blockLengthX1,const double& blockLengthX2,const double& blockLengthX3, const double& timestep=0*/) override;
+    bool setDifferencesToGbObject3D(const SPtr<Block3D> block) override;
 
     void setRegardPointInObjectTest(bool opt) { this->regardPIOTest = opt; }
 
diff --git a/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.h b/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.h
index 74627b76addaf6badaea678d1c4a20b274234b3a..9727bf636085c7c0d24a9108acc71925af36e5d1 100644
--- a/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.h
+++ b/src/cpu/VirtualFluidsCore/Interactors/Interactor3D.h
@@ -76,12 +76,7 @@ public:
     SPtr<Grid3D> getGrid3D() const { return grid.lock(); }
     void setGrid3D(SPtr<Grid3D> grid) { this->grid = grid; }
     virtual SPtr<GbObject3D> getGbObject3D() const { return geoObject3D; }
-    virtual bool setDifferencesToGbObject3D(const SPtr<Block3D>)
-    {
-        // UBLOG(logINFO, "Interactor3D::setDifferencesToGbObject3D()");
-        return false;
-    }
-
+    virtual bool setDifferencesToGbObject3D(const SPtr<Block3D>) = 0;
     virtual std::vector<SPtr<Block3D>> &getBcBlocks() { return this->bcBlocks; }
     virtual void removeBcBlocks() { this->bcBlocks.clear(); }
     virtual std::vector<SPtr<Block3D>> &getSolidBlockSet() { return this->solidBlocks; }
diff --git a/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.cpp
index 280562ebfaff7ccaa61055fa7caed0a4cac4d666..20851b019a3a0abd2c8865c7c40530e73bcf6245 100644
--- a/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.cpp
@@ -5,8 +5,9 @@
 #include "D3Q27EsoTwist3DSplittedVector.h"
 #include "D3Q27System.h"
 #include "DataSet3D.h"
+#include "Block3D.h"
 
-//#define PROOF_CORRECTNESS
+#define PROOF_CORRECTNESS
 
 //////////////////////////////////////////////////////////////////////////
 BGKLBMKernel::BGKLBMKernel() { this->compressible = false; }
@@ -22,6 +23,7 @@ void BGKLBMKernel::initDataSet()
 SPtr<LBMKernel> BGKLBMKernel::clone()
 {
     SPtr<LBMKernel> kernel(new BGKLBMKernel());
+    kernel->setNX(nx);
     std::dynamic_pointer_cast<BGKLBMKernel>(kernel)->initDataSet();
     kernel->setCollisionFactor(this->collFactor);
     kernel->setBCProcessor(bcProcessor->clone(kernel));
@@ -30,10 +32,12 @@ SPtr<LBMKernel> BGKLBMKernel::clone()
     kernel->setForcingX2(muForcingX2);
     kernel->setForcingX3(muForcingX3);
     kernel->setIndex(ix1, ix2, ix3);
+    kernel->setDeltaT(deltaT);
+    kernel->setBlock(block.lock());
     return kernel;
 }
 //////////////////////////////////////////////////////////////////////////
-void BGKLBMKernel::calculate(int /*step*/)
+void BGKLBMKernel::calculate(int step)
 {
     using namespace D3Q27System;
     using namespace UbMath;
@@ -250,7 +254,10 @@ void BGKLBMKernel::calculate(int /*step*/)
                     if (dif > 10.0E-15 || dif < -10.0E-15)
 #endif
                     {
-                        UB_THROW(UbException(UB_EXARGS, "rho is not correct"));
+                      UB_THROW(UbException(UB_EXARGS, "rho="+UbSystem::toString(drho)+", rho_post="+UbSystem::toString(rho_post)
+                         +" dif="+UbSystem::toString(dif)
+                         +" rho is not correct for node "+UbSystem::toString(x1)+","+UbSystem::toString(x2)+","+UbSystem::toString(x3)
+                         +" in " + block.lock()->toString()+" step = "+UbSystem::toString(step)));
                     }
 #endif
                     //////////////////////////////////////////////////////////////////////////
diff --git a/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.h
index e998267c083c604d6387acf71d2e069315e595c8..9d17a8cc7677db7a142f4340dcdeaf38e268d214 100644
--- a/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/BGKLBMKernel.h
@@ -1,5 +1,5 @@
-#ifndef LBMKERNELETD3Q27BGK_H
-#define LBMKERNELETD3Q27BGK_H
+#ifndef BGKLBMKernel_H
+#define BGKLBMKernel_H
 
 #include "LBMKernel.h"
 #include "basics/container/CbArray3D.h"
diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.cpp
index daed493b9cc1afddbd92acabcd551da0f463ea26..9c52f5e0469ce5b6c6e08136f25d3c027d36bdbd 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.cpp
@@ -39,6 +39,7 @@
 #include "Block3D.h"
 #include "BCArray3D.h"
 
+
 #define PROOF_CORRECTNESS
 
 using namespace UbMath;
diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.h
index 10cfd49264bb829eac1fc6b9bedeee3b6eace265..3052f50e3e48925ed21a3c3af68cc2d247c88a02 100644
--- a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernel.h
@@ -144,4 +144,4 @@ inline void CumulantK17LBMKernel::backwardChimera(LBMReal& mfa, LBMReal& mfb, LB
     mfa = ma;
 }
 
-#endif // CumulantK17LBMKernel_h__
\ No newline at end of file
+#endif // CumulantK17LBMKernel_h__
diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.cpp b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2bdc7ce97be90a45b2bebb3dca53f3eb609bec3b
--- /dev/null
+++ b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.cpp
@@ -0,0 +1,337 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file CumulantK17LBMKernelUnified.cpp
+//! \ingroup LBM
+//! \author Konstantin Kutscher, Martin Geier
+//=======================================================================================
+#include <lbm/KernelParameter.h>
+#include <lbm/CumulantChimera.h>
+#include <lbm/constants/D3Q27.h>
+
+#include "CumulantK17LBMKernelUnified.h"
+#include "D3Q27System.h"
+#include "D3Q27EsoTwist3DSplittedVector.h"
+#include <cmath>
+#include "DataSet3D.h"
+#include "LBMKernel.h"
+#include "Block3D.h"
+#include "BCArray3D.h"
+
+
+//#define PROOF_CORRECTNESS
+
+using namespace UbMath;
+
+//////////////////////////////////////////////////////////////////////////
+CumulantK17LBMKernelUnified::CumulantK17LBMKernelUnified()
+{
+    this->compressible = true;
+}
+//////////////////////////////////////////////////////////////////////////
+void CumulantK17LBMKernelUnified::initDataSet()
+{
+    SPtr<DistributionArray3D> d(new D3Q27EsoTwist3DSplittedVector(nx[0] + 2, nx[1] + 2, nx[2] + 2, -999.9));
+    dataSet->setFdistributions(d);
+}
+//////////////////////////////////////////////////////////////////////////
+SPtr<LBMKernel> CumulantK17LBMKernelUnified::clone()
+{
+    SPtr<LBMKernel> kernel(new CumulantK17LBMKernelUnified());
+    kernel->setNX(nx);
+    std::dynamic_pointer_cast<CumulantK17LBMKernelUnified>(kernel)->initDataSet();
+    kernel->setCollisionFactor(this->collFactor);
+    kernel->setBCProcessor(bcProcessor->clone(kernel));
+    kernel->setWithForcing(withForcing);
+    kernel->setForcingX1(muForcingX1);
+    kernel->setForcingX2(muForcingX2);
+    kernel->setForcingX3(muForcingX3);
+    kernel->setIndex(ix1, ix2, ix3);
+    kernel->setDeltaT(deltaT);
+    kernel->setBlock(block.lock());
+
+    return kernel;
+}
+//////////////////////////////////////////////////////////////////////////
+void CumulantK17LBMKernelUnified::calculate(int step)
+{
+    //////////////////////////////////////////////////////////////////////////
+    //! Cumulant K17 Kernel is based on
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
+    //! and
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.07.004 ]</b></a>
+    //!
+    //! The cumulant kernel is executed in the following steps
+    //!
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+
+    using namespace std;
+
+    //initializing of forcing stuff
+    if (withForcing)
+    {
+        muForcingX1.DefineVar("x1", &muX1); muForcingX1.DefineVar("x2", &muX2); muForcingX1.DefineVar("x3", &muX3);
+        muForcingX2.DefineVar("x1", &muX1); muForcingX2.DefineVar("x2", &muX2); muForcingX2.DefineVar("x3", &muX3);
+        muForcingX3.DefineVar("x1", &muX1); muForcingX3.DefineVar("x2", &muX2); muForcingX3.DefineVar("x3", &muX3);
+
+        muDeltaT = deltaT;
+
+        muForcingX1.DefineVar("dt", &muDeltaT);
+        muForcingX2.DefineVar("dt", &muDeltaT);
+        muForcingX3.DefineVar("dt", &muDeltaT);
+
+        muNu = (1.0 / 3.0) * (1.0 / collFactor - 1.0 / 2.0);
+
+        muForcingX1.DefineVar("nu", &muNu);
+        muForcingX2.DefineVar("nu", &muNu);
+        muForcingX3.DefineVar("nu", &muNu);
+    }
+    /////////////////////////////////////
+
+    localDistributions = dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getLocalDistributions();
+    nonLocalDistributions = dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getNonLocalDistributions();
+    restDistributions = dynamic_pointer_cast<D3Q27EsoTwist3DSplittedVector>(dataSet->getFdistributions())->getZeroDistributions();
+
+    SPtr<BCArray3D> bcArray = this->getBCProcessor()->getBCArray();
+
+    const int bcArrayMaxX1 = (int)bcArray->getNX1();
+    const int bcArrayMaxX2 = (int)bcArray->getNX2();
+    const int bcArrayMaxX3 = (int)bcArray->getNX3();
+
+    int minX1 = ghostLayerWidth;
+    int minX2 = ghostLayerWidth;
+    int minX3 = ghostLayerWidth;
+    int maxX1 = bcArrayMaxX1 - ghostLayerWidth;
+    int maxX2 = bcArrayMaxX2 - ghostLayerWidth;
+    int maxX3 = bcArrayMaxX3 - ghostLayerWidth;
+
+    LBMReal omega = collFactor;
+
+    for (int x3 = minX3; x3 < maxX3; x3++)
+    {
+        for (int x2 = minX2; x2 < maxX2; x2++)
+        {
+            for (int x1 = minX1; x1 < maxX1; x1++)
+            {
+                if (!bcArray->isSolid(x1, x2, x3) && !bcArray->isUndefined(x1, x2, x3))
+                {
+                    int x1p = x1 + 1;
+                    int x2p = x2 + 1;
+                    int x3p = x3 + 1;
+                    //////////////////////////////////////////////////////////////////////////
+                    //////////////////////////////////////////////////////////////////////////
+                    //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm
+                    //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+                    //!
+                    ////////////////////////////////////////////////////////////////////////////
+                    //////////////////////////////////////////////////////////////////////////
+
+                    //E   N  T
+                    //c   c  c
+                    //////////
+                    //W   S  B
+                    //a   a  a
+
+                    //Rest is b
+
+                    //mfxyz
+                    //a - negative
+                    //b - null
+                    //c - positive
+
+                    // a b c
+                    //-1 0 1
+
+                    LBMReal mfcbb = (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3);
+                    LBMReal mfbcb = (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3);
+                    LBMReal mfbbc = (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3);
+                    LBMReal mfccb = (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3);
+                    LBMReal mfacb = (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3);
+                    LBMReal mfcbc = (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3);
+                    LBMReal mfabc = (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3);
+                    LBMReal mfbcc = (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3);
+                    LBMReal mfbac = (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3);
+                    LBMReal mfccc = (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3);
+                    LBMReal mfacc = (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3);
+                    LBMReal mfcac = (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3);
+                    LBMReal mfaac = (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3);
+
+                    LBMReal mfabb = (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3);
+                    LBMReal mfbab = (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3);
+                    LBMReal mfbba = (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p);
+                    LBMReal mfaab = (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3);
+                    LBMReal mfcab = (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3);
+                    LBMReal mfaba = (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p);
+                    LBMReal mfcba = (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p);
+                    LBMReal mfbaa = (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p);
+                    LBMReal mfbca = (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p);
+                    LBMReal mfaaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p);
+                    LBMReal mfcaa = (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p);
+                    LBMReal mfaca = (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p);
+                    LBMReal mfcca = (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p);
+
+                    LBMReal mfbbb = (*this->restDistributions)(x1, x2, x3);
+
+                    
+                    LBMReal forces[3] = {0., 0., 0.};
+                    if (withForcing)
+                    {
+                        muX1 = static_cast<double>(x1 - 1 + ix1 * maxX1);
+                        muX2 = static_cast<double>(x2 - 1 + ix2 * maxX2);
+                        muX3 = static_cast<double>(x3 - 1 + ix3 * maxX3);
+
+                        forcingX1 = muForcingX1.Eval();
+                        forcingX2 = muForcingX2.Eval();
+                        forcingX3 = muForcingX3.Eval();
+
+                        forces[0] = forcingX1 * deltaT;
+                        forces[1] = forcingX2 * deltaT;
+                        forces[2] = forcingX3 * deltaT;
+                    }
+
+                    vf::lbm::Distribution27 distribution;
+
+                    distribution.f[vf::lbm::dir::PZZ] = mfcbb;
+                    distribution.f[vf::lbm::dir::MZZ] = mfabb;
+                    distribution.f[vf::lbm::dir::ZPZ] = mfbcb;
+                    distribution.f[vf::lbm::dir::ZMZ] = mfbab;
+                    distribution.f[vf::lbm::dir::ZZP] = mfbbc;
+                    distribution.f[vf::lbm::dir::ZZM] = mfbba;
+                    distribution.f[vf::lbm::dir::PPZ] = mfccb;
+                    distribution.f[vf::lbm::dir::MMZ] = mfaab;
+                    distribution.f[vf::lbm::dir::PMZ] = mfcab;
+                    distribution.f[vf::lbm::dir::MPZ] = mfacb;
+                    distribution.f[vf::lbm::dir::PZP] = mfcbc;
+                    distribution.f[vf::lbm::dir::MZM] = mfaba;
+                    distribution.f[vf::lbm::dir::PZM] = mfcba;
+                    distribution.f[vf::lbm::dir::MZP] = mfabc;
+                    distribution.f[vf::lbm::dir::ZPP] = mfbcc;
+                    distribution.f[vf::lbm::dir::ZMM] = mfbaa;
+                    distribution.f[vf::lbm::dir::ZPM] = mfbca;
+                    distribution.f[vf::lbm::dir::ZMP] = mfbac;
+                    distribution.f[vf::lbm::dir::PPP] = mfccc;
+                    distribution.f[vf::lbm::dir::MPP] = mfacc;
+                    distribution.f[vf::lbm::dir::PMP] = mfcac;
+                    distribution.f[vf::lbm::dir::MMP] = mfaac;
+                    distribution.f[vf::lbm::dir::PPM] = mfcca;
+                    distribution.f[vf::lbm::dir::MPM] = mfaca;
+                    distribution.f[vf::lbm::dir::PMM] = mfcaa;
+                    distribution.f[vf::lbm::dir::MMM] = mfaaa;
+                    distribution.f[vf::lbm::dir::ZZZ] = mfbbb;
+
+                    vf::lbm::KernelParameter parameter {distribution, omega, forces};
+                    vf::lbm::cumulantChimera(parameter, vf::lbm::setRelaxationRatesK17);
+
+                    mfcbb = distribution.f[vf::lbm::dir::PZZ];
+                    mfabb = distribution.f[vf::lbm::dir::MZZ];
+                    mfbcb = distribution.f[vf::lbm::dir::ZPZ];
+                    mfbab = distribution.f[vf::lbm::dir::ZMZ];
+                    mfbbc = distribution.f[vf::lbm::dir::ZZP];
+                    mfbba = distribution.f[vf::lbm::dir::ZZM];
+                    mfccb = distribution.f[vf::lbm::dir::PPZ];
+                    mfaab = distribution.f[vf::lbm::dir::MMZ];
+                    mfcab = distribution.f[vf::lbm::dir::PMZ];
+                    mfacb = distribution.f[vf::lbm::dir::MPZ];
+                    mfcbc = distribution.f[vf::lbm::dir::PZP];
+                    mfaba = distribution.f[vf::lbm::dir::MZM];
+                    mfcba = distribution.f[vf::lbm::dir::PZM];
+                    mfabc = distribution.f[vf::lbm::dir::MZP];
+                    mfbcc = distribution.f[vf::lbm::dir::ZPP];
+                    mfbaa = distribution.f[vf::lbm::dir::ZMM];
+                    mfbca = distribution.f[vf::lbm::dir::ZPM];
+                    mfbac = distribution.f[vf::lbm::dir::ZMP];
+                    mfccc = distribution.f[vf::lbm::dir::PPP];
+                    mfacc = distribution.f[vf::lbm::dir::MPP];
+                    mfcac = distribution.f[vf::lbm::dir::PMP];
+                    mfaac = distribution.f[vf::lbm::dir::MMP];
+                    mfcca = distribution.f[vf::lbm::dir::PPM];
+                    mfaca = distribution.f[vf::lbm::dir::MPM];
+                    mfcaa = distribution.f[vf::lbm::dir::PMM];
+                    mfaaa = distribution.f[vf::lbm::dir::MMM];
+                    mfbbb = distribution.f[vf::lbm::dir::ZZZ];
+
+                    //////////////////////////////////////////////////////////////////////////
+                    //proof correctness
+                    //////////////////////////////////////////////////////////////////////////
+#ifdef  PROOF_CORRECTNESS
+                    LBMReal drho_post = (mfaaa + mfaac + mfaca + mfcaa + mfacc + mfcac + mfccc + mfcca)
+                                        + (mfaab + mfacb + mfcab + mfccb) + (mfaba + mfabc + mfcba + mfcbc) + (mfbaa + mfbac + mfbca + mfbcc)
+                                        + (mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc) + mfbbb;
+                    LBMReal dif = distribution.getDensity_() - drho_post;
+#ifdef SINGLEPRECISION
+                    if (dif > 10.0E-7 || dif < -10.0E-7)
+#else
+                    if (dif > 10.0E-15 || dif < -10.0E-15)
+#endif
+                    {
+                        UB_THROW(UbException(UB_EXARGS, "rho=" + UbSystem::toString(distribution.getDensity_()) + ", rho_post=" + UbSystem::toString(drho_post)
+                                                        + " dif=" + UbSystem::toString(dif)
+                                                        + " rho is not correct for node " + UbSystem::toString(x1) + "," + UbSystem::toString(x2) + "," + UbSystem::toString(x3)
+                                                        + " in " + block.lock()->toString() + " step = " + UbSystem::toString(step)));
+                    }
+#endif
+                    
+                    (*this->localDistributions)(D3Q27System::ET_E, x1, x2, x3) = mfcbb;
+                    (*this->localDistributions)(D3Q27System::ET_N, x1, x2, x3) = mfbcb;
+                    (*this->localDistributions)(D3Q27System::ET_T, x1, x2, x3) = mfbbc;
+                    (*this->localDistributions)(D3Q27System::ET_NE, x1, x2, x3) = mfccb;
+                    (*this->localDistributions)(D3Q27System::ET_NW, x1p, x2, x3) = mfacb;
+                    (*this->localDistributions)(D3Q27System::ET_TE, x1, x2, x3) = mfcbc;
+                    (*this->localDistributions)(D3Q27System::ET_TW, x1p, x2, x3) = mfabc;
+                    (*this->localDistributions)(D3Q27System::ET_TN, x1, x2, x3) = mfbcc;
+                    (*this->localDistributions)(D3Q27System::ET_TS, x1, x2p, x3) = mfbac;
+                    (*this->localDistributions)(D3Q27System::ET_TNE, x1, x2, x3) = mfccc;
+                    (*this->localDistributions)(D3Q27System::ET_TNW, x1p, x2, x3) = mfacc;
+                    (*this->localDistributions)(D3Q27System::ET_TSE, x1, x2p, x3) = mfcac;
+                    (*this->localDistributions)(D3Q27System::ET_TSW, x1p, x2p, x3) = mfaac;
+
+                    (*this->nonLocalDistributions)(D3Q27System::ET_W, x1p, x2, x3) =  mfabb;
+                    (*this->nonLocalDistributions)(D3Q27System::ET_S, x1, x2p, x3) =  mfbab;
+                    (*this->nonLocalDistributions)(D3Q27System::ET_B, x1, x2, x3p) =  mfbba;
+                    (*this->nonLocalDistributions)(D3Q27System::ET_SW, x1p, x2p, x3) = mfaab;
+                    (*this->nonLocalDistributions)(D3Q27System::ET_SE, x1, x2p, x3) = mfcab;
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BW, x1p, x2, x3p) = mfaba;
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BE, x1, x2, x3p) = mfcba;
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BS, x1, x2p, x3p) = mfbaa;
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BN, x1, x2, x3p) = mfbca;
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BSW, x1p, x2p, x3p) = mfaaa;
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BSE, x1, x2p, x3p) = mfcaa;
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BNW, x1p, x2, x3p) = mfaca;
+                    (*this->nonLocalDistributions)(D3Q27System::ET_BNE, x1, x2, x3p) = mfcca;
+                    (*this->restDistributions)(x1, x2, x3) = mfbbb;
+                    //////////////////////////////////////////////////////////////////////////
+
+                }
+            }
+        }
+    }
+}
+//////////////////////////////////////////////////////////////////////////
+
diff --git a/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.h b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.h
new file mode 100644
index 0000000000000000000000000000000000000000..175fdd4cba2a0c8ce47248f5de6672f34dda0cc3
--- /dev/null
+++ b/src/cpu/VirtualFluidsCore/LBM/CumulantK17LBMKernelUnified.h
@@ -0,0 +1,77 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file CumulantK17LBMKernelUnified.h
+//! \ingroup LBM
+//! \author Konstantin Kutscher, Martin Geier
+//=======================================================================================
+
+#ifndef CumulantK17LBMKernelUnified_h__
+#define CumulantK17LBMKernelUnified_h__
+
+#include "LBMKernel.h"
+#include "BCProcessor.h"
+#include "D3Q27System.h"
+#include "basics/utilities/UbTiming.h"
+#include "basics/container/CbArray4D.h"
+#include "basics/container/CbArray3D.h"
+
+//! \brief   Compressible cumulant LBM kernel.
+//! \details  LBM implementation that uses the cascaded cumulant Lattice Boltzmann method for the D3Q27 model
+//!
+//! The model is published in
+//! <a href="http://dx.doi.org/10.1016/j.jcp.2017.05.040"><b>[ Geier et al., (2017), 10.1016/j.jcp.2017.05.040]</b></a>,
+//! <a href="http://dx.doi.org/10.1016/j.jcp.2017.07.004"><b>[ Geier et al., (2017), 10.1016/j.jcp.2017.07.004]</b></a>
+//!
+class CumulantK17LBMKernelUnified : public LBMKernel
+{
+public:
+    CumulantK17LBMKernelUnified();
+    ~CumulantK17LBMKernelUnified() = default;
+    void calculate(int step) override;
+    SPtr<LBMKernel> clone() override;
+    double getCalculationTime() override { return .0; }
+
+protected:
+    virtual void initDataSet();
+    LBMReal f[D3Q27System::ENDF + 1];
+
+    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr localDistributions;
+    CbArray4D<LBMReal, IndexerX4X3X2X1>::CbArray4DPtr nonLocalDistributions;
+    CbArray3D<LBMReal, IndexerX3X2X1>::CbArray3DPtr restDistributions;
+
+    mu::value_type muX1, muX2, muX3;
+    mu::value_type muDeltaT;
+    mu::value_type muNu;
+    LBMReal forcingX1;
+    LBMReal forcingX2;
+    LBMReal forcingX3;
+};
+
+
+#endif // CumulantK17LBMKernelUnified_h__
\ No newline at end of file
diff --git a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.cpp b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.cpp
index 5e88242fa23bdf37386f4f4d293ed7c2c33750ee..105603b35d043799672a58fda83ba503af70b703 100644
--- a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.cpp
@@ -1,5 +1,7 @@
 #include "D3Q27System.h"
 
+#include "lbm/MacroscopicQuantities.h"
+
 namespace D3Q27System
 {
 using namespace UbMath;
@@ -18,4 +20,31 @@ const int INVDIR[] = { INV_E,   INV_W,   INV_N,   INV_S,   INV_T,   INV_B,   INV
                        INV_NW,  INV_TE,  INV_BW,  INV_BE,  INV_TW,  INV_TN,  INV_BS,  INV_BN, INV_TS,
                        INV_TNE, INV_TNW, INV_TSE, INV_TSW, INV_BNE, INV_BNW, INV_BSE, INV_BSW };
 
+
+
+
+LBMReal getDensity(const LBMReal *const &f /*[27]*/)
+{
+    return vf::lbm::getDensity(f);
+}
+
+LBMReal getIncompVelocityX1(const LBMReal *const &f /*[27]*/)
+{
+    return vf::lbm::getIncompressibleVelocityX1(f);
+}
+
+LBMReal getIncompVelocityX2(const LBMReal *const &f /*[27]*/)
+{
+    return vf::lbm::getIncompressibleVelocityX2(f);
+}
+
+LBMReal getIncompVelocityX3(const LBMReal *const &f /*[27]*/)
+{
+    return vf::lbm::getIncompressibleVelocityX3(f);
+}
+
+
+
+
+
 } // namespace D3Q27System
\ No newline at end of file
diff --git a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h
index 8f8c376459ce0b9bd4d0b7691655c173a6d4e149..04fe8a819b143db8cc935fc893faa514805060a6 100644
--- a/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h
+++ b/src/cpu/VirtualFluidsCore/LBM/D3Q27System.h
@@ -42,6 +42,7 @@
 #include "UbException.h"
 #include "UbMath.h"
 
+
 //! \brief namespace for global system-functions
 namespace D3Q27System
 {
@@ -147,36 +148,21 @@ static const int ET_BNW = 11;
 static const int ET_TSW = 12;
 static const int ET_BNE = 12;
 
+
 //////////////////////////////////////////////////////////////////////////
 // MACROSCOPIC VALUES
 /*=====================================================================*/
-static LBMReal getDensity(const LBMReal *const &f /*[27]*/)
-{
-    return ((f[TNE] + f[BSW]) + (f[TSE] + f[BNW])) + ((f[BSE] + f[TNW]) + (f[TSW] + f[BNE])) +
-           (((f[NE] + f[SW]) + (f[SE] + f[NW])) + ((f[TE] + f[BW]) + (f[BE] + f[TW])) +
-            ((f[BN] + f[TS]) + (f[TN] + f[BS]))) +
-           ((f[E] + f[W]) + (f[N] + f[S]) + (f[T] + f[B])) + f[REST];
-}
+LBMReal getDensity(const LBMReal *const &f /*[27]*/);
 /*=====================================================================*/
 static LBMReal getPressure(const LBMReal *const &f /*[27]*/) { return REAL_CAST(UbMath::c1o3) * getDensity(f); }
 /*=====================================================================*/
-static LBMReal getIncompVelocityX1(const LBMReal *const &f /*[27]*/)
-{
-    return ((((f[TNE] - f[BSW]) + (f[TSE] - f[BNW])) + ((f[BSE] - f[TNW]) + (f[BNE] - f[TSW]))) +
-            (((f[BE] - f[TW]) + (f[TE] - f[BW])) + ((f[SE] - f[NW]) + (f[NE] - f[SW]))) + (f[E] - f[W]));
-}
+LBMReal getIncompVelocityX1(const LBMReal *const &f /*[27]*/);
 /*=====================================================================*/
-static LBMReal getIncompVelocityX2(const LBMReal *const &f /*[27]*/)
-{
-    return ((((f[TNE] - f[BSW]) + (f[BNW] - f[TSE])) + ((f[TNW] - f[BSE]) + (f[BNE] - f[TSW]))) +
-            (((f[BN] - f[TS]) + (f[TN] - f[BS])) + ((f[NW] - f[SE]) + (f[NE] - f[SW]))) + (f[N] - f[S]));
-}
+LBMReal getIncompVelocityX2(const LBMReal *const &f /*[27]*/);
 /*=====================================================================*/
-static LBMReal getIncompVelocityX3(const LBMReal *const &f /*[27]*/)
-{
-    return ((((f[TNE] - f[BSW]) + (f[TSE] - f[BNW])) + ((f[TNW] - f[BSE]) + (f[TSW] - f[BNE]))) +
-            (((f[TS] - f[BN]) + (f[TN] - f[BS])) + ((f[TW] - f[BE]) + (f[TE] - f[BW]))) + (f[T] - f[B]));
-}
+LBMReal getIncompVelocityX3(const LBMReal *const &f /*[27]*/);
+
+
 /*=====================================================================*/
 static void calcDensity(const LBMReal *const &f /*[27]*/, LBMReal &rho)
 {
diff --git a/src/cpu/VirtualFluidsCore/LBM/ILBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/ILBMKernel.h
index 4dbe8eee09a37c0c220f47619b72bade2e6ec527..bde61d9d314b61327ff8f8a2a71d2864d50cc7f5 100644
--- a/src/cpu/VirtualFluidsCore/LBM/ILBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/ILBMKernel.h
@@ -36,6 +36,8 @@
 
 #include <PointerDefinitions.h>
 
+#include "LBMSystem.h"
+
 class BCProcessor;
 class DataSet3D;
 
@@ -57,7 +59,7 @@ public:
     virtual void setCollisionFactor(double collFactor)                               = 0;
     virtual bool isInsideOfDomain(const int &x1, const int &x2, const int &x3) const = 0;
     virtual int getGhostLayerWidth() const                                           = 0;
-    virtual double getDeltaT() const                                                 = 0;
+    virtual LBMReal getDeltaT() const                                                = 0;
     virtual bool getWithForcing() const                                              = 0;
 };
 
diff --git a/src/cpu/VirtualFluidsCore/LBM/LBMKernel.cpp b/src/cpu/VirtualFluidsCore/LBM/LBMKernel.cpp
index f3d168733db86ea03a72a9741ba819ed6874a0c1..fa0fd08c9ac1da11483dad6061c96fd3672ad3a4 100644
--- a/src/cpu/VirtualFluidsCore/LBM/LBMKernel.cpp
+++ b/src/cpu/VirtualFluidsCore/LBM/LBMKernel.cpp
@@ -201,13 +201,13 @@ void LBMKernel::setDensityRatio(double densityRatio) { this->densityRatio = dens
 //////////////////////////////////////////////////////////////////////////
 double LBMKernel::getDensityRatio() const { return densityRatio; }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::setMultiphaseModelParameters(double beta, double kappa)
+void LBMKernel::setMultiphaseModelParameters(LBMReal beta, LBMReal kappa)
 {
     this->beta  = beta;
     this->kappa = kappa;
 }
 //////////////////////////////////////////////////////////////////////////
-void LBMKernel::getMultiphaseModelParameters(double &beta, double &kappa)
+void LBMKernel::getMultiphaseModelParameters(LBMReal &beta, LBMReal &kappa)
 {
     beta  = this->beta;
     kappa = this->kappa;
diff --git a/src/cpu/VirtualFluidsCore/LBM/LBMKernel.h b/src/cpu/VirtualFluidsCore/LBM/LBMKernel.h
index 5c3e067656e2fc5cb06046e28c0442b15c180050..5e211f87b3c5170589838bd71f8f7c22c2d4a431 100644
--- a/src/cpu/VirtualFluidsCore/LBM/LBMKernel.h
+++ b/src/cpu/VirtualFluidsCore/LBM/LBMKernel.h
@@ -53,6 +53,7 @@ public:
 
 public:
     LBMKernel();
+    virtual ~LBMKernel() = default;
 
     virtual SPtr<LBMKernel> clone() = 0;
 
@@ -117,8 +118,8 @@ public:
     double getCollisionFactorG() const;
     void setDensityRatio(double densityRatio);
     double getDensityRatio() const;
-    void setMultiphaseModelParameters(double beta, double kappa);
-    void getMultiphaseModelParameters(double &beta, double &kappa);
+    void setMultiphaseModelParameters(LBMReal beta, LBMReal kappa);
+    void getMultiphaseModelParameters(LBMReal &beta, LBMReal &kappa);
     void setContactAngle(double contactAngle);
     double getContactAngle() const;
     void setPhiL(double phiL);
diff --git a/src/cpu/VirtualFluidsCore/LBM/LBMSystem.h b/src/cpu/VirtualFluidsCore/LBM/LBMSystem.h
index d24775aef0fbe6822fbdc03d42e2acfeb3021430..14b4d223b2e07e3dbca9947cefd89de045bfb3cf 100644
--- a/src/cpu/VirtualFluidsCore/LBM/LBMSystem.h
+++ b/src/cpu/VirtualFluidsCore/LBM/LBMSystem.h
@@ -37,6 +37,8 @@
 #include <iostream>
 #include <string>
 
+#include "basics/Core/DataTypes.h"
+
 //! \brief namespace for global system-functions
 
 namespace LBMSystem
@@ -45,10 +47,10 @@ namespace LBMSystem
 //#define SINGLEPRECISION
 
 #ifdef SINGLEPRECISION
-typedef float real;
+//using real = float;
 #define REAL_CAST(x) ((LBMSystem::real)(x))
 #else
-using real = double;
+//using real = double;
 #define REAL_CAST(x) (x)
 #endif
 
@@ -82,6 +84,7 @@ static real calcOmega2(real viscosity, real deltaT) { return REAL_CAST(1.0 / (4.
 } // namespace LBMSystem
 
 // some typedefs for global namespace
-using LBMReal = LBMSystem::real;
+//using LBMReal = LBMSystem::real;
+using LBMReal = real;
 
 #endif
diff --git a/src/cpu/VirtualFluidsCore/Parallel/MPICommunicator.h b/src/cpu/VirtualFluidsCore/Parallel/MPICommunicator.h
index 3c25cc78f74e7a4efbca436bc0dc07b62ce4fa6e..cd63d8f1db9e27ab3572a26693355c88a303104f 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/MPICommunicator.h
+++ b/src/cpu/VirtualFluidsCore/Parallel/MPICommunicator.h
@@ -19,7 +19,7 @@ class MPICommunicator : public Communicator
 {
 private:
     MPICommunicator();
-    MPICommunicator(const MPICommunicator &) {}
+    MPICommunicator(const MPICommunicator &) = default;
 
 public:
     ~MPICommunicator() override;
diff --git a/src/cpu/VirtualFluidsCore/Parallel/MPIIODataStructures.h b/src/cpu/VirtualFluidsCore/Parallel/MPIIODataStructures.h
index 8b284fc2f768472a4115c61cd567ce0b37b7f4e9..c8bd2d0797af86858b40a1a29a154107f04e46c8 100644
--- a/src/cpu/VirtualFluidsCore/Parallel/MPIIODataStructures.h
+++ b/src/cpu/VirtualFluidsCore/Parallel/MPIIODataStructures.h
@@ -59,6 +59,9 @@ struct dataSetParam {
 struct DataSetRestart {
     double collFactor;
     double deltaT;
+    double collFactorL; // for Multiphase model
+    double collFactorG; // for Multiphase model
+    double densityRatio;// for Multiphase model
     int x1;
     int x2;
     int x3;
@@ -74,6 +77,9 @@ struct DataSetRestart {
 struct DataSetMigration {
     double collFactor;
     double deltaT;
+    double collFactorL; // for Multiphase model
+    double collFactorG; // for Multiphase model
+    double densityRatio;// for Multiphase model
     int globalID;
     int ghostLayerWidth;
     bool compressible;
@@ -164,7 +170,8 @@ struct DSArraysPresence {
     bool isAverageTripleArrayPresent;
     bool isShearStressValArrayPresent;
     bool isRelaxationFactorPresent;
-    bool isPhaseFieldPresent;
+    bool isPhaseField1Present;
+    bool isPhaseField2Present;
 };
 } // namespace MPIIODataStructures
 #endif
\ No newline at end of file
diff --git a/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp b/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp
index 914659afa685814842904e9622c31b875d6a2207..b66eff480e99102edf332cfd750e0d2b6965ba83 100644
--- a/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp
+++ b/src/cpu/VirtualFluidsCore/Utilities/CheckpointConverter.cpp
@@ -77,7 +77,7 @@ CheckpointConverter::CheckpointConverter(SPtr<Grid3D> grid, const std::string &p
     //---------------------------------------
 
     MPI_Datatype typesDataSetRead[3] = { MPI_DOUBLE, MPI_INT, MPI_CHAR };
-    int blocksDataSetRead[3]         = { 2, 5, 2 };
+    int blocksDataSetRead[3]         = { 3, 5, 2 };
     MPI_Aint offsetsDataSetRead[3], lbDataSetRead, extentDataSetRead;
 
     offsetsDataSetRead[0] = 0;
@@ -358,6 +358,7 @@ void CheckpointConverter::convertDataSet(int step, int procCount)
             dataSetWriteArray[nb].deltaT          = dataSetReadArray[nb].deltaT;
             dataSetWriteArray[nb].compressible    = dataSetReadArray[nb].compressible;
             dataSetWriteArray[nb].withForcing     = dataSetReadArray[nb].withForcing;
+//            dataSetWriteArray[nb].densityRatio    = dataSetReadArray[nb].densityRatio;
 
             write_offset = (MPI_Offset)(3 * sizeof(dataSetParam) + dataSetWriteArray[nb].globalID * sizeofOneDataSet);
             MPI_File_write_at(file_handlerW, write_offset, &dataSetWriteArray[nb], 1, dataSetTypeWrite,
diff --git a/src/cpu/VirtualFluidsCore/Visitors/BoundaryConditionsBlockVisitor.cpp b/src/cpu/VirtualFluidsCore/Visitors/BoundaryConditionsBlockVisitor.cpp
index b4eafda902448564800bb1479f7ac60f74c4b77d..a6372fc31712899dab0b8edaf919a141663991ca 100644
--- a/src/cpu/VirtualFluidsCore/Visitors/BoundaryConditionsBlockVisitor.cpp
+++ b/src/cpu/VirtualFluidsCore/Visitors/BoundaryConditionsBlockVisitor.cpp
@@ -98,6 +98,7 @@ void BoundaryConditionsBlockVisitor::visit(SPtr<Grid3D> grid, SPtr<Block3D> bloc
 
                             if (bca) {
                                 bca = bca->clone();
+                                bca->setBlock(block);
                                 bca->setNodeIndex(x1, x2, x3);
                                 bca->setBcPointer(bcPtr);
                                 bca->addDistributions(distributions);
diff --git a/src/cpu/pythonbindings/CMakeLists.txt b/src/cpu/pythonbindings/CMakeLists.txt
index 4c728febe0e669fa308d45158c7fae4ce3762c05..3b4e7e5a1506899710dc03ea275c0d4ac1cff66d 100644
--- a/src/cpu/pythonbindings/CMakeLists.txt
+++ b/src/cpu/pythonbindings/CMakeLists.txt
@@ -12,4 +12,5 @@ target_compile_definitions(pymuparser PRIVATE VF_METIS VF_MPI)
 target_link_libraries(pyfluids PRIVATE simulationconfig VirtualFluidsCore muparser basics)
 target_link_libraries(pymuparser PRIVATE muparser)
 
-
+target_include_directories(pyfluids PRIVATE ${CMAKE_SOURCE_DIR}/src/)
+target_include_directories(pyfluids PRIVATE ${CMAKE_BINARY_DIR})
\ No newline at end of file
diff --git a/src/cpu/pythonbindings/src/VirtualfluidsModule.cpp b/src/cpu/pythonbindings/src/VirtualfluidsModule.cpp
index 61c6005b9fbdd1108d9ffff374763cad95a5c40e..564dc1838d48a92340fa5491779177b299bcb270 100644
--- a/src/cpu/pythonbindings/src/VirtualfluidsModule.cpp
+++ b/src/cpu/pythonbindings/src/VirtualfluidsModule.cpp
@@ -6,14 +6,17 @@
 #include "submodules/simulationparameters.cpp"
 #include "submodules/writer.cpp"
 
-namespace py = pybind11;
-
-PYBIND11_MODULE(pyfluids, m)
+namespace py_bindings
 {
-    makeBoundaryConditionsModule(m);
-    makeSimulationModule(m);
-    makeGeometryModule(m);
-    makeKernelModule(m);
-    makeParametersModule(m);
-    makeWriterModule(m);
+    namespace py = pybind11;
+
+    PYBIND11_MODULE(pyfluids, m)
+    {
+        boundaryconditions::makeModule(m);
+        simulation::makeModule(m);
+        geometry::makeModule(m);
+        kernel::makeModule(m);
+        parameters::makeModule(m);
+        writer::makeModule(m);
+    }
 }
\ No newline at end of file
diff --git a/src/cpu/pythonbindings/src/submodules/boundaryconditions.cpp b/src/cpu/pythonbindings/src/submodules/boundaryconditions.cpp
index ef8b923fd9aa2feea9c81da686df38cb891a4f84..3bff7bc069ca20fe1c0cf3d1847b9714e0381505 100644
--- a/src/cpu/pythonbindings/src/submodules/boundaryconditions.cpp
+++ b/src/cpu/pythonbindings/src/submodules/boundaryconditions.cpp
@@ -7,56 +7,58 @@
 #include <BoundaryConditions/VelocityBCAdapter.h>
 #include <BoundaryConditions/NoSlipBCAlgorithm.h>
 #include <BoundaryConditions/VelocityBCAlgorithm.h>
+#include <BoundaryConditions/HighViscosityNoSlipBCAlgorithm.h>
 
-namespace py = pybind11;
-using namespace py::literals;
+namespace boundaryconditions
+{
+    namespace py = pybind11;
+    using namespace py::literals;
 
-template<class Adapter>
-using py_bc_class = py::class_<Adapter, BCAdapter, std::shared_ptr<Adapter>>;
+    template<class adapter, class algorithm,
+            class = std::enable_if_t<std::is_base_of<BCAdapter, adapter>::value>,
+            class = std::enable_if_t<std::is_base_of<BCAlgorithm, algorithm>::value>>
+    class PyBoundaryCondition : public adapter
+    {
+    public:
+        template<typename ...Args>
+        PyBoundaryCondition(Args &&... args) : adapter(std::forward<Args>(args)...)
+        {
+            this->setBcAlgorithm(std::make_shared<algorithm>());
+        }
+    };
 
-template<class Adapter, class Algorithm, typename ...Args>
-std::shared_ptr<Adapter> create(Args... args)
-{
-    auto adapter = std::make_shared<Adapter>(args...);
-    adapter->setBcAlgorithm(std::make_shared<Algorithm>());
-    return adapter;
-}
+    template<class adapter, class algorithm>
+    using bc_class = py::class_<PyBoundaryCondition<adapter, algorithm>, BCAdapter,
+            std::shared_ptr<PyBoundaryCondition<adapter, algorithm>>>;
 
-template<class Algorithm>
-void add_constructors_to_velocity_bc(py_bc_class<VelocityBCAdapter> &cls)
-{
-    auto firstConstructor = &create<VelocityBCAdapter, Algorithm, bool &, bool &, bool &, mu::Parser &, double &, double &>;
-    auto secondConstructor = &create<VelocityBCAdapter, Algorithm,
-            bool &, bool &, bool &, mu::Parser &, mu::Parser &, mu::Parser &, double &, double &>;
-    auto thirdConstructor = &create<VelocityBCAdapter, Algorithm,
-            double &, double &, double &, double &, double &, double &, double &, double &, double &>;
-
-    cls.def(py::init(&create<VelocityBCAdapter, Algorithm>))
-            .def(py::init(firstConstructor),
-                 "vx1"_a, "vx2"_a, "vx3"_a, "function"_a, "start_time"_a, "end_time"_a)
-            .def(py::init(secondConstructor),
-                 "vx1"_a, "vx2"_a, "vx3"_a,
-                 "function_vx1"_a, "function_vx2"_a, "function_vx2"_a,
-                 "start_time"_a, "end_time"_a)
-            .def(py::init(thirdConstructor),
-                 "vx1"_a, "vx1_start_time"_a, "vx1_end_time"_a,
-                 "vx2"_a, "vx2_start_time"_a, "vx2_end_time"_a,
-                 "vx3"_a, "vx3_start_time"_a, "vx3_end_time"_a);
-}
-
-void makeBoundaryConditionsModule(py::module_ &parentModule)
-{
-    py::module_ bcModule = parentModule.def_submodule("boundaryconditions");
+    void makeModule(py::module_ &parentModule)
+    {
+        py::module_ bcModule = parentModule.def_submodule("boundaryconditions");
+
+        auto _ = py::class_<BCAdapter, std::shared_ptr<BCAdapter>>(bcModule, "BCAdapter");
 
-    py::class_<BCAdapter, std::shared_ptr<BCAdapter>>(bcModule, "BCAdapter");
+        bc_class<NoSlipBCAdapter, NoSlipBCAlgorithm>(bcModule, "NoSlipBoundaryCondition")
+                .def(py::init());
 
-    py_bc_class<NoSlipBCAdapter>(bcModule, "NoSlipBoundaryCondition")
-            .def(py::init(&create<NoSlipBCAdapter, NoSlipBCAlgorithm>));
+        bc_class<NoSlipBCAdapter, HighViscosityNoSlipBCAlgorithm>(bcModule, "HighViscosityNoSlipBoundaryCondition")
+                .def(py::init());
 
-    auto velocityBoundaryCondition = py_bc_class<VelocityBCAdapter>(bcModule, "VelocityBoundaryCondition");
-    add_constructors_to_velocity_bc<VelocityBCAlgorithm>(velocityBoundaryCondition);
+        bc_class<VelocityBCAdapter, VelocityBCAlgorithm>(bcModule, "VelocityBoundaryCondition")
+                .def(py::init())
+                .def(py::init<bool &, bool &, bool &, mu::Parser &, double &, double &>(),
+                     "vx1"_a, "vx2"_a, "vx3"_a,
+                     "function"_a, "start_time"_a, "end_time"_a)
+                .def(py::init<bool &, bool &, bool &, mu::Parser &, mu::Parser &, mu::Parser &, double &, double &>(),
+                     "vx1"_a, "vx2"_a, "vx3"_a,
+                     "function_vx1"_a, "function_vx2"_a, "function_vx2"_a,
+                     "start_time"_a, "end_time"_a)
+                .def(py::init<double &, double &, double &, double &, double &, double &, double &, double &, double &>(),
+                     "vx1"_a, "vx1_start_time"_a, "vx1_end_time"_a,
+                     "vx2"_a, "vx2_start_time"_a, "vx2_end_time"_a,
+                     "vx3"_a, "vx3_start_time"_a, "vx3_end_time"_a);
 
-    py_bc_class<DensityBCAdapter>(bcModule, "DensityBoundaryCondition")
-            .def(py::init(&create<DensityBCAdapter, NonReflectingOutflowBCAlgorithm>));
-}
+        bc_class<DensityBCAdapter, NonReflectingOutflowBCAlgorithm>(bcModule, "NonReflectingOutflow")
+                .def(py::init());
+    }
 
+}
\ No newline at end of file
diff --git a/src/cpu/pythonbindings/src/submodules/geometry.cpp b/src/cpu/pythonbindings/src/submodules/geometry.cpp
index 884ced7b92ddae8a30a8f482e4b22dcbfa37beec..b7ff4dd761258d41687589d2dd89c3479093753e 100644
--- a/src/cpu/pythonbindings/src/submodules/geometry.cpp
+++ b/src/cpu/pythonbindings/src/submodules/geometry.cpp
@@ -5,76 +5,80 @@
 #include <geometry3d/GbLine3D.h>
 #include <Interactors/Interactor3D.h>
 
-namespace py = pybind11;
 
-template<class GeoObject>
-using py_geometry = py::class_<GeoObject, GbObject3D, std::shared_ptr<GeoObject>>;
-
-std::string GbPoint3D_repr_(const GbPoint3D &instance)
+namespace geometry
 {
-    std::ostringstream stream;
-    stream << "<GbPoint3D"
-           << " x1: " << instance.getX1Coordinate()
-           << " x2: " << instance.getX2Coordinate()
-           << " x3: " << instance.getX3Coordinate() << ">";
+    namespace py = pybind11;
 
-    return stream.str();
-}
+    template<class GeoObject>
+    using py_geometry = py::class_<GeoObject, GbObject3D, std::shared_ptr<GeoObject>>;
 
-void makeGeometryModule(py::module_ &parentModule)
-{
-    py::module geometry = parentModule.def_submodule("geometry");
+    std::string GbPoint3D_repr_(const GbPoint3D &instance)
+    {
+        std::ostringstream stream;
+        stream << "<GbPoint3D"
+               << " x1: " << instance.getX1Coordinate()
+               << " x2: " << instance.getX2Coordinate()
+               << " x3: " << instance.getX3Coordinate() << ">";
+
+        return stream.str();
+    }
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::module geometry = parentModule.def_submodule("geometry");
+
+        py::class_<GbObject3D, std::shared_ptr<GbObject3D>>(geometry, "GbObject3D");
 
-    py::class_<GbObject3D, std::shared_ptr<GbObject3D>>(geometry, "GbObject3D");
+        py_geometry<GbPoint3D>(geometry, "GbPoint3D")
+                .def(py::init())
+                .def(py::init<double &, double &, double &>())
+                .def(py::init<GbPoint3D *>())
+                .def_property("x1", &GbPoint3D::getX1Coordinate, &GbPoint3D::setX1)
+                .def_property("x2", &GbPoint3D::getX2Coordinate, &GbPoint3D::setX2)
+                .def_property("x3", &GbPoint3D::getX3Coordinate, &GbPoint3D::setX3)
+                .def("get_distance", &GbPoint3D::getDistance)
+                .def("__repr__", &GbPoint3D_repr_);
 
-    py_geometry<GbPoint3D>(geometry, "GbPoint3D")
-            .def(py::init())
-            .def(py::init<double &, double &, double &>())
-            .def(py::init<GbPoint3D *>())
-            .def_property("x1", &GbPoint3D::getX1Coordinate, &GbPoint3D::setX1)
-            .def_property("x2", &GbPoint3D::getX2Coordinate, &GbPoint3D::setX2)
-            .def_property("x3", &GbPoint3D::getX3Coordinate, &GbPoint3D::setX3)
-            .def("get_distance", &GbPoint3D::getDistance)
-            .def("__repr__", [&](const GbPoint3D &instance)
-            { return GbPoint3D_repr_(instance); });
+        py_geometry<GbCuboid3D>(geometry, "GbCuboid3D")
+                .def(py::init())
+                .def(py::init<double &, double &, double &, double &, double &, double &>())
+                .def(py::init<GbPoint3D *, GbPoint3D *>())
+                .def(py::init<GbCuboid3D *>())
+                .def_property("point1", &GbCuboid3D::getPoint1, &GbCuboid3D::setPoint1)
+                .def_property("point2", &GbCuboid3D::getPoint2, &GbCuboid3D::setPoint2)
+                .def("__repr__", [&](GbCuboid3D &instance)
+                {
+                    std::ostringstream stream;
+                    stream << "<GbCuboid3D" << std::endl
+                           << "point1: " << GbPoint3D_repr_(instance.getPoint1()) << std::endl
+                           << "point2: " << GbPoint3D_repr_(instance.getPoint2()) << ">";
+                    return stream.str();
+                });
 
-    py_geometry<GbCuboid3D>(geometry, "GbCuboid3D")
-            .def(py::init())
-            .def(py::init<double &, double &, double &, double &, double &, double &>())
-            .def(py::init<GbPoint3D *, GbPoint3D *>())
-            .def(py::init<GbCuboid3D *>())
-            .def_property("point1", &GbCuboid3D::getPoint1, &GbCuboid3D::setPoint1)
-            .def_property("point2", &GbCuboid3D::getPoint2, &GbCuboid3D::setPoint2)
-            .def("__repr__", [&](GbCuboid3D instance)
-            {
-                std::ostringstream stream;
-                stream << "<GbCuboid3D" << std::endl
-                       << "point1: " << GbPoint3D_repr_(instance.getPoint1()) << std::endl
-                       << "point2: " << GbPoint3D_repr_(instance.getPoint2()) << ">";
-                return stream.str();
-            });
+        py_geometry<GbLine3D>(geometry, "GbLine3D")
+                .def(py::init())
+                .def(py::init<GbPoint3D *, GbPoint3D *>())
+                .def(py::init<GbLine3D>())
+                .def_property("point1", &GbLine3D::getPoint1, &GbLine3D::setPoint1)
+                .def_property("point2", &GbLine3D::getPoint2, &GbLine3D::setPoint2)
+                .def("__repr__", [&](GbLine3D &instance)
+                {
+                    std::ostringstream stream;
+                    stream << "<GbLine3D" << std::endl
+                           << "point1: " << GbPoint3D_repr_(instance.getPoint1()) << std::endl
+                           << "point2: " << GbPoint3D_repr_(instance.getPoint2()) << ">";
+                    return stream.str();
+                });
 
-    py_geometry<GbLine3D>(geometry, "GbLine3D")
-            .def(py::init())
-            .def(py::init<GbPoint3D *, GbPoint3D *>())
-            .def(py::init<GbLine3D>())
-            .def_property("point1", &GbLine3D::getPoint1, &GbLine3D::setPoint1)
-            .def_property("point2", &GbLine3D::getPoint2, &GbLine3D::setPoint2)
-            .def("__repr__", [&](GbLine3D instance)
-            {
-                std::ostringstream stream;
-                stream << "<GbLine3D" << std::endl
-                       << "point1: " << GbPoint3D_repr_(instance.getPoint1()) << std::endl
-                       << "point2: " << GbPoint3D_repr_(instance.getPoint2()) << ">";
-                return stream.str();
-            });
 
+        py::class_<Interactor3D, std::shared_ptr<Interactor3D>>(geometry, "State")
+                .def_readonly_static("SOLID", &Interactor3D::SOLID)
+                .def_readonly_static("INVERSESOLID", &Interactor3D::INVERSESOLID)
+                .def_readonly_static("TIMEDEPENDENT", &Interactor3D::TIMEDEPENDENT)
+                .def_readonly_static("FLUID", &Interactor3D::FLUID)
+                .def_readonly_static("MOVEABLE", &Interactor3D::MOVEABLE)
+                .def_readonly_static("CHANGENOTNECESSARY", &Interactor3D::CHANGENOTNECESSARY);
+    }
 
-    py::class_<Interactor3D, std::shared_ptr<Interactor3D>>(geometry, "State")
-            .def_readonly_static("SOLID", &Interactor3D::SOLID)
-            .def_readonly_static("INVERSESOLID", &Interactor3D::INVERSESOLID)
-            .def_readonly_static("TIMEDEPENDENT", &Interactor3D::TIMEDEPENDENT)
-            .def_readonly_static("FLUID", &Interactor3D::FLUID)
-            .def_readonly_static("MOVEABLE", &Interactor3D::MOVEABLE)
-            .def_readonly_static("CHANGENOTNECESSARY", &Interactor3D::CHANGENOTNECESSARY);
 }
\ No newline at end of file
diff --git a/src/cpu/pythonbindings/src/submodules/kernel.cpp b/src/cpu/pythonbindings/src/submodules/kernel.cpp
index 0e2e23c8c6a8ff56ea01c277e9825e7eb78c5c02..fb291790632cc2041410f60a14fca8d966283343 100644
--- a/src/cpu/pythonbindings/src/submodules/kernel.cpp
+++ b/src/cpu/pythonbindings/src/submodules/kernel.cpp
@@ -3,39 +3,43 @@
 #include <simulationconfig/KernelFactory.h>
 #include <simulationconfig/KernelConfigStructs.h>
 
-
-namespace py = pybind11;
-
-
-void makeKernelModule(py::module_ &parentModule)
+namespace kernel
 {
-    py::module kernelModule = parentModule.def_submodule("kernel");
-
-    py::enum_<KernelFactory::KernelType>(kernelModule, "KernelType")
-            .value("BGK", KernelFactory::BGK)
-            .value("CompressibleCumulantFourthOrderViscosity",
-                   KernelFactory::COMPRESSIBLE_CUMULANT_4TH_ORDER_VISCOSITY);
-
-
-    py::class_<LBMKernelConfiguration, std::shared_ptr<LBMKernelConfiguration>>(kernelModule, "LBMKernel")
-            .def(py::init<KernelFactory::KernelType>())
-            .def_readwrite("use_forcing", &LBMKernelConfiguration::useForcing)
-            .def_readwrite("forcing_in_x1", &LBMKernelConfiguration::forcingX1)
-            .def_readwrite("forcing_in_x2", &LBMKernelConfiguration::forcingX2)
-            .def_readwrite("forcing_in_x3", &LBMKernelConfiguration::forcingX3)
-            .def("set_forcing", [](LBMKernelConfiguration &kernelConfig, double x1, double x2, double x3) {
-                kernelConfig.forcingX1 = x1;
-                kernelConfig.forcingX2 = x2;
-                kernelConfig.forcingX3 = x3;
-            })
-            .def("__repr__", [](LBMKernelConfiguration &kernelConfig) {
-                std::ostringstream stream;
-                stream << "<" << kernelConfig.kernelType << std::endl
-                       << "Use forcing: " << kernelConfig.useForcing << std::endl
-                       << "Forcing in x1: " << kernelConfig.forcingX1 << std::endl
-                       << "Forcing in x2: " << kernelConfig.forcingX2 << std::endl
-                       << "Forcing in x3: " << kernelConfig.forcingX3 << ">" << std::endl;
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::module kernelModule = parentModule.def_submodule("kernel");
+
+        py::enum_<KernelFactory::KernelType>(kernelModule, "KernelType")
+                .value("BGK", KernelFactory::BGK)
+                .value("CompressibleCumulantFourthOrderViscosity",
+                       KernelFactory::COMPRESSIBLE_CUMULANT_4TH_ORDER_VISCOSITY);
+
+        py::class_<LBMKernelConfiguration, std::shared_ptr<LBMKernelConfiguration>>(kernelModule, "LBMKernel")
+                .def(py::init<KernelFactory::KernelType>())
+                .def_readwrite("type", &LBMKernelConfiguration::kernelType)
+                .def_readwrite("use_forcing", &LBMKernelConfiguration::useForcing)
+                .def_readwrite("forcing_in_x1", &LBMKernelConfiguration::forcingX1)
+                .def_readwrite("forcing_in_x2", &LBMKernelConfiguration::forcingX2)
+                .def_readwrite("forcing_in_x3", &LBMKernelConfiguration::forcingX3)
+                .def("set_forcing", [](LBMKernelConfiguration &kernelConfig, double x1, double x2, double x3)
+                {
+                    kernelConfig.forcingX1 = x1;
+                    kernelConfig.forcingX2 = x2;
+                    kernelConfig.forcingX3 = x3;
+                })
+                .def("__repr__", [](LBMKernelConfiguration &kernelConfig)
+                {
+                    std::ostringstream stream;
+                    stream << "<" << kernelConfig.kernelType << std::endl
+                           << "Use forcing: " << kernelConfig.useForcing << std::endl
+                           << "Forcing in x1: " << kernelConfig.forcingX1 << std::endl
+                           << "Forcing in x2: " << kernelConfig.forcingX2 << std::endl
+                           << "Forcing in x3: " << kernelConfig.forcingX3 << ">" << std::endl;
+
+                    return stream.str();
+                });
+    }
 
-                return stream.str();
-            });
 }
\ No newline at end of file
diff --git a/src/cpu/pythonbindings/src/submodules/simulationconfig.cpp b/src/cpu/pythonbindings/src/submodules/simulationconfig.cpp
index a525691bdf8dd9a4a0a3994b607816fba509c7a5..60af4e36af4dca67e9262dd9f5ee1f46d5b7bb58 100644
--- a/src/cpu/pythonbindings/src/submodules/simulationconfig.cpp
+++ b/src/cpu/pythonbindings/src/submodules/simulationconfig.cpp
@@ -1,18 +1,22 @@
 #include <pybind11/pybind11.h>
 #include <simulationconfig/Simulation.h>
 
-namespace py = pybind11;
-
-void makeSimulationModule(py::module_ &parentModule)
+namespace simulation
 {
-    py::class_<Simulation, std::shared_ptr<Simulation>>(parentModule, "Simulation")
-            .def(py::init())
-            .def("set_writer", &Simulation::setWriterConfiguration)
-            .def("set_grid_parameters", &Simulation::setGridParameters)
-            .def("set_physical_parameters", &Simulation::setPhysicalParameters)
-            .def("set_runtime_parameters", &Simulation::setRuntimeParameters)
-            .def("set_kernel_config", &Simulation::setKernelConfiguration)
-            .def("add_object", &Simulation::addObject)
-            .def("add_bc_adapter", &Simulation::addBCAdapter)
-            .def("run_simulation", &Simulation::run);
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::class_<Simulation, std::shared_ptr<Simulation>>(parentModule, "Simulation")
+                .def(py::init())
+                .def("set_writer", &Simulation::setWriterConfiguration)
+                .def("set_grid_parameters", &Simulation::setGridParameters)
+                .def("set_physical_parameters", &Simulation::setPhysicalParameters)
+                .def("set_runtime_parameters", &Simulation::setRuntimeParameters)
+                .def("set_kernel_config", &Simulation::setKernelConfiguration)
+                .def("add_object", &Simulation::addObject)
+                .def("add_bc_adapter", &Simulation::addBCAdapter)
+                .def("run_simulation", &Simulation::run);
+    }
+
 }
\ No newline at end of file
diff --git a/src/cpu/pythonbindings/src/submodules/simulationparameters.cpp b/src/cpu/pythonbindings/src/submodules/simulationparameters.cpp
index f59d1c0ec0473c537f0cc334044bcd113f822687..acc272f2ee412cfbafd9007b4b18610cfd0a1e9b 100644
--- a/src/cpu/pythonbindings/src/submodules/simulationparameters.cpp
+++ b/src/cpu/pythonbindings/src/submodules/simulationparameters.cpp
@@ -3,54 +3,57 @@
 #include <complex>
 #include <simulationconfig/SimulationParameters.h>
 
-namespace py = pybind11;
-
-void makeParametersModule(py::module_ &parentModule)
+namespace parameters
 {
-    py::module parametersModule = parentModule.def_submodule("parameters");
-
-    py::class_<PhysicalParameters, std::shared_ptr<PhysicalParameters>>(parametersModule, "PhysicalParameters")
-            .def(py::init())
-            .def_readwrite("bulk_viscosity_factor", &PhysicalParameters::bulkViscosityFactor,
-                           "The viscosity of the fluid will be multiplied with this factor to calculate its bulk viscosity. Default is 1.0")
-            .def_readwrite("lattice_viscosity", &PhysicalParameters::latticeViscosity, "Lattice viscosity");
-
-    py::class_<GridParameters, std::shared_ptr<GridParameters>>(parametersModule, "GridParameters")
-            .def(py::init())
-            .def_readwrite("node_distance", &GridParameters::nodeDistance)
-            .def_readwrite("reference_direction_index", &GridParameters::referenceDirectionIndex)
-            .def_readwrite("number_of_nodes_per_direction", &GridParameters::numberOfNodesPerDirection)
-            .def_readwrite("blocks_per_direction", &GridParameters::blocksPerDirection)
-            .def_readwrite("periodic_boundary_in_x1", &GridParameters::periodicBoundaryInX1)
-            .def_readwrite("periodic_boundary_in_x2", &GridParameters::periodicBoundaryInX2)
-            .def_readwrite("periodic_boundary_in_x3", &GridParameters::periodicBoundaryInX3)
-            .def_property_readonly("bounding_box", &GridParameters::boundingBox);
-
-    py::class_<BoundingBox, std::shared_ptr<BoundingBox>>(parametersModule, "BoundingBox")
-            .def_readonly("min_x1", &BoundingBox::minX1)
-            .def_readonly("min_x2", &BoundingBox::minX2)
-            .def_readonly("min_x3", &BoundingBox::minX3)
-            .def_readonly("max_x1", &BoundingBox::maxX1)
-            .def_readonly("max_x2", &BoundingBox::maxX2)
-            .def_readonly("max_x3", &BoundingBox::maxX3)
-            .def("__repr__", [](BoundingBox &self)
-            {
-                std::ostringstream stream;
-                stream << "<BoundingBox" << std::endl
-                       << "min x1: " << self.minX1 << std::endl
-                       << "min x2: " << self.minX2 << std::endl
-                       << "min x3: " << self.minX3 << std::endl
-                       << "max x1: " << self.maxX1 << std::endl
-                       << "max x2: " << self.maxX2 << std::endl
-                       << "max x3: " << self.maxX3 << std::endl << ">";
-
-                return stream.str();
-            });
-
-    py::class_<RuntimeParameters, std::shared_ptr<RuntimeParameters>>(parametersModule, "RuntimeParameters")
-            .def(py::init())
-            .def_readwrite("number_of_timesteps", &RuntimeParameters::numberOfTimeSteps)
-            .def_readwrite("timestep_log_interval", &RuntimeParameters::timeStepLogInterval)
-            .def_readwrite("number_of_threads", &RuntimeParameters::numberOfThreads);
-
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::module parametersModule = parentModule.def_submodule("parameters");
+
+        py::class_<PhysicalParameters, std::shared_ptr<PhysicalParameters>>(parametersModule, "PhysicalParameters")
+                .def(py::init())
+                .def_readwrite("bulk_viscosity_factor", &PhysicalParameters::bulkViscosityFactor,
+                               "The viscosity of the fluid will be multiplied with this factor to calculate its bulk viscosity. Default is 1.0")
+                .def_readwrite("lattice_viscosity", &PhysicalParameters::latticeViscosity, "Lattice viscosity");
+
+        py::class_<GridParameters, std::shared_ptr<GridParameters>>(parametersModule, "GridParameters")
+                .def(py::init())
+                .def_readwrite("node_distance", &GridParameters::nodeDistance)
+                .def_readwrite("reference_direction_index", &GridParameters::referenceDirectionIndex)
+                .def_readwrite("number_of_nodes_per_direction", &GridParameters::numberOfNodesPerDirection)
+                .def_readwrite("blocks_per_direction", &GridParameters::blocksPerDirection)
+                .def_readwrite("periodic_boundary_in_x1", &GridParameters::periodicBoundaryInX1)
+                .def_readwrite("periodic_boundary_in_x2", &GridParameters::periodicBoundaryInX2)
+                .def_readwrite("periodic_boundary_in_x3", &GridParameters::periodicBoundaryInX3)
+                .def_property_readonly("bounding_box", &GridParameters::boundingBox);
+
+        py::class_<BoundingBox, std::shared_ptr<BoundingBox>>(parametersModule, "BoundingBox")
+                .def_readonly("min_x1", &BoundingBox::minX1)
+                .def_readonly("min_x2", &BoundingBox::minX2)
+                .def_readonly("min_x3", &BoundingBox::minX3)
+                .def_readonly("max_x1", &BoundingBox::maxX1)
+                .def_readonly("max_x2", &BoundingBox::maxX2)
+                .def_readonly("max_x3", &BoundingBox::maxX3)
+                .def("__repr__", [](BoundingBox &self)
+                {
+                    std::ostringstream stream;
+                    stream << "<BoundingBox" << std::endl
+                           << "min x1: " << self.minX1 << std::endl
+                           << "min x2: " << self.minX2 << std::endl
+                           << "min x3: " << self.minX3 << std::endl
+                           << "max x1: " << self.maxX1 << std::endl
+                           << "max x2: " << self.maxX2 << std::endl
+                           << "max x3: " << self.maxX3 << std::endl << ">";
+
+                    return stream.str();
+                });
+
+        py::class_<RuntimeParameters, std::shared_ptr<RuntimeParameters>>(parametersModule, "RuntimeParameters")
+                .def(py::init())
+                .def_readwrite("number_of_timesteps", &RuntimeParameters::numberOfTimeSteps)
+                .def_readwrite("timestep_log_interval", &RuntimeParameters::timeStepLogInterval)
+                .def_readwrite("number_of_threads", &RuntimeParameters::numberOfThreads);
+
+    }
 }
\ No newline at end of file
diff --git a/src/cpu/pythonbindings/src/submodules/writer.cpp b/src/cpu/pythonbindings/src/submodules/writer.cpp
index 40819e4766eb30a442967067f954fef5508a4707..d5ec527a27caf63d9a3066c51e1f675b307fe0b2 100644
--- a/src/cpu/pythonbindings/src/submodules/writer.cpp
+++ b/src/cpu/pythonbindings/src/submodules/writer.cpp
@@ -1,18 +1,21 @@
 #include <pybind11/pybind11.h>
 #include <simulationconfig/WriterConfiguration.h>
 
-namespace py = pybind11;
-
-void makeWriterModule(py::module_ &parentModule)
+namespace writer
 {
-    py::module writerModule = parentModule.def_submodule("writer");
+    namespace py = pybind11;
+
+    void makeModule(py::module_ &parentModule)
+    {
+        py::module writerModule = parentModule.def_submodule("writer");
 
-    py::enum_<OutputFormat>(writerModule, "OutputFormat")
-            .value("ASCII", OutputFormat::ASCII)
-            .value("BINARY", OutputFormat::BINARY);
+        py::enum_<OutputFormat>(writerModule, "OutputFormat")
+                .value("ASCII", OutputFormat::ASCII)
+                .value("BINARY", OutputFormat::BINARY);
 
-    py::class_<WriterConfiguration>(writerModule, "Writer")
-            .def(py::init())
-            .def_readwrite("output_path", &WriterConfiguration::outputPath)
-            .def_readwrite("output_format", &WriterConfiguration::outputFormat);
+        py::class_<WriterConfiguration>(writerModule, "Writer")
+                .def(py::init())
+                .def_readwrite("output_path", &WriterConfiguration::outputPath)
+                .def_readwrite("output_format", &WriterConfiguration::outputFormat);
+    }
 }
\ No newline at end of file
diff --git a/src/cpu/simulationconfig/CMakeLists.txt b/src/cpu/simulationconfig/CMakeLists.txt
index 2e5e7f2eb9cba467e1ecbe3ec9cea13b3a3f322d..95ee969a049fd65cfadc6cc95d814e788a02aa8e 100644
--- a/src/cpu/simulationconfig/CMakeLists.txt
+++ b/src/cpu/simulationconfig/CMakeLists.txt
@@ -1,20 +1,8 @@
-cmake_minimum_required(VERSION 3.1)
 project(simulationconfig)
 
-set(VFBUILDER_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)
 
-file(GLOB HEADERS
-        ${VFBUILDER_INCLUDE_DIR}/simulationconfig/*.h
-        ${VFBUILDER_INCLUDE_DIR}/simulationconfig/boundaryconditions/*.h)
+vf_add_library(NAME simulationconfig PUBLIC_LINK VirtualFluidsCore basics muparser)
 
-file(GLOB SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp)
-
-add_library(simulationconfig STATIC ${SRC} ${HEADERS})
-
-set_target_properties(simulationconfig PROPERTIES CXX_STANDARD 14 POSITION_INDEPENDENT_CODE ON)
-
-target_include_directories(simulationconfig PUBLIC ${VFBUILDER_INCLUDE_DIR})
-target_link_libraries(simulationconfig PRIVATE VirtualFluidsCore basics muparser)
-target_compile_definitions(simulationconfig PRIVATE VF_METIS VF_MPI)
-target_include_directories(simulationconfig PUBLIC ${CMAKE_BINARY_DIR})
+set_target_properties(simulationconfig PROPERTIES POSITION_INDEPENDENT_CODE ON)
 
+target_include_directories(simulationconfig PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
diff --git a/src/cpu/simulationconfig/include/simulationconfig/AbstractLBMSystem.h b/src/cpu/simulationconfig/include/simulationconfig/AbstractLBMSystem.h
index 0c3ac72db2d3cb3488860227171f2fd27600c028..156649c6fd3c060117e8247efab2765f56f7c77b 100644
--- a/src/cpu/simulationconfig/include/simulationconfig/AbstractLBMSystem.h
+++ b/src/cpu/simulationconfig/include/simulationconfig/AbstractLBMSystem.h
@@ -7,6 +7,8 @@
 
 class AbstractLBMSystem {
 public:
+    virtual ~AbstractLBMSystem() = default;
+
     virtual int getNumberOfDirections() = 0;
 
     virtual std::shared_ptr<Interactor3D> makeInteractor() = 0;
diff --git a/src/cpu/simulationconfig/include/simulationconfig/KernelFactory.h b/src/cpu/simulationconfig/include/simulationconfig/KernelFactory.h
index 6b151d96f9c538bcfbf3aeb693afd56e36e6cf35..6f984849bea91a832d0d778107feacb3cfdbdca6 100644
--- a/src/cpu/simulationconfig/include/simulationconfig/KernelFactory.h
+++ b/src/cpu/simulationconfig/include/simulationconfig/KernelFactory.h
@@ -20,6 +20,7 @@ public:
     };
 
     KernelFactory() = default;
+    virtual ~KernelFactory() = default;
 
     std::shared_ptr<LBMKernel> makeKernel(KernelType kernelType);
 
diff --git a/src/cpu/simulationconfig/include/simulationconfig/Simulation.h b/src/cpu/simulationconfig/include/simulationconfig/Simulation.h
index 47f8b0e6ef1c844a70efcf0faedd5cbcdb7dbc05..4bf800c2375347ab7040424bdb2e4c53d5cc2bf8 100644
--- a/src/cpu/simulationconfig/include/simulationconfig/Simulation.h
+++ b/src/cpu/simulationconfig/include/simulationconfig/Simulation.h
@@ -63,7 +63,9 @@ public:
     void run();
 
 private:
-    std::shared_ptr<GbObject3D> makeSimulationBoundingBox() const;
+    bool isMainProcess();
+
+    std::shared_ptr<GbObject3D> makeSimulationBoundingBox();
 
     void writeBlocksToFile() const;
 
@@ -84,6 +86,12 @@ private:
 
 
     void setKernelForcing(const std::shared_ptr<LBMKernel> &kernel, std::shared_ptr<LBMUnitConverter> &converter) const;
+
+    void setConnectors();
+
+    void initializeDistributions();
+
+    std::shared_ptr<CoProcessor> makeNupsCoProcessor() const;
 };
 
 #endif
\ No newline at end of file
diff --git a/src/cpu/simulationconfig/src/Simulation.cpp b/src/cpu/simulationconfig/src/Simulation.cpp
index 6d80a1cfa2cc62d2575d1e8f67193b879f217b90..f4fbad090fc60e424e777b4f601243eef8eb151e 100644
--- a/src/cpu/simulationconfig/src/Simulation.cpp
+++ b/src/cpu/simulationconfig/src/Simulation.cpp
@@ -65,6 +65,8 @@ Simulation::addObject(const std::shared_ptr<GbObject3D> &object, const std::shar
     const bool is_in = registeredAdapters.find(bcAdapter) != registeredAdapters.end();
     if (!is_in) addBCAdapter(bcAdapter);
     this->interactors.push_back(lbmSystem->makeInteractor(object, this->grid, bcAdapter, state));
+    if (communicator->getProcessID() != 0) return;
+
     GbSystem3D::writeGeoObject(object, writerConfig.outputPath + folderPath, writerConfig.getWriter());
 }
 
@@ -99,19 +101,20 @@ void Simulation::run()
     grid->setPeriodicX2(gridParameters->periodicBoundaryInX2);
     grid->setPeriodicX3(gridParameters->periodicBoundaryInX3);
 
-    int &numberOfNodesInReferenceDirection = gridParameters->numberOfNodesPerDirection[gridParameters->referenceDirectionIndex];
+    //int &numberOfNodesInReferenceDirection = gridParameters->numberOfNodesPerDirection[gridParameters->referenceDirectionIndex];
     std::shared_ptr<LBMUnitConverter> converter = makeLBMUnitConverter();
 
     int &nodesInX1 = gridParameters->numberOfNodesPerDirection[0];
     int &nodesInX2 = gridParameters->numberOfNodesPerDirection[1];
     int &nodesInX3 = gridParameters->numberOfNodesPerDirection[2];
-    logSimulationData(nodesInX1, nodesInX2, nodesInX3);
+
+    if (isMainProcess())
+        logSimulationData(nodesInX1, nodesInX2, nodesInX3);
 
     setBlockSize(nodesInX1, nodesInX2, nodesInX3);
     auto gridCube = makeSimulationBoundingBox();
 
     generateBlockGrid(gridCube);
-
     setKernelForcing(lbmKernel, converter);
     setBoundaryConditionProcessor(lbmKernel);
 
@@ -125,55 +128,44 @@ void Simulation::run()
 
     intHelper.selectBlocks();
 
-
     int numberOfProcesses = communicator->getNumberOfProcesses();
     SetKernelBlockVisitor kernelVisitor(lbmKernel, physicalParameters->latticeViscosity,
                                         numberOfProcesses);
     grid->accept(kernelVisitor);
     intHelper.setBC();
 
-    double bulkViscosity = physicalParameters->latticeViscosity * physicalParameters->bulkViscosityFactor;
-    //auto iProcessor = std::make_shared<CompressibleOffsetMomentsInterpolationProcessor>();
-    //iProcessor->setBulkViscosity(physicalParameters->latticeViscosity, bulkViscosity);
-
-    //SetConnectorsBlockVisitor setConnsVisitor(communicator, true,
-    //                                          lbmSystem->getNumberOfDirections(),
-    //                                          physicalParameters->latticeViscosity, iProcessor);
 
-    OneDistributionSetConnectorsBlockVisitor setConnsVisitor(communicator);
-    grid->accept(setConnsVisitor);
-
-    InitDistributionsBlockVisitor initVisitor;
-    grid->accept(initVisitor);
-    grid->accept(setConnsVisitor);
+    writeBlocksToFile(); // important: run this after metis & intHelper.selectBlocks()
+    setConnectors();
+    initializeDistributions();
     grid->accept(bcVisitor);
-
     writeBoundaryConditions();
-    // important: run this after metis & intHelper.selectBlocks()
-    writeBlocksToFile();
+
+#ifdef _OPENMP
+    omp_set_num_threads(simulationParameters->numberOfThreads);
+    if (isMainProcess())
+        UBLOG(logINFO, "OpenMP is set to run with " << simulationParameters->numberOfThreads << " threads")
+#endif
 
     auto visualizationScheduler = std::make_shared<UbScheduler>(simulationParameters->timeStepLogInterval);
     auto mqCoProcessor = makeMacroscopicQuantitiesCoProcessor(converter,
                                                               visualizationScheduler);
 
-    std::shared_ptr<UbScheduler> nupsScheduler(new UbScheduler(100, 100));
-    std::shared_ptr<CoProcessor> nupsCoProcessor(
-            new NUPSCounterCoProcessor(grid, nupsScheduler, simulationParameters->numberOfThreads, communicator));
-
-
-#ifdef _OPENMP
-    omp_set_num_threads(simulationParameters->numberOfThreads);
-    UBLOG(logINFO, "OpenMP is set to run with " << omp_get_num_threads() << " threads")
-#endif
+    auto nupsCoProcessor = makeNupsCoProcessor();
 
     auto calculator = std::make_shared<BasicCalculator>(grid, visualizationScheduler,
                                                         simulationParameters->numberOfTimeSteps);
     calculator->addCoProcessor(nupsCoProcessor);
     calculator->addCoProcessor(mqCoProcessor);
 
-    UBLOG(logINFO, "Simulation-start")
+    if (isMainProcess()) UBLOG(logINFO, "Simulation-start")
     calculator->calculate();
-    UBLOG(logINFO, "Simulation-end")
+    if (isMainProcess()) UBLOG(logINFO, "Simulation-end")
+}
+
+bool Simulation::isMainProcess()
+{
+    return communicator->getProcessID() == 0;
 }
 
 void
@@ -224,18 +216,7 @@ Simulation::makeLBMUnitConverter()
     return std::make_shared<LBMUnitConverter>();
 }
 
-std::shared_ptr<CoProcessor>
-Simulation::makeMacroscopicQuantitiesCoProcessor(const std::shared_ptr<LBMUnitConverter> &converter,
-                                                 const std::shared_ptr<UbScheduler> &visualizationScheduler) const
-{
-    auto mqCoProcessor = std::make_shared<WriteMacroscopicQuantitiesCoProcessor>(grid, visualizationScheduler,
-                                                                                 writerConfig.outputPath,
-                                                                                 writerConfig.getWriter(),
-                                                                                 converter,
-                                                                                 communicator);
-    mqCoProcessor->process(0);
-    return mqCoProcessor;
-}
+
 
 void Simulation::writeBoundaryConditions() const
 {
@@ -248,8 +229,9 @@ void Simulation::writeBoundaryConditions() const
 void Simulation::writeBlocksToFile() const
 {
     UBLOG(logINFO, "Write block grid to VTK-file")
+    auto scheduler = std::make_shared<UbScheduler>(1);
     auto ppblocks = std::make_shared<WriteBlocksCoProcessor>(grid,
-                                                             std::make_shared<UbScheduler>(1),
+                                                             scheduler,
                                                              writerConfig.outputPath,
                                                              writerConfig.getWriter(),
                                                              communicator);
@@ -258,17 +240,56 @@ void Simulation::writeBlocksToFile() const
 }
 
 std::shared_ptr<GbObject3D>
-Simulation::makeSimulationBoundingBox() const
+Simulation::makeSimulationBoundingBox()
 {
     auto box = gridParameters->boundingBox();
+    auto gridCube = std::make_shared<GbCuboid3D>(box->minX1, box->minX2, box->minX3, box->maxX1, box->maxX2,
+                                                 box->maxX3);
 
-    UBLOG(logINFO, "Bounding box dimensions = [("
-            << box->minX1 << ", " << box->minX2 << ", " << box->minX3 << "); ("
-            << box->maxX1 << ", " << box->maxX2 << ", " << box->maxX3 << ")]")
+    if (isMainProcess())
+    {
+        UBLOG(logINFO, "Bounding box dimensions = [("
+                << box->minX1 << ", " << box->minX2 << ", " << box->minX3 << "); ("
+                << box->maxX1 << ", " << box->maxX2 << ", " << box->maxX3 << ")]")
+
+        GbSystem3D::writeGeoObject(gridCube.get(), writerConfig.outputPath + "/geo/gridCube", writerConfig.getWriter());
+    }
 
-    auto gridCube = std::make_shared<GbCuboid3D>(box->minX1, box->minX2, box->minX3, box->maxX1, box->maxX2, box->maxX3);
-    GbSystem3D::writeGeoObject(gridCube.get(), writerConfig.outputPath + "/geo/gridCube", writerConfig.getWriter());
     return gridCube;
 }
 
+void Simulation::setConnectors()
+{
+    OneDistributionSetConnectorsBlockVisitor setConnsVisitor(communicator);
+    grid->accept(setConnsVisitor);
+}
+
+void Simulation::initializeDistributions()
+{
+    InitDistributionsBlockVisitor initVisitor;
+    grid->accept(initVisitor);
+}
+
+std::shared_ptr<CoProcessor>
+Simulation::makeMacroscopicQuantitiesCoProcessor(const std::shared_ptr<LBMUnitConverter> &converter,
+                                                 const std::shared_ptr<UbScheduler> &visualizationScheduler) const
+{
+    auto mqCoProcessor = std::make_shared<WriteMacroscopicQuantitiesCoProcessor>(grid, visualizationScheduler,
+                                                                                 writerConfig.outputPath,
+                                                                                 writerConfig.getWriter(),
+                                                                                 converter,
+                                                                                 communicator);
+    mqCoProcessor->process(0);
+    return mqCoProcessor;
+}
+
+std::shared_ptr<CoProcessor> Simulation::makeNupsCoProcessor() const
+{
+    auto scheduler = std::make_shared<UbScheduler>(100, 100);
+    return std::make_shared<NUPSCounterCoProcessor>(grid, scheduler,
+                                                    simulationParameters->numberOfThreads,
+                                                    communicator);
+}
+
+
 Simulation::~Simulation() = default;
diff --git a/src/gpu/GridGenerator/CMakeLists.txt b/src/gpu/GridGenerator/CMakeLists.txt
index 29d77897ac4e6138057ec2b4f13f5f942bdb0e01..844e933d5c053aaeef38085c5c5a9e01721ff6aa 100644
--- a/src/gpu/GridGenerator/CMakeLists.txt
+++ b/src/gpu/GridGenerator/CMakeLists.txt
@@ -6,11 +6,5 @@ vf_add_library(PRIVATE_LINK basics OpenMP::OpenMP_CXX)
 vf_get_library_name(library_name)
 set_target_properties(${library_name} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
 
-# according to linker error when building static libraries.
-# https://stackoverflow.com/questions/50033435/cmake-cuda-separate-compilation-static-lib-link-error-on-windows-but-not-on-ubun
-if (NOT BUILD_SHARED_LIBRARY)
-    set_target_properties(${library_name} PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON)
-endif()
-
 # we want to suppress all cuda warnings so far for this library.
 target_compile_options(${library_name} PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:-Xcudafe "-w" >)
diff --git a/src/gpu/VirtualFluids_GPU/CMakeLists.txt b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
index ce1bb9d921f114b3d06143901355c56ab24b77fd..5c93599449cb4e5cef1c0112670068aa4e76abb3 100644
--- a/src/gpu/VirtualFluids_GPU/CMakeLists.txt
+++ b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
@@ -5,10 +5,19 @@ if(MSVC)
     set(additional_libraries ws2_32 Traffic) # ws_32 throws an error on Phoenix
 endif()
 
-vf_add_library(PRIVATE_LINK ${additional_libraries} GridGenerator basics MPI::MPI_CXX)
+vf_add_library(PUBLIC_LINK basics lbmCuda PRIVATE_LINK ${additional_libraries} GridGenerator MPI::MPI_CXX)
+
 
 #SET(TPN_WIN32 "/EHsc")
 #https://stackoverflow.com/questions/6832666/lnk2019-when-including-asio-headers-solution-generated-with-cmake
 #https://stackoverflow.com/questions/27442885/syntax-error-with-stdnumeric-limitsmax
 
+set_target_properties(VirtualFluids_GPU PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+
 vf_add_tests()
+
+if(BUILD_VF_UNIT_TESTS)
+    set_target_properties(VirtualFluids_GPUTests PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+    set_source_files_properties(Kernel/Utilities/DistributionHelperTests.cpp PROPERTIES LANGUAGE CUDA)
+endif()
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
index 2b2a2e68d0980b4d3aa4b27a04bd8ce703704a05..957935756163795945e9ac1c4762e0eb825239ee 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
@@ -118,6 +118,12 @@ void GridProvider::allocAndCopyForcing()
 {
     cudaMemoryManager->cudaAllocForcing();
     cudaMemoryManager->cudaCopyForcingToDevice();
+
+    for (int level = para->getCoarse(); level <= para->getFine(); level++)
+    {
+        cudaMemoryManager->cudaAllocLevelForcing(level);
+        cudaMemoryManager->cudaCopyLevelForcingToDevice(level);
+    }
 }
 
 void GridProvider::allocAndCopyQuadricLimiters()
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
index ce3082817776d5b8ed6237b027354956df3a8777..562140eb24bad470604b5782a816a4e60d5000f3 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
@@ -12,6 +12,11 @@
 
 using namespace vf::lbm::constant;
 
+#include "lbm/MacroscopicQuantities.h"
+
+#include "../Kernel/Utilities/DistributionHelper.cuh"
+
+
 ////////////////////////////////////////////////////////////////////////////////
 extern "C" __global__ void LBCalcMac27( real* vxD,
                                         real* vyD,
@@ -22,196 +27,37 @@ extern "C" __global__ void LBCalcMac27( real* vxD,
                                         unsigned int* neighborY,
                                         unsigned int* neighborZ,
                                         unsigned int size_Mat,
-                                        real* DD,
-                                        bool evenOrOdd)
+                                        real* distributions,
+                                        bool isEvenTimestep)
 {
-   Distributions27 D;
-   if (evenOrOdd==true)
-   {
-      D.f[dirE   ] = &DD[dirE   *size_Mat];
-      D.f[dirW   ] = &DD[dirW   *size_Mat];
-      D.f[dirN   ] = &DD[dirN   *size_Mat];
-      D.f[dirS   ] = &DD[dirS   *size_Mat];
-      D.f[dirT   ] = &DD[dirT   *size_Mat];
-      D.f[dirB   ] = &DD[dirB   *size_Mat];
-      D.f[dirNE  ] = &DD[dirNE  *size_Mat];
-      D.f[dirSW  ] = &DD[dirSW  *size_Mat];
-      D.f[dirSE  ] = &DD[dirSE  *size_Mat];
-      D.f[dirNW  ] = &DD[dirNW  *size_Mat];
-      D.f[dirTE  ] = &DD[dirTE  *size_Mat];
-      D.f[dirBW  ] = &DD[dirBW  *size_Mat];
-      D.f[dirBE  ] = &DD[dirBE  *size_Mat];
-      D.f[dirTW  ] = &DD[dirTW  *size_Mat];
-      D.f[dirTN  ] = &DD[dirTN  *size_Mat];
-      D.f[dirBS  ] = &DD[dirBS  *size_Mat];
-      D.f[dirBN  ] = &DD[dirBN  *size_Mat];
-      D.f[dirTS  ] = &DD[dirTS  *size_Mat];
-      D.f[dirZERO] = &DD[dirZERO*size_Mat];
-      D.f[dirTNE ] = &DD[dirTNE *size_Mat];
-      D.f[dirTSW ] = &DD[dirTSW *size_Mat];
-      D.f[dirTSE ] = &DD[dirTSE *size_Mat];
-      D.f[dirTNW ] = &DD[dirTNW *size_Mat];
-      D.f[dirBNE ] = &DD[dirBNE *size_Mat];
-      D.f[dirBSW ] = &DD[dirBSW *size_Mat];
-      D.f[dirBSE ] = &DD[dirBSE *size_Mat];
-      D.f[dirBNW ] = &DD[dirBNW *size_Mat];
-   } 
-   else
-   {
-      D.f[dirW   ] = &DD[dirE   *size_Mat];
-      D.f[dirE   ] = &DD[dirW   *size_Mat];
-      D.f[dirS   ] = &DD[dirN   *size_Mat];
-      D.f[dirN   ] = &DD[dirS   *size_Mat];
-      D.f[dirB   ] = &DD[dirT   *size_Mat];
-      D.f[dirT   ] = &DD[dirB   *size_Mat];
-      D.f[dirSW  ] = &DD[dirNE  *size_Mat];
-      D.f[dirNE  ] = &DD[dirSW  *size_Mat];
-      D.f[dirNW  ] = &DD[dirSE  *size_Mat];
-      D.f[dirSE  ] = &DD[dirNW  *size_Mat];
-      D.f[dirBW  ] = &DD[dirTE  *size_Mat];
-      D.f[dirTE  ] = &DD[dirBW  *size_Mat];
-      D.f[dirTW  ] = &DD[dirBE  *size_Mat];
-      D.f[dirBE  ] = &DD[dirTW  *size_Mat];
-      D.f[dirBS  ] = &DD[dirTN  *size_Mat];
-      D.f[dirTN  ] = &DD[dirBS  *size_Mat];
-      D.f[dirTS  ] = &DD[dirBN  *size_Mat];
-      D.f[dirBN  ] = &DD[dirTS  *size_Mat];
-      D.f[dirZERO] = &DD[dirZERO*size_Mat];
-      D.f[dirTNE ] = &DD[dirBSW *size_Mat];
-      D.f[dirTSW ] = &DD[dirBNE *size_Mat];
-      D.f[dirTSE ] = &DD[dirBNW *size_Mat];
-      D.f[dirTNW ] = &DD[dirBSE *size_Mat];
-      D.f[dirBNE ] = &DD[dirTSW *size_Mat];
-      D.f[dirBSW ] = &DD[dirTNE *size_Mat];
-      D.f[dirBSE ] = &DD[dirTNW *size_Mat];
-      D.f[dirBNW ] = &DD[dirTSE *size_Mat];
-   }
-   ////////////////////////////////////////////////////////////////////////////////
-   unsigned int  k;                   // Zugriff auf arrays im device
-   //
-   unsigned int tx = threadIdx.x;     // Thread index = lokaler i index
-   unsigned int by = blockIdx.x;      // Block index x
-   unsigned int bz = blockIdx.y;      // Block index y
-   unsigned int  x = tx + STARTOFFX;  // Globaler x-Index 
-   unsigned int  y = by + STARTOFFY;  // Globaler y-Index 
-   unsigned int  z = bz + STARTOFFZ;  // Globaler z-Index 
-
-   const unsigned sizeX = blockDim.x;
-   const unsigned sizeY = gridDim.x;
-   const unsigned nx = sizeX + 2 * STARTOFFX;
-   const unsigned ny = sizeY + 2 * STARTOFFY;
-
-   k = nx*(ny*z + y) + x;
-   //////////////////////////////////////////////////////////////////////////
-   //index
-   unsigned int kzero= k;
-   unsigned int ke   = k;
-   unsigned int kw   = neighborX[k];
-   unsigned int kn   = k;
-   unsigned int ks   = neighborY[k];
-   unsigned int kt   = k;
-   unsigned int kb   = neighborZ[k];
-   unsigned int ksw  = neighborY[kw];
-   unsigned int kne  = k;
-   unsigned int kse  = ks;
-   unsigned int knw  = kw;
-   unsigned int kbw  = neighborZ[kw];
-   unsigned int kte  = k;
-   unsigned int kbe  = kb;
-   unsigned int ktw  = kw;
-   unsigned int kbs  = neighborZ[ks];
-   unsigned int ktn  = k;
-   unsigned int kbn  = kb;
-   unsigned int kts  = ks;
-   unsigned int ktse = ks;
-   unsigned int kbnw = kbw;
-   unsigned int ktnw = kw;
-   unsigned int kbse = kbs;
-   unsigned int ktsw = ksw;
-   unsigned int kbne = kb;
-   unsigned int ktne = k;
-   unsigned int kbsw = neighborZ[ksw];
-   //unsigned int nxny = nx*ny;
-   //unsigned int kzero= k;
-   //unsigned int ke   = k;
-   //unsigned int kw   = k + 1;
-   //unsigned int kn   = k;
-   //unsigned int ks   = k + nx;
-   //unsigned int kt   = k;
-   //unsigned int kb   = k + nxny;
-   //unsigned int ksw  = k + nx + 1;
-   //unsigned int kne  = k;
-   //unsigned int kse  = k + nx;
-   //unsigned int knw  = k + 1;
-   //unsigned int kbw  = k + nxny + 1;
-   //unsigned int kte  = k;
-   //unsigned int kbe  = k + nxny;
-   //unsigned int ktw  = k + 1;
-   //unsigned int kbs  = k + nxny + nx;
-   //unsigned int ktn  = k;
-   //unsigned int kbn  = k + nxny;
-   //unsigned int kts  = k + nx;
-   //unsigned int ktse = k + nx;
-   //unsigned int kbnw = k + nxny + 1;
-   //unsigned int ktnw = k + 1;
-   //unsigned int kbse = k + nxny + nx;
-   //unsigned int ktsw = k + nx + 1;
-   //unsigned int kbne = k + nxny;
-   //unsigned int ktne = k;
-   //unsigned int kbsw = k + nxny + nx + 1;
-   //////////////////////////////////////////////////////////////////////////
+   const unsigned int tx = threadIdx.x;    // Thread index = lokaler i index
+   const unsigned int by = blockIdx.x;     // Block index x
+   const unsigned int bz = blockIdx.y;     // Block index y
+   const unsigned int x = tx + STARTOFFX;  // Globaler x-Index 
+   const unsigned int y = by + STARTOFFY;  // Globaler y-Index 
+   const unsigned int z = bz + STARTOFFZ;  // Globaler z-Index 
+
+   const unsigned nx = blockDim.x + 2 * STARTOFFX;
+   const unsigned ny = gridDim.x + 2 * STARTOFFY;
+
+   const unsigned int k = nx*(ny*z + y) + x; // Zugriff auf arrays im device
+
    rhoD[k] = c0o1;
    vxD[k]  = c0o1;
    vyD[k]  = c0o1;
    vzD[k]  = c0o1;
 
-   if(geoD[k] == GEO_FLUID)
-   {
-      rhoD[k]    =   (D.f[dirE   ])[ke  ]+ (D.f[dirW   ])[kw  ]+ 
-                     (D.f[dirN   ])[kn  ]+ (D.f[dirS   ])[ks  ]+
-                     (D.f[dirT   ])[kt  ]+ (D.f[dirB   ])[kb  ]+
-                     (D.f[dirNE  ])[kne ]+ (D.f[dirSW  ])[ksw ]+
-                     (D.f[dirSE  ])[kse ]+ (D.f[dirNW  ])[knw ]+
-                     (D.f[dirTE  ])[kte ]+ (D.f[dirBW  ])[kbw ]+
-                     (D.f[dirBE  ])[kbe ]+ (D.f[dirTW  ])[ktw ]+
-                     (D.f[dirTN  ])[ktn ]+ (D.f[dirBS  ])[kbs ]+
-                     (D.f[dirBN  ])[kbn ]+ (D.f[dirTS  ])[kts ]+
-                     (D.f[dirZERO])[kzero]+ 
-                     (D.f[dirTNE ])[ktne]+ (D.f[dirTSW ])[ktsw]+ 
-                     (D.f[dirTSE ])[ktse]+ (D.f[dirTNW ])[ktnw]+ 
-                     (D.f[dirBNE ])[kbne]+ (D.f[dirBSW ])[kbsw]+ 
-                     (D.f[dirBSE ])[kbse]+ (D.f[dirBNW ])[kbnw];
-
-      vxD[k]     =   (D.f[dirE   ])[ke  ]- (D.f[dirW   ])[kw  ]+ 
-                     (D.f[dirNE  ])[kne ]- (D.f[dirSW  ])[ksw ]+
-                     (D.f[dirSE  ])[kse ]- (D.f[dirNW  ])[knw ]+
-                     (D.f[dirTE  ])[kte ]- (D.f[dirBW  ])[kbw ]+
-                     (D.f[dirBE  ])[kbe ]- (D.f[dirTW  ])[ktw ]+
-                     (D.f[dirTNE ])[ktne]- (D.f[dirTSW ])[ktsw]+ 
-                     (D.f[dirTSE ])[ktse]- (D.f[dirTNW ])[ktnw]+ 
-                     (D.f[dirBNE ])[kbne]- (D.f[dirBSW ])[kbsw]+ 
-                     (D.f[dirBSE ])[kbse]- (D.f[dirBNW ])[kbnw];
-
-      vyD[k]     =   (D.f[dirN   ])[kn  ]- (D.f[dirS   ])[ks  ]+
-                     (D.f[dirNE  ])[kne ]- (D.f[dirSW  ])[ksw ]-
-                     (D.f[dirSE  ])[kse ]+ (D.f[dirNW  ])[knw ]+
-                     (D.f[dirTN  ])[ktn ]- (D.f[dirBS  ])[kbs ]+
-                     (D.f[dirBN  ])[kbn ]- (D.f[dirTS  ])[kts ]+
-                     (D.f[dirTNE ])[ktne]- (D.f[dirTSW ])[ktsw]- 
-                     (D.f[dirTSE ])[ktse]+ (D.f[dirTNW ])[ktnw]+ 
-                     (D.f[dirBNE ])[kbne]- (D.f[dirBSW ])[kbsw]- 
-                     (D.f[dirBSE ])[kbse]+ (D.f[dirBNW ])[kbnw];
-
-      vzD[k]     =   (D.f[dirT   ])[kt  ]- (D.f[dirB   ])[kb  ]+
-                     (D.f[dirTE  ])[kte ]- (D.f[dirBW  ])[kbw ]-
-                     (D.f[dirBE  ])[kbe ]+ (D.f[dirTW  ])[ktw ]+
-                     (D.f[dirTN  ])[ktn ]- (D.f[dirBS  ])[kbs ]-
-                     (D.f[dirBN  ])[kbn ]+ (D.f[dirTS  ])[kts ]+
-                     (D.f[dirTNE ])[ktne]+ (D.f[dirTSW ])[ktsw]+ 
-                     (D.f[dirTSE ])[ktse]+ (D.f[dirTNW ])[ktnw]- 
-                     (D.f[dirBNE ])[kbne]- (D.f[dirBSW ])[kbsw]- 
-                     (D.f[dirBSE ])[kbse]- (D.f[dirBNW ])[kbnw];
-   }
+   if(!vf::gpu::isValidFluidNode(k, size_Mat, geoD[k]))
+      return;
+
+   vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY, neighborZ);
+   const auto& distribution = distr_wrapper.distribution;
+
+   rhoD[k] = vf::lbm::getDensity(distribution.f);
+   vxD[k] = vf::lbm::getIncompressibleVelocityX1(distribution.f);
+   vyD[k] = vf::lbm::getIncompressibleVelocityX2(distribution.f);
+   vzD[k] = vf::lbm::getIncompressibleVelocityX3(distribution.f);
+
 }
 
 
@@ -412,251 +258,34 @@ extern "C" __global__ void LBCalcMacSP27( real* vxD,
 }
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcMacCompSP27( real* vxD,
-											  real* vyD,
-											  real* vzD,
-											  real* rhoD,
-											  real* pressD,
-											  unsigned int* geoD,
-											  unsigned int* neighborX,
-											  unsigned int* neighborY,
-											  unsigned int* neighborZ,
-											  unsigned int size_Mat,
-											  real* DD,
-											  bool evenOrOdd)
+extern "C" __global__ void LBCalcMacCompSP27(real *vxD, real *vyD, real *vzD, real *rhoD, real *pressD,
+                                             unsigned int *geoD, unsigned int *neighborX, unsigned int *neighborY,
+                                             unsigned int *neighborZ, unsigned int size_Mat, real *distributions,
+                                             bool isEvenTimestep)
 {
-   Distributions27 D;
-   if (evenOrOdd==true)
-   {
-      D.f[dirE   ] = &DD[dirE   *size_Mat];
-      D.f[dirW   ] = &DD[dirW   *size_Mat];
-      D.f[dirN   ] = &DD[dirN   *size_Mat];
-      D.f[dirS   ] = &DD[dirS   *size_Mat];
-      D.f[dirT   ] = &DD[dirT   *size_Mat];
-      D.f[dirB   ] = &DD[dirB   *size_Mat];
-      D.f[dirNE  ] = &DD[dirNE  *size_Mat];
-      D.f[dirSW  ] = &DD[dirSW  *size_Mat];
-      D.f[dirSE  ] = &DD[dirSE  *size_Mat];
-      D.f[dirNW  ] = &DD[dirNW  *size_Mat];
-      D.f[dirTE  ] = &DD[dirTE  *size_Mat];
-      D.f[dirBW  ] = &DD[dirBW  *size_Mat];
-      D.f[dirBE  ] = &DD[dirBE  *size_Mat];
-      D.f[dirTW  ] = &DD[dirTW  *size_Mat];
-      D.f[dirTN  ] = &DD[dirTN  *size_Mat];
-      D.f[dirBS  ] = &DD[dirBS  *size_Mat];
-      D.f[dirBN  ] = &DD[dirBN  *size_Mat];
-      D.f[dirTS  ] = &DD[dirTS  *size_Mat];
-      D.f[dirZERO] = &DD[dirZERO*size_Mat];
-      D.f[dirTNE ] = &DD[dirTNE *size_Mat];
-      D.f[dirTSW ] = &DD[dirTSW *size_Mat];
-      D.f[dirTSE ] = &DD[dirTSE *size_Mat];
-      D.f[dirTNW ] = &DD[dirTNW *size_Mat];
-      D.f[dirBNE ] = &DD[dirBNE *size_Mat];
-      D.f[dirBSW ] = &DD[dirBSW *size_Mat];
-      D.f[dirBSE ] = &DD[dirBSE *size_Mat];
-      D.f[dirBNW ] = &DD[dirBNW *size_Mat];
-   } 
-   else
-   {
-      D.f[dirW   ] = &DD[dirE   *size_Mat];
-      D.f[dirE   ] = &DD[dirW   *size_Mat];
-      D.f[dirS   ] = &DD[dirN   *size_Mat];
-      D.f[dirN   ] = &DD[dirS   *size_Mat];
-      D.f[dirB   ] = &DD[dirT   *size_Mat];
-      D.f[dirT   ] = &DD[dirB   *size_Mat];
-      D.f[dirSW  ] = &DD[dirNE  *size_Mat];
-      D.f[dirNE  ] = &DD[dirSW  *size_Mat];
-      D.f[dirNW  ] = &DD[dirSE  *size_Mat];
-      D.f[dirSE  ] = &DD[dirNW  *size_Mat];
-      D.f[dirBW  ] = &DD[dirTE  *size_Mat];
-      D.f[dirTE  ] = &DD[dirBW  *size_Mat];
-      D.f[dirTW  ] = &DD[dirBE  *size_Mat];
-      D.f[dirBE  ] = &DD[dirTW  *size_Mat];
-      D.f[dirBS  ] = &DD[dirTN  *size_Mat];
-      D.f[dirTN  ] = &DD[dirBS  *size_Mat];
-      D.f[dirTS  ] = &DD[dirBN  *size_Mat];
-      D.f[dirBN  ] = &DD[dirTS  *size_Mat];
-      D.f[dirZERO] = &DD[dirZERO*size_Mat];
-      D.f[dirTNE ] = &DD[dirBSW *size_Mat];
-      D.f[dirTSW ] = &DD[dirBNE *size_Mat];
-      D.f[dirTSE ] = &DD[dirBNW *size_Mat];
-      D.f[dirTNW ] = &DD[dirBSE *size_Mat];
-      D.f[dirBNE ] = &DD[dirTSW *size_Mat];
-      D.f[dirBSW ] = &DD[dirTNE *size_Mat];
-      D.f[dirBSE ] = &DD[dirTNW *size_Mat];
-      D.f[dirBNW ] = &DD[dirTSE *size_Mat];
-   }
-   ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
-
-   const unsigned k = nx*(ny*z + y) + x;
-   //////////////////////////////////////////////////////////////////////////
-
-   if(k<size_Mat)
-   {
-      //////////////////////////////////////////////////////////////////////////
-      //index
-      unsigned int kzero= k;
-      unsigned int ke   = k;
-      unsigned int kw   = neighborX[k];
-      unsigned int kn   = k;
-      unsigned int ks   = neighborY[k];
-      unsigned int kt   = k;
-      unsigned int kb   = neighborZ[k];
-      unsigned int ksw  = neighborY[kw];
-      unsigned int kne  = k;
-      unsigned int kse  = ks;
-      unsigned int knw  = kw;
-      unsigned int kbw  = neighborZ[kw];
-      unsigned int kte  = k;
-      unsigned int kbe  = kb;
-      unsigned int ktw  = kw;
-      unsigned int kbs  = neighborZ[ks];
-      unsigned int ktn  = k;
-      unsigned int kbn  = kb;
-      unsigned int kts  = ks;
-      unsigned int ktse = ks;
-      unsigned int kbnw = kbw;
-      unsigned int ktnw = kw;
-      unsigned int kbse = kbs;
-      unsigned int ktsw = ksw;
-      unsigned int kbne = kb;
-      unsigned int ktne = k;
-      unsigned int kbsw = neighborZ[ksw];
-      //////////////////////////////////////////////////////////////////////////
-      pressD[k] = c0o1;
-	  rhoD[k]   = c0o1;
-	  vxD[k]    = c0o1;
-	  vyD[k]    = c0o1;
-	  vzD[k]    = c0o1;
-
-      if(geoD[k] == GEO_FLUID || geoD[k] == GEO_PM_0 || geoD[k] == GEO_PM_1 || geoD[k] == GEO_PM_2)
-      {
-         rhoD[k]    =   (D.f[dirE   ])[ke  ]+ (D.f[dirW   ])[kw  ]+ 
-                        (D.f[dirN   ])[kn  ]+ (D.f[dirS   ])[ks  ]+
-                        (D.f[dirT   ])[kt  ]+ (D.f[dirB   ])[kb  ]+
-                        (D.f[dirNE  ])[kne ]+ (D.f[dirSW  ])[ksw ]+
-                        (D.f[dirSE  ])[kse ]+ (D.f[dirNW  ])[knw ]+
-                        (D.f[dirTE  ])[kte ]+ (D.f[dirBW  ])[kbw ]+
-                        (D.f[dirBE  ])[kbe ]+ (D.f[dirTW  ])[ktw ]+
-                        (D.f[dirTN  ])[ktn ]+ (D.f[dirBS  ])[kbs ]+
-                        (D.f[dirBN  ])[kbn ]+ (D.f[dirTS  ])[kts ]+
-                        (D.f[dirZERO])[kzero]+ 
-                        (D.f[dirTNE ])[ktne]+ (D.f[dirTSW ])[ktsw]+ 
-                        (D.f[dirTSE ])[ktse]+ (D.f[dirTNW ])[ktnw]+ 
-                        (D.f[dirBNE ])[kbne]+ (D.f[dirBSW ])[kbsw]+ 
-                        (D.f[dirBSE ])[kbse]+ (D.f[dirBNW ])[kbnw];
-
-         vxD[k]     =  ((D.f[dirE   ])[ke  ]- (D.f[dirW   ])[kw  ]+ 
-                        (D.f[dirNE  ])[kne ]- (D.f[dirSW  ])[ksw ]+
-                        (D.f[dirSE  ])[kse ]- (D.f[dirNW  ])[knw ]+
-                        (D.f[dirTE  ])[kte ]- (D.f[dirBW  ])[kbw ]+
-                        (D.f[dirBE  ])[kbe ]- (D.f[dirTW  ])[ktw ]+
-                        (D.f[dirTNE ])[ktne]- (D.f[dirTSW ])[ktsw]+ 
-                        (D.f[dirTSE ])[ktse]- (D.f[dirTNW ])[ktnw]+ 
-                        (D.f[dirBNE ])[kbne]- (D.f[dirBSW ])[kbsw]+ 
-						(D.f[dirBSE ])[kbse]- (D.f[dirBNW ])[kbnw]) / (c1o1 + rhoD[k]);
-
-         vyD[k]     =  ((D.f[dirN   ])[kn  ]- (D.f[dirS   ])[ks  ]+
-                        (D.f[dirNE  ])[kne ]- (D.f[dirSW  ])[ksw ]-
-                        (D.f[dirSE  ])[kse ]+ (D.f[dirNW  ])[knw ]+
-                        (D.f[dirTN  ])[ktn ]- (D.f[dirBS  ])[kbs ]+
-                        (D.f[dirBN  ])[kbn ]- (D.f[dirTS  ])[kts ]+
-                        (D.f[dirTNE ])[ktne]- (D.f[dirTSW ])[ktsw]- 
-                        (D.f[dirTSE ])[ktse]+ (D.f[dirTNW ])[ktnw]+ 
-                        (D.f[dirBNE ])[kbne]- (D.f[dirBSW ])[kbsw]- 
-                        (D.f[dirBSE ])[kbse]+ (D.f[dirBNW ])[kbnw]) / (c1o1 + rhoD[k]);
-
-         vzD[k]     =  ((D.f[dirT   ])[kt  ]- (D.f[dirB   ])[kb  ]+
-                        (D.f[dirTE  ])[kte ]- (D.f[dirBW  ])[kbw ]-
-                        (D.f[dirBE  ])[kbe ]+ (D.f[dirTW  ])[ktw ]+
-                        (D.f[dirTN  ])[ktn ]- (D.f[dirBS  ])[kbs ]-
-                        (D.f[dirBN  ])[kbn ]+ (D.f[dirTS  ])[kts ]+
-                        (D.f[dirTNE ])[ktne]+ (D.f[dirTSW ])[ktsw]+ 
-                        (D.f[dirTSE ])[ktse]+ (D.f[dirTNW ])[ktnw]- 
-                        (D.f[dirBNE ])[kbne]- (D.f[dirBSW ])[kbsw]- 
-                        (D.f[dirBSE ])[kbse]- (D.f[dirBNW ])[kbnw]) / (c1o1 + rhoD[k]);
-
-         pressD[k]  =  ((D.f[dirE   ])[ke  ]+ (D.f[dirW   ])[kw  ]+ 
-                        (D.f[dirN   ])[kn  ]+ (D.f[dirS   ])[ks  ]+
-                        (D.f[dirT   ])[kt  ]+ (D.f[dirB   ])[kb  ]+
-                        c2o1*(
-                        (D.f[dirNE  ])[kne ]+ (D.f[dirSW  ])[ksw ]+
-                        (D.f[dirSE  ])[kse ]+ (D.f[dirNW  ])[knw ]+
-                        (D.f[dirTE  ])[kte ]+ (D.f[dirBW  ])[kbw ]+
-                        (D.f[dirBE  ])[kbe ]+ (D.f[dirTW  ])[ktw ]+
-                        (D.f[dirTN  ])[ktn ]+ (D.f[dirBS  ])[kbs ]+
-                        (D.f[dirBN  ])[kbn ]+ (D.f[dirTS  ])[kts ])+
-                        c3o1*(
-                        (D.f[dirTNE ])[ktne]+ (D.f[dirTSW ])[ktsw]+ 
-                        (D.f[dirTSE ])[ktse]+ (D.f[dirTNW ])[ktnw]+ 
-                        (D.f[dirBNE ])[kbne]+ (D.f[dirBSW ])[kbsw]+ 
-                        (D.f[dirBSE ])[kbse]+ (D.f[dirBNW ])[kbnw])-
-                        rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+rhoD[k])) * c1o2+rhoD[k]; // times zero for incompressible case   
-         //achtung op hart gesetzt Annahme op = 1 ;                                                      ^^^^(1.0/op-0.5)=0.5
-
-      }
-   }
+    const unsigned k = vf::gpu::getNodeIndex();
+
+    pressD[k] = c0o1;
+    rhoD[k]   = c0o1;
+    vxD[k]    = c0o1;
+    vyD[k]    = c0o1;
+    vzD[k]    = c0o1;
+
+    if (!vf::gpu::isValidFluidNode(k, size_Mat, geoD[k]))
+        return;
+
+    vf::gpu::DistributionWrapper distr_wrapper(distributions, size_Mat, isEvenTimestep, k, neighborX, neighborY,
+                                               neighborZ);
+    const auto &distribution = distr_wrapper.distribution;
+
+    rhoD[k]   = vf::lbm::getDensity(distribution.f);
+    vxD[k]    = vf::lbm::getCompressibleVelocityX1(distribution.f, rhoD[k]);
+    vyD[k]    = vf::lbm::getCompressibleVelocityX2(distribution.f, rhoD[k]);
+    vzD[k]    = vf::lbm::getCompressibleVelocityX3(distribution.f, rhoD[k]);
+    pressD[k] = vf::lbm::getPressure(distribution.f, rhoD[k], vxD[k], vyD[k], vzD[k]); 
 }
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 ////////////////////////////////////////////////////////////////////////////////
 extern "C" __global__ void LBCalcMacThS7( real* Conc,
                                           unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
index 6c1585bd689a5f2e2da603d5e9fdb81cbc175aab..aeafe342f0680763fbbffe63cf1c6760e61c2102 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
@@ -6,6 +6,8 @@
 
 #include <Parameter/Parameter.h>
 
+#include <lbm/constants/NumericConstants.h>
+
 void CudaMemoryManager::cudaAllocFull(int lev)
 {
     checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(lev)->geo      ), parameter->getParH(lev)->mem_size_int  ));
@@ -404,6 +406,44 @@ void CudaMemoryManager::cudaFreeForcing()
 {
 	checkCudaErrors( cudaFreeHost(parameter->getForcesHost()));
 }
+
+void CudaMemoryManager::cudaAllocLevelForcing(int level)
+{
+    real fx_t{ 1. }, fy_t{ 1. }, fz_t{ 1. };
+    for (int i = 0; i < level; i++) {
+        fx_t *= vf::lbm::constant::c2o1;
+        fy_t *= vf::lbm::constant::c2o1;
+        fz_t *= vf::lbm::constant::c2o1;
+    }
+
+    const unsigned int mem_size = sizeof(real) * 3;
+
+    //Host
+    checkCudaErrors( cudaMallocHost((void**) &(parameter->getParH(level)->forcing), mem_size));
+    parameter->getParH(level)->forcing[0] = parameter->forcingH[0] / fx_t;
+    parameter->getParH(level)->forcing[1] = parameter->forcingH[1] / fy_t;
+    parameter->getParH(level)->forcing[2] = parameter->forcingH[2] / fz_t;
+
+	//Device
+	checkCudaErrors( cudaMalloc((void**) &parameter->getParD(level)->forcing, mem_size));
+	//////////////////////////////////////////////////////////////////////////
+	const double tmp = (double)mem_size;
+	setMemsizeGPU(tmp, false);
+}
+
+void CudaMemoryManager::cudaCopyLevelForcingToDevice(int level)
+{
+	unsigned int mem_size = sizeof(real) * 3;
+	checkCudaErrors( cudaMemcpy(parameter->getParD(level)->forcing, parameter->getParH(level)->forcing, mem_size, cudaMemcpyHostToDevice));
+}
+
+void CudaMemoryManager::cudaFreeLevelForcing(int level)
+{
+	checkCudaErrors( cudaFreeHost(parameter->getParH(level)->forcing));
+    checkCudaErrors( cudaFree(parameter->getParD(level)->forcing));
+}
+
+
 //quadric Limiters
 void CudaMemoryManager::cudaAllocQuadricLimiters()
 {
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
index c26db65afd384ac7a4e6c436c4dea2c46647ae95..4853205f510348a954fbc8dcdaed72385a30d559 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
@@ -78,6 +78,10 @@ public:
 	void cudaCopyForcingToHost();
 	void cudaFreeForcing();
 
+    void cudaAllocLevelForcing(int level);
+	void cudaCopyLevelForcingToDevice(int level);
+	void cudaFreeLevelForcing(int level);
+
 	void cudaAllocQuadricLimiters();
 	void cudaCopyQuadricLimitersToDevice();
 	void cudaFreeQuadricLimiters();
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu
index bb34a11a7337a499f8558a48d27c3547932618a6..9dc75cfa6019a29350263f9a554aa9a489566cfa 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu
@@ -1,10 +1,35 @@
-//  _    ___      __              __________      _     __        ______________   __
-// | |  / (_)____/ /___  ______ _/ / ____/ /_  __(_)___/ /____   /  ___/ __  / /  / /
-// | | / / / ___/ __/ / / / __ `/ / /_  / / / / / / __  / ___/  / /___/ /_/ / /  / /
-// | |/ / / /  / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__  )  / /_) / ____/ /__/ / 
-// |___/_/_/   \__/\__,_/\__,_/_/_/   /_/\__,_/_/\__,_/____/   \____/_/    \_____/
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __         
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
+//      \    \  |    |   ________________________________________________________________    
+//       \    \ |    |  |  ______________________________________________________________|   
+//        \    \|    |  |  |         __          __     __     __     ______      _______    
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
 //
-//////////////////////////////////////////////////////////////////////////
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file Cumulant27chim.cu
+//! \ingroup GPU
+//! \author Martin Schoenherr
+//=======================================================================================
 /* Device code */
 #include "LBM/LB.h" 
 #include "LBM/D3Q27.h"
@@ -13,951 +38,882 @@
 using namespace vf::lbm::constant;
 #include "math.h"
 
-
-////////////////////////////////////////////////////////////////////////////////
-inline __device__ void forwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K) {
-	real m2 = mfa + mfc;
-	real m1 = mfc - mfa;
-	real m0 = m2 + mfb;
-	mfa = m0;
-	m0 *= Kinverse;
-	m0 += c1o1;
-	mfb = (m1*Kinverse - m0 * vv) * K;
-	mfc = ((m2 - c2o1*	m1 * vv)*Kinverse + v2 * m0) * K;
-}
-
-inline __device__ void backwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real Kinverse, real K) {
-	real m0 = (((mfc - mfb) * c1o2 + mfb *  vv)*Kinverse + (mfa*Kinverse + c1o1) * (v2 - vv) * c1o2) * K;
-	real m1 = (((mfa - mfc) -  c2o1 * mfb *  vv)*Kinverse + (mfa*Kinverse + c1o1) * (           -v2)) * K;
-	mfc     = (((mfc + mfb) * c1o2 + mfb *  vv)*Kinverse + (mfa*Kinverse + c1o1) * (v2 + vv) * c1o2) * K;
-	mfa = m0;
-	mfb = m1;
-}
-////////////////////////////////////////////////////////////////////////////////
-
-
-
-
-
-inline __device__ void forwardChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real K) {
-
-	real m2 = mfa + mfc;
-	real m1 = mfc - mfa;
-	real m0 = m2 + mfb;
-	mfa = m0;
-	//m0     += K;
-	mfb = (m1 - K*vv) - m0 * vv;
-	mfc = ((m2 - c2o1*	m1 * vv) + v2*K) + v2 * m0;
-	//m0 += K;
-	//mfb = m1 - m0 * vv;
-	//mfc = m2 - two*	m1 * vv + v2 * m0;
-}
-
-inline __device__ void forwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2) {
-	real m1 = (mfa + mfc) + mfb;
-	real m2 = mfc - mfa;
-	mfc = (mfc + mfa) + (v2*m1 - c2o1*vv*m2);
-	mfb = m2 - vv*m1;
-	mfa = m1;
-}
-
-
-inline __device__ void backwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2) {
-	real ma = (mfc + mfa*(v2 - vv))*c1o2 + mfb*(vv - c1o2);
-	real mb = ((mfa - mfc) - mfa*v2) - c2o1*mfb*vv;
-	mfc = (mfc + mfa*(v2 + vv))*c1o2 + mfb*(vv + c1o2);
-	mfb = mb;
-	mfa = ma;
-}
-
-
-inline __device__ void backwardChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real K) {
-	real  m0 = (mfc - mfb)* c1o2 + mfb * (vv)+(mfa + K) * (v2 - vv) * c1o2;
-	real m1 = (mfa - mfc) - c2o1* mfb * vv + (mfa + K) * (-v2);
-	mfc = (mfc + mfb)* c1o2 + mfb * (vv)+(mfa + K) * (v2 + vv) * c1o2;
-	mfa = m0;
-	mfb = m1;
-
-}
-
-
-
-
+#include <lbm/Chimera.h>
 
 
 ////////////////////////////////////////////////////////////////////////////////
 extern "C" __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
-	real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* DDStart,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd)
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DDStart,
+    int size_Mat,
+    int level,
+    real* forces,
+    bool EvenOrOdd)
 {
-	////////////////////////////////////////////////////////////////////////////////
-	const unsigned  x = threadIdx.x;  // Globaler x-Index 
-	const unsigned  y = blockIdx.x;   // Globaler y-Index 
-	const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-	const unsigned nx = blockDim.x;
-	const unsigned ny = gridDim.x;
-
-	const unsigned k = nx*(ny*z + y) + x;
-	//////////////////////////////////////////////////////////////////////////
-
-	if (k<size_Mat)
-	{
-		////////////////////////////////////////////////////////////////////////////////
-		unsigned int BC;
-		BC = bcMatD[k];
-
-		if (BC >= GEO_FLUID/*(BC != GEO_SOLID) && (BC != GEO_VOID)*/)
-		{
-			Distributions27 D;
-			if (EvenOrOdd == true)
-			{
-				D.f[dirE] = &DDStart[dirE   *size_Mat];
-				D.f[dirW] = &DDStart[dirW   *size_Mat];
-				D.f[dirN] = &DDStart[dirN   *size_Mat];
-				D.f[dirS] = &DDStart[dirS   *size_Mat];
-				D.f[dirT] = &DDStart[dirT   *size_Mat];
-				D.f[dirB] = &DDStart[dirB   *size_Mat];
-				D.f[dirNE] = &DDStart[dirNE  *size_Mat];
-				D.f[dirSW] = &DDStart[dirSW  *size_Mat];
-				D.f[dirSE] = &DDStart[dirSE  *size_Mat];
-				D.f[dirNW] = &DDStart[dirNW  *size_Mat];
-				D.f[dirTE] = &DDStart[dirTE  *size_Mat];
-				D.f[dirBW] = &DDStart[dirBW  *size_Mat];
-				D.f[dirBE] = &DDStart[dirBE  *size_Mat];
-				D.f[dirTW] = &DDStart[dirTW  *size_Mat];
-				D.f[dirTN] = &DDStart[dirTN  *size_Mat];
-				D.f[dirBS] = &DDStart[dirBS  *size_Mat];
-				D.f[dirBN] = &DDStart[dirBN  *size_Mat];
-				D.f[dirTS] = &DDStart[dirTS  *size_Mat];
-				D.f[dirZERO] = &DDStart[dirZERO*size_Mat];
-				D.f[dirTNE] = &DDStart[dirTNE *size_Mat];
-				D.f[dirTSW] = &DDStart[dirTSW *size_Mat];
-				D.f[dirTSE] = &DDStart[dirTSE *size_Mat];
-				D.f[dirTNW] = &DDStart[dirTNW *size_Mat];
-				D.f[dirBNE] = &DDStart[dirBNE *size_Mat];
-				D.f[dirBSW] = &DDStart[dirBSW *size_Mat];
-				D.f[dirBSE] = &DDStart[dirBSE *size_Mat];
-				D.f[dirBNW] = &DDStart[dirBNW *size_Mat];
-			}
-			else
-			{
-				D.f[dirW] = &DDStart[dirE   *size_Mat];
-				D.f[dirE] = &DDStart[dirW   *size_Mat];
-				D.f[dirS] = &DDStart[dirN   *size_Mat];
-				D.f[dirN] = &DDStart[dirS   *size_Mat];
-				D.f[dirB] = &DDStart[dirT   *size_Mat];
-				D.f[dirT] = &DDStart[dirB   *size_Mat];
-				D.f[dirSW] = &DDStart[dirNE  *size_Mat];
-				D.f[dirNE] = &DDStart[dirSW  *size_Mat];
-				D.f[dirNW] = &DDStart[dirSE  *size_Mat];
-				D.f[dirSE] = &DDStart[dirNW  *size_Mat];
-				D.f[dirBW] = &DDStart[dirTE  *size_Mat];
-				D.f[dirTE] = &DDStart[dirBW  *size_Mat];
-				D.f[dirTW] = &DDStart[dirBE  *size_Mat];
-				D.f[dirBE] = &DDStart[dirTW  *size_Mat];
-				D.f[dirBS] = &DDStart[dirTN  *size_Mat];
-				D.f[dirTN] = &DDStart[dirBS  *size_Mat];
-				D.f[dirTS] = &DDStart[dirBN  *size_Mat];
-				D.f[dirBN] = &DDStart[dirTS  *size_Mat];
-				D.f[dirZERO] = &DDStart[dirZERO*size_Mat];
-				D.f[dirBSW] = &DDStart[dirTNE *size_Mat];
-				D.f[dirBNE] = &DDStart[dirTSW *size_Mat];
-				D.f[dirBNW] = &DDStart[dirTSE *size_Mat];
-				D.f[dirBSE] = &DDStart[dirTNW *size_Mat];
-				D.f[dirTSW] = &DDStart[dirBNE *size_Mat];
-				D.f[dirTNE] = &DDStart[dirBSW *size_Mat];
-				D.f[dirTNW] = &DDStart[dirBSE *size_Mat];
-				D.f[dirTSE] = &DDStart[dirBNW *size_Mat];
-			}
-
-			////////////////////////////////////////////////////////////////////////////////
-			//index
-			//unsigned int kzero= k;
-			//unsigned int ke   = k;
-			unsigned int kw = neighborX[k];
-			//unsigned int kn   = k;
-			unsigned int ks = neighborY[k];
-			//unsigned int kt   = k;
-			unsigned int kb = neighborZ[k];
-			unsigned int ksw = neighborY[kw];
-			//unsigned int kne  = k;
-			//unsigned int kse  = ks;
-			//unsigned int knw  = kw;
-			unsigned int kbw = neighborZ[kw];
-			//unsigned int kte  = k;
-			//unsigned int kbe  = kb;
-			//unsigned int ktw  = kw;
-			unsigned int kbs = neighborZ[ks];
-			//unsigned int ktn  = k;
-			//unsigned int kbn  = kb;
-			//unsigned int kts  = ks;
-			//unsigned int ktse = ks;
-			//unsigned int kbnw = kbw;
-			//unsigned int ktnw = kw;
-			//unsigned int kbse = kbs;
-			//unsigned int ktsw = ksw;
-			//unsigned int kbne = kb;
-			//unsigned int ktne = k;
-			unsigned int kbsw = neighborZ[ksw];
-
-
-
-			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[dirE])[k];//[ke   ];// +  c2over27 ;(D.f[dirE   ])[k  ];//ke
-			real mfabb = (D.f[dirW])[kw];//[kw   ];// +  c2over27 ;(D.f[dirW   ])[kw ];
-			real mfbcb = (D.f[dirN])[k];//[kn   ];// +  c2over27 ;(D.f[dirN   ])[k  ];//kn
-			real mfbab = (D.f[dirS])[ks];//[ks   ];// +  c2over27 ;(D.f[dirS   ])[ks ];
-			real mfbbc = (D.f[dirT])[k];//[kt   ];// +  c2over27 ;(D.f[dirT   ])[k  ];//kt
-			real mfbba = (D.f[dirB])[kb];//[kb   ];// +  c2over27 ;(D.f[dirB   ])[kb ];
-			real mfccb = (D.f[dirNE])[k];//[kne  ];// +  c1over54 ;(D.f[dirNE  ])[k  ];//kne
-			real mfaab = (D.f[dirSW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[dirSW  ])[ksw];
-			real mfcab = (D.f[dirSE])[ks];//[kse  ];// +  c1over54 ;(D.f[dirSE  ])[ks ];//kse
-			real mfacb = (D.f[dirNW])[kw];//[knw  ];// +  c1over54 ;(D.f[dirNW  ])[kw ];//knw
-			real mfcbc = (D.f[dirTE])[k];//[kte  ];// +  c1over54 ;(D.f[dirTE  ])[k  ];//kte
-			real mfaba = (D.f[dirBW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[dirBW  ])[kbw];
-			real mfcba = (D.f[dirBE])[kb];//[kbe  ];// +  c1over54 ;(D.f[dirBE  ])[kb ];//kbe
-			real mfabc = (D.f[dirTW])[kw];//[ktw  ];// +  c1over54 ;(D.f[dirTW  ])[kw ];//ktw
-			real mfbcc = (D.f[dirTN])[k];//[ktn  ];// +  c1over54 ;(D.f[dirTN  ])[k  ];//ktn
-			real mfbaa = (D.f[dirBS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[dirBS  ])[kbs];
-			real mfbca = (D.f[dirBN])[kb];//[kbn  ];// +  c1over54 ;(D.f[dirBN  ])[kb ];//kbn
-			real mfbac = (D.f[dirTS])[ks];//[kts  ];// +  c1over54 ;(D.f[dirTS  ])[ks ];//kts
-			real mfbbb = (D.f[dirZERO])[k];//[kzero];// +  c8over27 ;(D.f[dirZERO])[k  ];//kzero
-			real mfccc = (D.f[dirTNE])[k];//[ktne ];// +  c1over216;(D.f[dirTNE ])[k  ];//ktne
-			real mfaac = (D.f[dirTSW])[ksw];//[ktsw ];// +  c1over216;(D.f[dirTSW ])[ksw];//ktsw
-			real mfcac = (D.f[dirTSE])[ks];//[ktse ];// +  c1over216;(D.f[dirTSE ])[ks ];//ktse
-			real mfacc = (D.f[dirTNW])[kw];//[ktnw ];// +  c1over216;(D.f[dirTNW ])[kw ];//ktnw
-			real mfcca = (D.f[dirBNE])[kb];//[kbne ];// +  c1over216;(D.f[dirBNE ])[kb ];//kbne
-			real mfaaa = (D.f[dirBSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[dirBSW ])[kbsw];
-			real mfcaa = (D.f[dirBSE])[kbs];//[kbse ];// +  c1over216;(D.f[dirBSE ])[kbs];//kbse
-			real mfaca = (D.f[dirBNW])[kbw];//[kbnw ];// +  c1over216;(D.f[dirBNW ])[kbw];//kbnw
-											   ////////////////////////////////////////////////////////////////////////////////////
-			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
-				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
-				((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
-
-			real rho = c1o1 + drho;
-			////////////////////////////////////////////////////////////////////////////////////
-			real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
-				(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
-				(mfcbb - mfabb)) / rho;
-			real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
-				(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
-				(mfbcb - mfbab)) / rho;
-			real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
-				(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
-				(mfbbc - mfbba)) / rho;
-			////////////////////////////////////////////////////////////////////////////////////
-			//the force be with you
-			real fx = forces[0] / (pow((double)c2o1, (double)level)); //zero;//0.0032653/(pow(two,level)); //0.000000005;//(two/1600000.0) / 120.0; //
-			real fy = forces[1] / (pow((double)c2o1, (double)level)); //zero;
-			real fz = forces[2] / (pow((double)c2o1, (double)level)); //zero;
-			vvx += fx*c1o2;
-			vvy += fy*c1o2;
-			vvz += fz*c1o2;
-			////////////////////////////////////////////////////////////////////////////////////
-			//real omega = omega_in;
-			////////////////////////////////////////////////////////////////////////////////////
-			//fast
-			//real oMdrho = c1o1; // comp special
-			//real m0, m1, m2;
-			real vx2;
-			real vy2;
-			real vz2;
-			vx2 = vvx*vvx;
-			vy2 = vvy*vvy;
-			vz2 = vvz*vvz;
-			////////////////////////////////////////////////////////////////////////////////////
-			//real wadjust;
-			//real qudricLimitP = c1o100;// * 0.0001f;
-			//real qudricLimitM = c1o100;// * 0.0001f;
-			//real qudricLimitD = c1o100;// * 0.001f;
-			//real s9 = minusomega;
-			//test
-			//s9 = 0.;
-
-
-			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real EQcbb = c0o1;
-			real EQabb = c0o1;
-			real EQbcb = c0o1;
-			real EQbab = c0o1;
-			real EQbbc = c0o1;
-			real EQbba = c0o1;
-			real EQccb = c0o1;
-			real EQaab = c0o1;
-			real EQcab = c0o1;
-			real EQacb = c0o1;
-			real EQcbc = c0o1;
-			real EQaba = c0o1;
-			real EQcba = c0o1;
-			real EQabc = c0o1;
-			real EQbcc = c0o1;
-			real EQbaa = c0o1;
-			real EQbca = c0o1;
-			real EQbac = c0o1;
-			real EQbbb = c0o1;
-			real EQccc = drho * c1o27;
-			real EQaac = drho * c1o3;
-			real EQcac = drho * c1o9;
-			real EQacc = drho * c1o9;
-			real EQcca = drho * c1o9;
-			real EQaaa = drho;
-			real EQcaa = drho * c1o3;
-			real EQaca = drho * c1o3;
-			////////////////////////////////////////////////////////////////////////////////////
-			backwardChimeraWithK(EQaaa, EQaab, EQaac, vvz, vz2, c1o1);
-			backwardChimeraWithK(EQaca, EQacb, EQacc, vvz, vz2, c1o3);
-			///////////////////////////////////////////////////////////
-			EQcaa = EQaca; EQcab = EQacb; EQcac = EQacc;
-			///////////////////////////////////////////////////////////
-			backwardChimeraWithK(EQcca, EQccb, EQccc, vvz, vz2, c1o9);
-
-			backwardChimeraWithK(EQaaa, EQaba, EQaca, vvy, vy2, c1o6);
-			backwardChimeraWithK(EQaab, EQabb, EQacb, vvy, vy2, c2o3);
-			backwardChimeraWithK(EQaac, EQabc, EQacc, vvy, vy2, c1o6);
-			backwardChimeraWithK(EQcaa, EQcba, EQcca, vvy, vy2, c1o18);
-			backwardChimeraWithK(EQcab, EQcbb, EQccb, vvy, vy2, c2o9);
-			backwardChimeraWithK(EQcac, EQcbc, EQccc, vvy, vy2, c1o18);
-
-			backwardChimeraWithK(EQaaa, EQbaa, EQcaa, vvx, vx2, c1o36);
-			backwardChimeraWithK(EQaab, EQbab, EQcab, vvx, vx2, c1o9);
-			backwardChimeraWithK(EQaac, EQbac, EQcac, vvx, vx2, c1o36);
-			backwardChimeraWithK(EQaba, EQbba, EQcba, vvx, vx2, c1o9);
-			backwardChimeraWithK(EQabb, EQbbb, EQcbb, vvx, vx2, c4o9);
-			backwardChimeraWithK(EQabc, EQbbc, EQcbc, vvx, vx2, c1o9);
-			backwardChimeraWithK(EQaca, EQbca, EQcca, vvx, vx2, c1o36);
-			backwardChimeraWithK(EQacb, EQbcb, EQccb, vvx, vx2, c1o9);
-			backwardChimeraWithK(EQacc, EQbcc, EQccc, vvx, vx2, c1o36);
-
-			////////////////////////////////////////////////////////////////////////////////////
-			//Pre-condition
-			mfcbb -= EQcbb;
-			mfabb -= EQabb;
-			mfbcb -= EQbcb;
-			mfbab -= EQbab;
-			mfbbc -= EQbbc;
-			mfbba -= EQbba;
-			mfccb -= EQccb;
-			mfaab -= EQaab;
-			mfcab -= EQcab;
-			mfacb -= EQacb;
-			mfcbc -= EQcbc;
-			mfaba -= EQaba;
-			mfcba -= EQcba;
-			mfabc -= EQabc;
-			mfbcc -= EQbcc;
-			mfbaa -= EQbaa;
-			mfbca -= EQbca;
-			mfbac -= EQbac;
-			mfbbb -= EQbbb;
-			mfccc -= EQccc;
-			mfaac -= EQaac;
-			mfcac -= EQcac;
-			mfacc -= EQacc;
-			mfcca -= EQcca;
-			mfaaa -= EQaaa;
-			mfcaa -= EQcaa;
-			mfaca -= EQaca;
-
-			////////////////////////////////////////////////////////////////////////////////////
-			//Hin
-			////////////////////////////////////////////////////////////////////////////////////
-			forwardChimera(mfaaa, mfaab, mfaac, vvz, vz2);
-			forwardChimera(mfaba, mfabb, mfabc, vvz, vz2);
-			forwardChimera(mfaca, mfacb, mfacc, vvz, vz2);
-			forwardChimera(mfbaa, mfbab, mfbac, vvz, vz2);
-			forwardChimera(mfbba, mfbbb, mfbbc, vvz, vz2);
-			forwardChimera(mfbca, mfbcb, mfbcc, vvz, vz2);
-			forwardChimera(mfcaa, mfcab, mfcac, vvz, vz2);
-			forwardChimera(mfcba, mfcbb, mfcbc, vvz, vz2);
-			forwardChimera(mfcca, mfccb, mfccc, vvz, vz2);
-
-			forwardChimera(mfaaa, mfaba, mfaca, vvy, vy2);
-			forwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-			forwardChimera(mfaac, mfabc, mfacc, vvy, vy2);
-			forwardChimera(mfbaa, mfbba, mfbca, vvy, vy2);
-			forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
-			forwardChimera(mfbac, mfbbc, mfbcc, vvy, vy2);
-			forwardChimera(mfcaa, mfcba, mfcca, vvy, vy2);
-			forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-			forwardChimera(mfcac, mfcbc, mfccc, vvy, vy2);
-
-			forwardChimera(mfaaa, mfbaa, mfcaa, vvx, vx2);
-			forwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
-			forwardChimera(mfaac, mfbac, mfcac, vvx, vx2);
-			forwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-			forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
-			forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-			forwardChimera(mfaca, mfbca, mfcca, vvx, vx2);
-			forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-			forwardChimera(mfacc, mfbcc, mfccc, vvx, vx2);
-
-			//////////////////////////////////////////////////////////////////////////////////////
-			////Hin
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36  Konditionieren
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// Z - Dir
-			//forwardChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c1o36);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c1o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c1o36);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c1o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c4o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c1o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c1o36);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c1o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c1o36);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// mit  1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// Y - Dir
-			//forwardChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c1o6);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c1o18);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c2o3);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c2o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c1o6);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c1o18);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// mit     1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9		Konditionieren
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// X - Dir
-			//forwardChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, one);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c1o3);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c1o3);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c1o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-
-
-			////////////////////////////////////////////////////////////////////////////////////
-			// Cumulants
-			////////////////////////////////////////////////////////////////////////////////////
-			real OxxPyyPzz = c1o1; //omega; // one;	//set the bulk viscosity one is high / two is very low and zero is (too) high
-
-			////////////////////////////////////////////////////////////
-			//3.
-			//////////////////////////////
-			real OxyyPxzz = c1o1;
-			real OxyyMxzz = c1o1;
-			//real Oxyz = c1o1;
-			////////////////////////////////////////////////////////////
-			//4.
-			//////////////////////////////
-			real O4 = c1o1;
-			////////////////////////////////////////////////////////////
-			//5.
-			//////////////////////////////
-			real O5 = c1o1;
-			////////////////////////////////////////////////////////////
-			//6.
-			//////////////////////////////
-			real O6 = c1o1;
-			////////////////////////////////////////////////////////////
-
-
-			//central moments to cumulants
-			//4.
-			real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) / rho;
-			real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) / rho;
-			real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) / rho;
-
-			real CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9*(drho / rho));
-			real CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9*(drho / rho));
-			real CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9*(drho / rho));
-
-			//5.
-			real CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
-			real CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
-			real CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
-
-			//6.
-
-			real CUMccc = mfccc + ((-c4o1 *  mfbbb * mfbbb
-				- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
-				- c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
-				- c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
-				+ (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
-					+ c2o1 * (mfcaa * mfaca * mfaac)
-					+ c16o1 *  mfbba * mfbab * mfabb) / (rho * rho)
-				- c1o3 * (mfacc + mfcac + mfcca) / rho
-				- c1o9 * (mfcaa + mfaca + mfaac) / rho
-				+ (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
-					+ (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3
-				+ c1o27*((drho * drho - drho) / (rho*rho)));
-
-
-
-
-			//2.
-			// linear combinations
-			real mxxPyyPzz = mfcaa + mfaca + mfaac;
-			real mxxMyy = mfcaa - mfaca;
-			real mxxMzz = mfcaa - mfaac;
-
-			/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			//incl. correction		(hat noch nicht so gut funktioniert...Optimierungsbedarf??)
-			{
-				real dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 *  OxxPyyPzz * (mfaaa - mxxPyyPzz);
-				real dyuy = dxux + omega * c3o2 * mxxMyy;
-				real dzuz = dxux + omega * c3o2 * mxxMzz;
-
-				//relax
-				mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);//-magicBulk*OxxPyyPzz;
-				mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
-				mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
-
-			}
-			/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-			/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			////no correction
-			//mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz);//-magicBulk*OxxPyyPzz;
-			//mxxMyy += -(-omega) * (-mxxMyy);
-			//mxxMzz += -(-omega) * (-mxxMzz);
-			/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			mfabb += omega * (-mfabb);
-			mfbab += omega * (-mfbab);
-			mfbba += omega * (-mfbba);
-
-			//////////////////////////////////////////////////////////////////////////
-
-			// linear combinations back
-			mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-			mfaca = c1o3 * (-c2o1*  mxxMyy + mxxMzz + mxxPyyPzz);
-			mfaac = c1o3 * (mxxMyy - c2o1* mxxMzz + mxxPyyPzz);
-
-			//3.
-			// linear combinations
-
-			real mxxyPyzz = mfcba + mfabc;
-			real mxxyMyzz = mfcba - mfabc;
-
-			real mxxzPyyz = mfcab + mfacb;
-			real mxxzMyyz = mfcab - mfacb;
-
-			real mxyyPxzz = mfbca + mfbac;
-			real mxyyMxzz = mfbca - mfbac;
-
-			//relax
-			//////////////////////////////////////////////////////////////////////////
-			mfbbb += OxyyMxzz * (-mfbbb);
-			mxxyPyzz += OxyyPxzz * (-mxxyPyzz);
-			mxxyMyzz += OxyyMxzz * (-mxxyMyzz);
-			mxxzPyyz += OxyyPxzz * (-mxxzPyyz);
-			mxxzMyyz += OxyyMxzz * (-mxxzMyyz);
-			mxyyPxzz += OxyyPxzz * (-mxyyPxzz);
-			mxyyMxzz += OxyyMxzz * (-mxyyMxzz);
-			//////////////////////////////////////////////////////////////////////////
-
-			mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
-			mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2;
-			mfcab = (mxxzMyyz + mxxzPyyz) * c1o2;
-			mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2;
-			mfbca = (mxyyMxzz + mxyyPxzz) * c1o2;
-			mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
-
-			//4.
-			//////////////////////////////////////////////////////////////////////////
-			CUMacc += O4 * (-CUMacc);
-			CUMcac += O4 * (-CUMcac);
-			CUMcca += O4 * (-CUMcca);
-
-			CUMbbc += O4 * (-CUMbbc);
-			CUMbcb += O4 * (-CUMbcb);
-			CUMcbb += O4 * (-CUMcbb);
-			//////////////////////////////////////////////////////////////////////////
-
-
-			//5.
-			CUMbcc += O5 * (-CUMbcc);
-			CUMcbc += O5 * (-CUMcbc);
-			CUMccb += O5 * (-CUMccb);
-
-			//6.
-			CUMccc += O6 * (-CUMccc);
-
-
-
-			//back cumulants to central moments
-			//4.
-			mfcbb = CUMcbb + ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) / rho;
-			mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) / rho;
-			mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) / rho;
-
-			mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9*(drho / rho));
-			mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9*(drho / rho));
-			mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9*(drho / rho));
-
-			//5.
-			mfbcc = CUMbcc + ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
-			mfcbc = CUMcbc + ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
-			mfccb = CUMccb + ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
-
-			//6.
-			mfccc = CUMccc - ((-c4o1 *  mfbbb * mfbbb
-				- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
-				- c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
-				- c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
-				+ (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
-					+ c2o1 * (mfcaa * mfaca * mfaac)
-					+ c16o1 *  mfbba * mfbab * mfabb) / (rho * rho)
-				- c1o3 * (mfacc + mfcac + mfcca) / rho
-				- c1o9 * (mfcaa + mfaca + mfaac) / rho
-				+ (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
-					+ (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3
-				+ c1o27*((drho * drho - drho) / (rho*rho)));
-
-			////////////////////////////////////////////////////////////////////////////////////
-			//the force be with you
-			mfbaa = -mfbaa;
-			mfaba = -mfaba;
-			mfaab = -mfaab;
-			////////////////////////////////////////////////////////////////////////////////////
-
-
-			////////////////////////////////////////////////////////////////////////////////////
-			//back
-			////////////////////////////////////////////////////////////////////////////////////
-			backwardChimera(mfaaa, mfaab, mfaac, vvz, vz2);
-			backwardChimera(mfaba, mfabb, mfabc, vvz, vz2);
-			backwardChimera(mfaca, mfacb, mfacc, vvz, vz2);
-			backwardChimera(mfbaa, mfbab, mfbac, vvz, vz2);
-			backwardChimera(mfbba, mfbbb, mfbbc, vvz, vz2);
-			backwardChimera(mfbca, mfbcb, mfbcc, vvz, vz2);
-			backwardChimera(mfcaa, mfcab, mfcac, vvz, vz2);
-			backwardChimera(mfcba, mfcbb, mfcbc, vvz, vz2);
-			backwardChimera(mfcca, mfccb, mfccc, vvz, vz2);
-
-			backwardChimera(mfaaa, mfaba, mfaca, vvy, vy2);
-			backwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-			backwardChimera(mfaac, mfabc, mfacc, vvy, vy2);
-			backwardChimera(mfbaa, mfbba, mfbca, vvy, vy2);
-			backwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
-			backwardChimera(mfbac, mfbbc, mfbcc, vvy, vy2);
-			backwardChimera(mfcaa, mfcba, mfcca, vvy, vy2);
-			backwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-			backwardChimera(mfcac, mfcbc, mfccc, vvy, vy2);
-
-			backwardChimera(mfaaa, mfbaa, mfcaa, vvx, vx2);
-			backwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
-			backwardChimera(mfaac, mfbac, mfcac, vvx, vx2);
-			backwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-			backwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
-			backwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-			backwardChimera(mfaca, mfbca, mfcca, vvx, vx2);
-			backwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-			backwardChimera(mfacc, mfbcc, mfccc, vvx, vx2);
-
-			////////////////////////////////////////////////////////////////////////////////////
-			//mfcbb += EQcbb;
-			//mfabb += EQabb;
-			//mfbcb += EQbcb;
-			//mfbab += EQbab;
-			//mfbbc += EQbbc;
-			//mfbba += EQbba;
-			//mfccb += EQccb;
-			//mfaab += EQaab;
-			//mfcab += EQcab;
-			//mfacb += EQacb;
-			//mfcbc += EQcbc;
-			//mfaba += EQaba;
-			//mfcba += EQcba;
-			//mfabc += EQabc;
-			//mfbcc += EQbcc;
-			//mfbaa += EQbaa;
-			//mfbca += EQbca;
-			//mfbac += EQbac;
-			//mfbbb += EQbbb;
-			//mfccc += EQccc;
-			//mfaac += EQaac;
-			//mfcac += EQcac;
-			//mfacc += EQacc;
-			//mfcca += EQcca;
-			//mfaaa += EQaaa;
-			//mfcaa += EQcaa;
-			//mfaca += EQaca;
-			////////////////////////////////////////////////////////////////////////////////////
-			////Error diffusion
-			real fTEMP = mfbbb + EQbbb;
-			real delta0 = mfbbb - (fTEMP - EQbbb);
-			delta0 *= c1o4;
-			mfbbb = fTEMP;
-
-
-			fTEMP = mfcbb + EQcbb;
-			real deltacbb = mfcbb - (fTEMP - EQcbb);
-			mfcbb = fTEMP;
-			//mfcbb+=EQcbb;
-
-			fTEMP = mfabb + EQabb;
-			real deltaabb = mfabb - (fTEMP - EQabb);
-			mfabb = fTEMP;
-			//mfabb+=EQabb;
-
-			fTEMP = mfbcb + EQbcb;
-			real deltabcb = mfbcb - (fTEMP - EQbcb);
-			mfbcb = fTEMP;
-			//mfbcb+=EQbcb;
-
-			fTEMP = mfbab + EQbab;
-			real deltabab = mfbab - (fTEMP - EQbab);
-			mfbab = fTEMP;
-			//mfbab+=EQbab;
-
-			fTEMP = mfbbc + EQbbc;
-			real deltabbc = mfbbc - (fTEMP - EQbbc);
-			mfbbc = fTEMP;
-			//mfbbc+=EQbbc;
-
-			fTEMP = mfbba + EQbba;
-			real deltabba = mfbba - (fTEMP - EQbba);
-			mfbba = fTEMP;
-			//mfbba+=EQbba;
-
-			EQccb += (delta0 + c1o2*(deltacbb + deltabcb));
-			fTEMP = mfccb + EQccb;
-			real deltaccb = mfccb - (fTEMP - EQccb);
-			mfccb = fTEMP;
-			//mfccb+=EQccb+(delta0+c1o2*(deltacbb+deltabcb));
-
-			EQaab += (delta0 + c1o2*(deltaabb + deltabab));
-			fTEMP = mfaab + EQaab;
-			real deltaaab = mfaab - (fTEMP - EQaab);
-			mfaab = fTEMP;
-			//mfaab+=EQaab+(delta0+c1o2*(deltaabb+deltabab));
-
-			EQcab += (delta0 + c1o2*(deltacbb + deltabab));
-			fTEMP = mfcab + EQcab;
-			real deltacab = mfcab - (fTEMP - EQcab);
-			mfcab = fTEMP;
-			//mfcab+=EQcab+(delta0+c1o2*(deltacbb+deltabab));
-
-			EQacb += (delta0 + c1o2*(deltaabb + deltabcb));
-			fTEMP = mfacb + EQacb;
-			real deltaacb = mfacb - (fTEMP - EQacb);
-			mfacb = fTEMP;
-			//mfacb+=EQacb+(delta0+c1o2*(deltaabb+deltabcb));
-
-			EQcbc += (delta0 + c1o2*(deltacbb + deltabbc));
-			fTEMP = mfcbc + EQcbc;
-			real deltacbc = mfcbc - (fTEMP - EQcbc);
-			mfcbc = fTEMP;
-			//mfcbc+=EQcbc+(delta0+c1o2*(deltacbb+deltabbc));
-
-			EQaba += (delta0 + c1o2*(deltaabb + deltabba));
-			fTEMP = mfaba + EQaba;
-			real deltaaba = mfaba - (fTEMP - EQaba);
-			mfaba = fTEMP;
-			//mfaba+=EQaba+(delta0+c1o2*(deltaabb+deltabba));
-
-			EQcba += (delta0 + c1o2*(deltacbb + deltabba));
-			fTEMP = mfcba + EQcba;
-			real deltacba = mfcba - (fTEMP - EQcba);
-			mfcba = fTEMP;
-			//mfcba+=EQcba+(delta0+c1o2*(deltacbb+deltabba));
-
-			EQabc += (delta0 + c1o2*(deltaabb + deltabbc));
-			fTEMP = mfabc + EQabc;
-			real deltaabc = mfabc - (fTEMP - EQabc);
-			mfabc = fTEMP;
-			//mfabc+=EQabc+(delta0+c1o2*(deltaabb+deltabbc));
-
-			EQbcc += (delta0 + c1o2*(deltabcb + deltabbc));
-			fTEMP = mfbcc + EQbcc;
-			real deltabcc = mfbcc - (fTEMP - EQbcc);
-			mfbcc = fTEMP;
-			//mfbcc+=EQbcc+(delta0+c1o2*(deltabcb+deltabbc));
-
-			EQbaa += (delta0 + c1o2*(deltabab + deltabba));
-			fTEMP = mfbaa + EQbaa;
-			real deltabaa = mfbaa - (fTEMP - EQbaa);
-			mfbaa = fTEMP;
-			//mfbaa+=EQbaa+(delta0+c1o2*(deltabab+deltabba));
-
-			EQbca += (delta0 + c1o2*(deltabcb + deltabba));
-			fTEMP = mfbca + EQbca;
-			real deltabca = mfbca - (fTEMP - EQbca);
-			mfbca = fTEMP;
-			//mfbca+=EQbca+(delta0+c1o2*(deltabcb+deltabba));
-
-			EQbac += (delta0 + c1o2*(deltabab + deltabbc));
-			fTEMP = mfbac + EQbac;
-			real deltabac = mfbac - (fTEMP - EQbac);
-			mfbac = fTEMP;
-			//mfbac+=EQbac+(delta0+c1o2*(deltabab+deltabbc));
-
-			mfccc += EQccc - (delta0 + c1o4*(deltacbb + deltabcb + deltabbc) - c1o2*(deltabcc + deltacbc + deltaccb));
-			mfaac += EQaac - (delta0 + c1o4*(deltaabb + deltabab + deltabbc) - c1o2*(deltabac + deltaabc + deltaaab));
-			mfcac += EQcac - (delta0 + c1o4*(deltacbb + deltabab + deltabbc) - c1o2*(deltabac + deltacbc + deltacab));
-			mfacc += EQacc - (delta0 + c1o4*(deltaabb + deltabcb + deltabbc) - c1o2*(deltabcc + deltaabc + deltaacb));
-			mfcca += EQcca - (delta0 + c1o4*(deltacbb + deltabcb + deltabba) - c1o2*(deltabca + deltacba + deltaccb));
-			mfaaa += EQaaa - (delta0 + c1o4*(deltaabb + deltabab + deltabba) - c1o2*(deltabaa + deltaaba + deltaaab));
-			mfcaa += EQcaa - (delta0 + c1o4*(deltacbb + deltabab + deltabba) - c1o2*(deltabaa + deltacba + deltacab));
-			mfaca += EQaca - (delta0 + c1o4*(deltaabb + deltabcb + deltabba) - c1o2*(deltabca + deltaaba + deltaacb));
-
-
-
-			//////////////////////////////////////////////////////////////////////////////////////
-			////back
-			//////////////////////////////////////////////////////////////////////////////////////
-			////mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// Z - Dir
-			//backwardChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, one);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimera(mfaba, mfabb, mfabc, vvz, vz2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c1o3);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimera(mfbaa, mfbab, mfbac, vvz, vz2);
-			///////////b//////////////////////////////////////////////////////////////////////////
-			//backwardChimera(mfbba, mfbbb, mfbbc, vvz, vz2);
-			///////////b//////////////////////////////////////////////////////////////////////////
-			//backwardChimera(mfbca, mfbcb, mfbcc, vvz, vz2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c1o3);
-			///////////c//////////////////////////////////////////////////////////////////////////
-			//backwardChimera(mfcba, mfcbb, mfcbc, vvz, vz2);
-			///////////c//////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c1o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			////mit 1/6, 2/3, 1/6, 0, 0, 0, 1/18, 2/9, 1/18   Konditionieren
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// Y - Dir
-			//backwardChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c1o6);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfaab, mfabb, mfacb, vvy, vy2, c2o3);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c1o6);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimera(mfbaa, mfbba, mfbca, vvz, vz2);
-			///////////b//////////////////////////////////////////////////////////////////////////
-			//backwardChimera(mfbab, mfbbb, mfbcb, vvz, vz2);
-			///////////b//////////////////////////////////////////////////////////////////////////
-			//backwardChimera(mfbac, mfbbc, mfbcc, vvz, vz2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c1o18);
-			///////////c//////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfcab, mfcbb, mfccb, vvy, vy2, c2o9);
-			///////////c//////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c1o18);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			////mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36 Konditionieren
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// X - Dir
-			//backwardChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o36);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfaba, mfbba, mfcba, vvx, vx2, c1o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c1o36);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfaab, mfbab, mfcab, vvx, vx2, c1o9);
-			/////////////b////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfabb, mfbbb, mfcbb, vvx, vx2, c4o9);
-			/////////////b////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfacb, mfbcb, mfccb, vvx, vx2, c1o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c1o36);
-			/////////////c////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfabc, mfbbc, mfcbc, vvx, vx2, c1o9);
-			/////////////c////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c1o36);
-			//////////////////////////////////////////////////////////////////////////////////////
-
-			////////////////////////////////////////////////////////////////////////////////////////
-			//real drhoPost =
-			//	((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
-			//	(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
-			//		((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
-			//mfbbb += drho - drhoPost;
-			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[dirE])[k] = mfabb;//(D.f[ dirE   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ dirE   ])[k   ]                                                                     
-			(D.f[dirW])[kw] = mfcbb;//(D.f[ dirW   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ dirW   ])[kw  ]                                                                   
-			(D.f[dirN])[k] = mfbab;//(D.f[ dirN   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ dirN   ])[k   ]
-			(D.f[dirS])[ks] = mfbcb;//(D.f[ dirS   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ dirS   ])[ks  ]
-			(D.f[dirT])[k] = mfbba;//(D.f[ dirT   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ dirT   ])[k   ]
-			(D.f[dirB])[kb] = mfbbc;//(D.f[ dirB   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ dirB   ])[kb  ]
-			(D.f[dirNE])[k] = mfaab;//(D.f[ dirNE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ dirNE  ])[k   ]
-			(D.f[dirSW])[ksw] = mfccb;//(D.f[ dirSW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ dirSW  ])[ksw ]
-			(D.f[dirSE])[ks] = mfacb;//(D.f[ dirSE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ dirSE  ])[ks  ]
-			(D.f[dirNW])[kw] = mfcab;//(D.f[ dirNW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ dirNW  ])[kw  ]
-			(D.f[dirTE])[k] = mfaba;//(D.f[ dirTE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ dirTE  ])[k   ]
-			(D.f[dirBW])[kbw] = mfcbc;//(D.f[ dirBW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ dirBW  ])[kbw ]
-			(D.f[dirBE])[kb] = mfabc;//(D.f[ dirBE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ dirBE  ])[kb  ]
-			(D.f[dirTW])[kw] = mfcba;//(D.f[ dirTW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ dirTW  ])[kw  ]
-			(D.f[dirTN])[k] = mfbaa;//(D.f[ dirTN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ dirTN  ])[k   ]
-			(D.f[dirBS])[kbs] = mfbcc;//(D.f[ dirBS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ dirBS  ])[kbs ]
-			(D.f[dirBN])[kb] = mfbac;//(D.f[ dirBN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ dirBN  ])[kb  ]
-			(D.f[dirTS])[ks] = mfbca;//(D.f[ dirTS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ dirTS  ])[ks  ]
-			(D.f[dirZERO])[k] = mfbbb;//(D.f[ dirZERO])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ dirZERO])[k   ]
-			(D.f[dirTNE])[k] = mfaaa;//(D.f[ dirTNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ dirTNE ])[k   ]
-			(D.f[dirTSE])[ks] = mfaca;//(D.f[ dirTSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ dirTSE ])[ks  ]
-			(D.f[dirBNE])[kb] = mfaac;//(D.f[ dirBNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ dirBNE ])[kb  ]
-			(D.f[dirBSE])[kbs] = mfacc;//(D.f[ dirBSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ dirBSE ])[kbs ]
-			(D.f[dirTNW])[kw] = mfcaa;//(D.f[ dirTNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ dirTNW ])[kw  ]
-			(D.f[dirTSW])[ksw] = mfcca;//(D.f[ dirTSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ dirTSW ])[ksw ]
-			(D.f[dirBNW])[kbw] = mfcac;//(D.f[ dirBNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ dirBNW ])[kbw ]
-			(D.f[dirBSW])[kbsw] = mfccc;//(D.f[ dirBSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ dirBSW ])[kbsw]
-										////////////////////////////////////////////////////////////////////////////////////
-		}
-	}
+    ////////////////////////////////////////////////////////////////////////////////
+    const unsigned  x = threadIdx.x;  // Globaler x-Index 
+    const unsigned  y = blockIdx.x;   // Globaler y-Index 
+    const unsigned  z = blockIdx.y;   // Globaler z-Index 
+
+    const unsigned nx = blockDim.x;
+    const unsigned ny = gridDim.x;
+
+    const unsigned k = nx*(ny*z + y) + x;
+    //////////////////////////////////////////////////////////////////////////
+
+    if (k<size_Mat)
+    {
+        ////////////////////////////////////////////////////////////////////////////////
+        unsigned int BC;
+        BC = bcMatD[k];
+
+        if (BC >= GEO_FLUID/*(BC != GEO_SOLID) && (BC != GEO_VOID)*/)
+        {
+            Distributions27 D;
+            if (EvenOrOdd == true)
+            {
+                D.f[dirE] = &DDStart[dirE   *size_Mat];
+                D.f[dirW] = &DDStart[dirW   *size_Mat];
+                D.f[dirN] = &DDStart[dirN   *size_Mat];
+                D.f[dirS] = &DDStart[dirS   *size_Mat];
+                D.f[dirT] = &DDStart[dirT   *size_Mat];
+                D.f[dirB] = &DDStart[dirB   *size_Mat];
+                D.f[dirNE] = &DDStart[dirNE  *size_Mat];
+                D.f[dirSW] = &DDStart[dirSW  *size_Mat];
+                D.f[dirSE] = &DDStart[dirSE  *size_Mat];
+                D.f[dirNW] = &DDStart[dirNW  *size_Mat];
+                D.f[dirTE] = &DDStart[dirTE  *size_Mat];
+                D.f[dirBW] = &DDStart[dirBW  *size_Mat];
+                D.f[dirBE] = &DDStart[dirBE  *size_Mat];
+                D.f[dirTW] = &DDStart[dirTW  *size_Mat];
+                D.f[dirTN] = &DDStart[dirTN  *size_Mat];
+                D.f[dirBS] = &DDStart[dirBS  *size_Mat];
+                D.f[dirBN] = &DDStart[dirBN  *size_Mat];
+                D.f[dirTS] = &DDStart[dirTS  *size_Mat];
+                D.f[dirZERO] = &DDStart[dirZERO*size_Mat];
+                D.f[dirTNE] = &DDStart[dirTNE *size_Mat];
+                D.f[dirTSW] = &DDStart[dirTSW *size_Mat];
+                D.f[dirTSE] = &DDStart[dirTSE *size_Mat];
+                D.f[dirTNW] = &DDStart[dirTNW *size_Mat];
+                D.f[dirBNE] = &DDStart[dirBNE *size_Mat];
+                D.f[dirBSW] = &DDStart[dirBSW *size_Mat];
+                D.f[dirBSE] = &DDStart[dirBSE *size_Mat];
+                D.f[dirBNW] = &DDStart[dirBNW *size_Mat];
+            }
+            else
+            {
+                D.f[dirW] = &DDStart[dirE   *size_Mat];
+                D.f[dirE] = &DDStart[dirW   *size_Mat];
+                D.f[dirS] = &DDStart[dirN   *size_Mat];
+                D.f[dirN] = &DDStart[dirS   *size_Mat];
+                D.f[dirB] = &DDStart[dirT   *size_Mat];
+                D.f[dirT] = &DDStart[dirB   *size_Mat];
+                D.f[dirSW] = &DDStart[dirNE  *size_Mat];
+                D.f[dirNE] = &DDStart[dirSW  *size_Mat];
+                D.f[dirNW] = &DDStart[dirSE  *size_Mat];
+                D.f[dirSE] = &DDStart[dirNW  *size_Mat];
+                D.f[dirBW] = &DDStart[dirTE  *size_Mat];
+                D.f[dirTE] = &DDStart[dirBW  *size_Mat];
+                D.f[dirTW] = &DDStart[dirBE  *size_Mat];
+                D.f[dirBE] = &DDStart[dirTW  *size_Mat];
+                D.f[dirBS] = &DDStart[dirTN  *size_Mat];
+                D.f[dirTN] = &DDStart[dirBS  *size_Mat];
+                D.f[dirTS] = &DDStart[dirBN  *size_Mat];
+                D.f[dirBN] = &DDStart[dirTS  *size_Mat];
+                D.f[dirZERO] = &DDStart[dirZERO*size_Mat];
+                D.f[dirBSW] = &DDStart[dirTNE *size_Mat];
+                D.f[dirBNE] = &DDStart[dirTSW *size_Mat];
+                D.f[dirBNW] = &DDStart[dirTSE *size_Mat];
+                D.f[dirBSE] = &DDStart[dirTNW *size_Mat];
+                D.f[dirTSW] = &DDStart[dirBNE *size_Mat];
+                D.f[dirTNE] = &DDStart[dirBSW *size_Mat];
+                D.f[dirTNW] = &DDStart[dirBSE *size_Mat];
+                D.f[dirTSE] = &DDStart[dirBNW *size_Mat];
+            }
+
+            ////////////////////////////////////////////////////////////////////////////////
+            //index
+            //unsigned int kzero= k;
+            //unsigned int ke   = k;
+            unsigned int kw = neighborX[k];
+            //unsigned int kn   = k;
+            unsigned int ks = neighborY[k];
+            //unsigned int kt   = k;
+            unsigned int kb = neighborZ[k];
+            unsigned int ksw = neighborY[kw];
+            //unsigned int kne  = k;
+            //unsigned int kse  = ks;
+            //unsigned int knw  = kw;
+            unsigned int kbw = neighborZ[kw];
+            //unsigned int kte  = k;
+            //unsigned int kbe  = kb;
+            //unsigned int ktw  = kw;
+            unsigned int kbs = neighborZ[ks];
+            //unsigned int ktn  = k;
+            //unsigned int kbn  = kb;
+            //unsigned int kts  = ks;
+            //unsigned int ktse = ks;
+            //unsigned int kbnw = kbw;
+            //unsigned int ktnw = kw;
+            //unsigned int kbse = kbs;
+            //unsigned int ktsw = ksw;
+            //unsigned int kbne = kb;
+            //unsigned int ktne = k;
+            unsigned int kbsw = neighborZ[ksw];
+
+
+
+            //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            real mfcbb = (D.f[dirE])[k];//[ke   ];// +  c2over27 ;(D.f[dirE   ])[k  ];//ke
+            real mfabb = (D.f[dirW])[kw];//[kw   ];// +  c2over27 ;(D.f[dirW   ])[kw ];
+            real mfbcb = (D.f[dirN])[k];//[kn   ];// +  c2over27 ;(D.f[dirN   ])[k  ];//kn
+            real mfbab = (D.f[dirS])[ks];//[ks   ];// +  c2over27 ;(D.f[dirS   ])[ks ];
+            real mfbbc = (D.f[dirT])[k];//[kt   ];// +  c2over27 ;(D.f[dirT   ])[k  ];//kt
+            real mfbba = (D.f[dirB])[kb];//[kb   ];// +  c2over27 ;(D.f[dirB   ])[kb ];
+            real mfccb = (D.f[dirNE])[k];//[kne  ];// +  c1over54 ;(D.f[dirNE  ])[k  ];//kne
+            real mfaab = (D.f[dirSW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[dirSW  ])[ksw];
+            real mfcab = (D.f[dirSE])[ks];//[kse  ];// +  c1over54 ;(D.f[dirSE  ])[ks ];//kse
+            real mfacb = (D.f[dirNW])[kw];//[knw  ];// +  c1over54 ;(D.f[dirNW  ])[kw ];//knw
+            real mfcbc = (D.f[dirTE])[k];//[kte  ];// +  c1over54 ;(D.f[dirTE  ])[k  ];//kte
+            real mfaba = (D.f[dirBW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[dirBW  ])[kbw];
+            real mfcba = (D.f[dirBE])[kb];//[kbe  ];// +  c1over54 ;(D.f[dirBE  ])[kb ];//kbe
+            real mfabc = (D.f[dirTW])[kw];//[ktw  ];// +  c1over54 ;(D.f[dirTW  ])[kw ];//ktw
+            real mfbcc = (D.f[dirTN])[k];//[ktn  ];// +  c1over54 ;(D.f[dirTN  ])[k  ];//ktn
+            real mfbaa = (D.f[dirBS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[dirBS  ])[kbs];
+            real mfbca = (D.f[dirBN])[kb];//[kbn  ];// +  c1over54 ;(D.f[dirBN  ])[kb ];//kbn
+            real mfbac = (D.f[dirTS])[ks];//[kts  ];// +  c1over54 ;(D.f[dirTS  ])[ks ];//kts
+            real mfbbb = (D.f[dirZERO])[k];//[kzero];// +  c8over27 ;(D.f[dirZERO])[k  ];//kzero
+            real mfccc = (D.f[dirTNE])[k];//[ktne ];// +  c1over216;(D.f[dirTNE ])[k  ];//ktne
+            real mfaac = (D.f[dirTSW])[ksw];//[ktsw ];// +  c1over216;(D.f[dirTSW ])[ksw];//ktsw
+            real mfcac = (D.f[dirTSE])[ks];//[ktse ];// +  c1over216;(D.f[dirTSE ])[ks ];//ktse
+            real mfacc = (D.f[dirTNW])[kw];//[ktnw ];// +  c1over216;(D.f[dirTNW ])[kw ];//ktnw
+            real mfcca = (D.f[dirBNE])[kb];//[kbne ];// +  c1over216;(D.f[dirBNE ])[kb ];//kbne
+            real mfaaa = (D.f[dirBSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[dirBSW ])[kbsw];
+            real mfcaa = (D.f[dirBSE])[kbs];//[kbse ];// +  c1over216;(D.f[dirBSE ])[kbs];//kbse
+            real mfaca = (D.f[dirBNW])[kbw];//[kbnw ];// +  c1over216;(D.f[dirBNW ])[kbw];//kbnw
+                                               ////////////////////////////////////////////////////////////////////////////////////
+            real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+                (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
+                ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
+
+            real rho = c1o1 + drho;
+            ////////////////////////////////////////////////////////////////////////////////////
+            real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+                (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
+                (mfcbb - mfabb)) / rho;
+            real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+                (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
+                (mfbcb - mfbab)) / rho;
+            real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+                (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
+                (mfbbc - mfbba)) / rho;
+            ////////////////////////////////////////////////////////////////////////////////////
+            //the force be with you
+            real fx = forces[0] / (pow((double)c2o1, (double)level)); //zero;//0.0032653/(pow(two,level)); //0.000000005;//(two/1600000.0) / 120.0; //
+            real fy = forces[1] / (pow((double)c2o1, (double)level)); //zero;
+            real fz = forces[2] / (pow((double)c2o1, (double)level)); //zero;
+            vvx += fx*c1o2;
+            vvy += fy*c1o2;
+            vvz += fz*c1o2;
+            ////////////////////////////////////////////////////////////////////////////////////
+            //real omega = omega_in;
+            ////////////////////////////////////////////////////////////////////////////////////
+            //fast
+            //real oMdrho = c1o1; // comp special
+            //real m0, m1, m2;
+            real vx2;
+            real vy2;
+            real vz2;
+            vx2 = vvx*vvx;
+            vy2 = vvy*vvy;
+            vz2 = vvz*vvz;
+            ////////////////////////////////////////////////////////////////////////////////////
+            //real wadjust;
+            //real qudricLimitP = c1o100;// * 0.0001f;
+            //real qudricLimitM = c1o100;// * 0.0001f;
+            //real qudricLimitD = c1o100;// * 0.001f;
+            //real s9 = minusomega;
+            //test
+            //s9 = 0.;
+
+
+            //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            real EQcbb = c0o1;
+            real EQabb = c0o1;
+            real EQbcb = c0o1;
+            real EQbab = c0o1;
+            real EQbbc = c0o1;
+            real EQbba = c0o1;
+            real EQccb = c0o1;
+            real EQaab = c0o1;
+            real EQcab = c0o1;
+            real EQacb = c0o1;
+            real EQcbc = c0o1;
+            real EQaba = c0o1;
+            real EQcba = c0o1;
+            real EQabc = c0o1;
+            real EQbcc = c0o1;
+            real EQbaa = c0o1;
+            real EQbca = c0o1;
+            real EQbac = c0o1;
+            real EQbbb = c0o1;
+            real EQccc = drho * c1o27;
+            real EQaac = drho * c1o3;
+            real EQcac = drho * c1o9;
+            real EQacc = drho * c1o9;
+            real EQcca = drho * c1o9;
+            real EQaaa = drho;
+            real EQcaa = drho * c1o3;
+            real EQaca = drho * c1o3;
+            ////////////////////////////////////////////////////////////////////////////////////
+            vf::lbm::backwardChimeraWithK(EQaaa, EQaab, EQaac, vvz, vz2, c1o1);
+            vf::lbm::backwardChimeraWithK(EQaca, EQacb, EQacc, vvz, vz2, c1o3);
+            ///////////////////////////////////////////////////////////
+            EQcaa = EQaca; EQcab = EQacb; EQcac = EQacc;
+            ///////////////////////////////////////////////////////////
+            vf::lbm::backwardChimeraWithK(EQcca, EQccb, EQccc, vvz, vz2, c1o9);
+
+            vf::lbm::backwardChimeraWithK(EQaaa, EQaba, EQaca, vvy, vy2, c1o6);
+            vf::lbm::backwardChimeraWithK(EQaab, EQabb, EQacb, vvy, vy2, c2o3);
+            vf::lbm::backwardChimeraWithK(EQaac, EQabc, EQacc, vvy, vy2, c1o6);
+            vf::lbm::backwardChimeraWithK(EQcaa, EQcba, EQcca, vvy, vy2, c1o18);
+            vf::lbm::backwardChimeraWithK(EQcab, EQcbb, EQccb, vvy, vy2, c2o9);
+            vf::lbm::backwardChimeraWithK(EQcac, EQcbc, EQccc, vvy, vy2, c1o18);
+
+            vf::lbm::backwardChimeraWithK(EQaaa, EQbaa, EQcaa, vvx, vx2, c1o36);
+            vf::lbm::backwardChimeraWithK(EQaab, EQbab, EQcab, vvx, vx2, c1o9);
+            vf::lbm::backwardChimeraWithK(EQaac, EQbac, EQcac, vvx, vx2, c1o36);
+            vf::lbm::backwardChimeraWithK(EQaba, EQbba, EQcba, vvx, vx2, c1o9);
+            vf::lbm::backwardChimeraWithK(EQabb, EQbbb, EQcbb, vvx, vx2, c4o9);
+            vf::lbm::backwardChimeraWithK(EQabc, EQbbc, EQcbc, vvx, vx2, c1o9);
+            vf::lbm::backwardChimeraWithK(EQaca, EQbca, EQcca, vvx, vx2, c1o36);
+            vf::lbm::backwardChimeraWithK(EQacb, EQbcb, EQccb, vvx, vx2, c1o9);
+            vf::lbm::backwardChimeraWithK(EQacc, EQbcc, EQccc, vvx, vx2, c1o36);
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            //Pre-condition
+            mfcbb -= EQcbb;
+            mfabb -= EQabb;
+            mfbcb -= EQbcb;
+            mfbab -= EQbab;
+            mfbbc -= EQbbc;
+            mfbba -= EQbba;
+            mfccb -= EQccb;
+            mfaab -= EQaab;
+            mfcab -= EQcab;
+            mfacb -= EQacb;
+            mfcbc -= EQcbc;
+            mfaba -= EQaba;
+            mfcba -= EQcba;
+            mfabc -= EQabc;
+            mfbcc -= EQbcc;
+            mfbaa -= EQbaa;
+            mfbca -= EQbca;
+            mfbac -= EQbac;
+            mfbbb -= EQbbb;
+            mfccc -= EQccc;
+            mfaac -= EQaac;
+            mfcac -= EQcac;
+            mfacc -= EQacc;
+            mfcca -= EQcca;
+            mfaaa -= EQaaa;
+            mfcaa -= EQcaa;
+            mfaca -= EQaca;
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            //Hin
+            ////////////////////////////////////////////////////////////////////////////////////
+            vf::lbm::forwardChimera(mfaaa, mfaab, mfaac, vvz, vz2);
+            vf::lbm::forwardChimera(mfaba, mfabb, mfabc, vvz, vz2);
+            vf::lbm::forwardChimera(mfaca, mfacb, mfacc, vvz, vz2);
+            vf::lbm::forwardChimera(mfbaa, mfbab, mfbac, vvz, vz2);
+            vf::lbm::forwardChimera(mfbba, mfbbb, mfbbc, vvz, vz2);
+            vf::lbm::forwardChimera(mfbca, mfbcb, mfbcc, vvz, vz2);
+            vf::lbm::forwardChimera(mfcaa, mfcab, mfcac, vvz, vz2);
+            vf::lbm::forwardChimera(mfcba, mfcbb, mfcbc, vvz, vz2);
+            vf::lbm::forwardChimera(mfcca, mfccb, mfccc, vvz, vz2);
+
+            vf::lbm::forwardChimera(mfaaa, mfaba, mfaca, vvy, vy2);
+            vf::lbm::forwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
+            vf::lbm::forwardChimera(mfaac, mfabc, mfacc, vvy, vy2);
+            vf::lbm::forwardChimera(mfbaa, mfbba, mfbca, vvy, vy2);
+            vf::lbm::forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
+            vf::lbm::forwardChimera(mfbac, mfbbc, mfbcc, vvy, vy2);
+            vf::lbm::forwardChimera(mfcaa, mfcba, mfcca, vvy, vy2);
+            vf::lbm::forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
+            vf::lbm::forwardChimera(mfcac, mfcbc, mfccc, vvy, vy2);
+
+            vf::lbm::forwardChimera(mfaaa, mfbaa, mfcaa, vvx, vx2);
+            vf::lbm::forwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
+            vf::lbm::forwardChimera(mfaac, mfbac, mfcac, vvx, vx2);
+            vf::lbm::forwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
+            vf::lbm::forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
+            vf::lbm::forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
+            vf::lbm::forwardChimera(mfaca, mfbca, mfcca, vvx, vx2);
+            vf::lbm::forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
+            vf::lbm::forwardChimera(mfacc, mfbcc, mfccc, vvx, vx2);
+
+            //////////////////////////////////////////////////////////////////////////////////////
+            ////Hin
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36  Konditionieren
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// Z - Dir
+            //forwardChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c1o36);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c1o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c1o36);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c1o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c4o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c1o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c1o36);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c1o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c1o36);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// mit  1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// Y - Dir
+            //forwardChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c1o6);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c1o18);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c2o3);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c2o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c1o6);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c1o18);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// mit     1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9		Konditionieren
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// X - Dir
+            //forwardChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, one);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c1o3);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c1o3);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c1o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            // Cumulants
+            ////////////////////////////////////////////////////////////////////////////////////
+            real OxxPyyPzz = c1o1; //omega; // one;	//set the bulk viscosity one is high / two is very low and zero is (too) high
+
+            ////////////////////////////////////////////////////////////
+            //3.
+            //////////////////////////////
+            real OxyyPxzz = c1o1;
+            real OxyyMxzz = c1o1;
+            //real Oxyz = c1o1;
+            ////////////////////////////////////////////////////////////
+            //4.
+            //////////////////////////////
+            real O4 = c1o1;
+            ////////////////////////////////////////////////////////////
+            //5.
+            //////////////////////////////
+            real O5 = c1o1;
+            ////////////////////////////////////////////////////////////
+            //6.
+            //////////////////////////////
+            real O6 = c1o1;
+            ////////////////////////////////////////////////////////////
+
+
+            //central moments to cumulants
+            //4.
+            real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) / rho;
+            real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) / rho;
+            real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) / rho;
+
+            real CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9*(drho / rho));
+            real CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9*(drho / rho));
+            real CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9*(drho / rho));
+
+            //5.
+            real CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
+            real CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
+            real CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
+
+            //6.
+
+            real CUMccc = mfccc + ((-c4o1 *  mfbbb * mfbbb
+                - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
+                - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+                - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
+                + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+                    + c2o1 * (mfcaa * mfaca * mfaac)
+                    + c16o1 *  mfbba * mfbab * mfabb) / (rho * rho)
+                - c1o3 * (mfacc + mfcac + mfcca) / rho
+                - c1o9 * (mfcaa + mfaca + mfaac) / rho
+                + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
+                    + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3
+                + c1o27*((drho * drho - drho) / (rho*rho)));
+
+
+
+
+            //2.
+            // linear combinations
+            real mxxPyyPzz = mfcaa + mfaca + mfaac;
+            real mxxMyy = mfcaa - mfaca;
+            real mxxMzz = mfcaa - mfaac;
+
+            /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            //incl. correction		(hat noch nicht so gut funktioniert...Optimierungsbedarf??)
+            {
+                real dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 *  OxxPyyPzz * (mfaaa - mxxPyyPzz);
+                real dyuy = dxux + omega * c3o2 * mxxMyy;
+                real dzuz = dxux + omega * c3o2 * mxxMzz;
+
+                //relax
+                mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);//-magicBulk*OxxPyyPzz;
+                mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
+                mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
+
+            }
+            /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+            /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            ////no correction
+            //mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz);//-magicBulk*OxxPyyPzz;
+            //mxxMyy += -(-omega) * (-mxxMyy);
+            //mxxMzz += -(-omega) * (-mxxMzz);
+            /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            mfabb += omega * (-mfabb);
+            mfbab += omega * (-mfbab);
+            mfbba += omega * (-mfbba);
+
+            //////////////////////////////////////////////////////////////////////////
+
+            // linear combinations back
+            mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
+            mfaca = c1o3 * (-c2o1*  mxxMyy + mxxMzz + mxxPyyPzz);
+            mfaac = c1o3 * (mxxMyy - c2o1* mxxMzz + mxxPyyPzz);
+
+            //3.
+            // linear combinations
+
+            real mxxyPyzz = mfcba + mfabc;
+            real mxxyMyzz = mfcba - mfabc;
+
+            real mxxzPyyz = mfcab + mfacb;
+            real mxxzMyyz = mfcab - mfacb;
+
+            real mxyyPxzz = mfbca + mfbac;
+            real mxyyMxzz = mfbca - mfbac;
+
+            //relax
+            //////////////////////////////////////////////////////////////////////////
+            mfbbb += OxyyMxzz * (-mfbbb);
+            mxxyPyzz += OxyyPxzz * (-mxxyPyzz);
+            mxxyMyzz += OxyyMxzz * (-mxxyMyzz);
+            mxxzPyyz += OxyyPxzz * (-mxxzPyyz);
+            mxxzMyyz += OxyyMxzz * (-mxxzMyyz);
+            mxyyPxzz += OxyyPxzz * (-mxyyPxzz);
+            mxyyMxzz += OxyyMxzz * (-mxyyMxzz);
+            //////////////////////////////////////////////////////////////////////////
+
+            mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
+            mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2;
+            mfcab = (mxxzMyyz + mxxzPyyz) * c1o2;
+            mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2;
+            mfbca = (mxyyMxzz + mxyyPxzz) * c1o2;
+            mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
+
+            //4.
+            //////////////////////////////////////////////////////////////////////////
+            CUMacc += O4 * (-CUMacc);
+            CUMcac += O4 * (-CUMcac);
+            CUMcca += O4 * (-CUMcca);
+
+            CUMbbc += O4 * (-CUMbbc);
+            CUMbcb += O4 * (-CUMbcb);
+            CUMcbb += O4 * (-CUMcbb);
+            //////////////////////////////////////////////////////////////////////////
+
+
+            //5.
+            CUMbcc += O5 * (-CUMbcc);
+            CUMcbc += O5 * (-CUMcbc);
+            CUMccb += O5 * (-CUMccb);
+
+            //6.
+            CUMccc += O6 * (-CUMccc);
+
+
+
+            //back cumulants to central moments
+            //4.
+            mfcbb = CUMcbb + ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) / rho;
+            mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) / rho;
+            mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) / rho;
+
+            mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9*(drho / rho));
+            mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9*(drho / rho));
+            mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9*(drho / rho));
+
+            //5.
+            mfbcc = CUMbcc + ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
+            mfcbc = CUMcbc + ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
+            mfccb = CUMccb + ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
+
+            //6.
+            mfccc = CUMccc - ((-c4o1 *  mfbbb * mfbbb
+                - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
+                - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+                - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
+                + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+                    + c2o1 * (mfcaa * mfaca * mfaac)
+                    + c16o1 *  mfbba * mfbab * mfabb) / (rho * rho)
+                - c1o3 * (mfacc + mfcac + mfcca) / rho
+                - c1o9 * (mfcaa + mfaca + mfaac) / rho
+                + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
+                    + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3
+                + c1o27*((drho * drho - drho) / (rho*rho)));
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            //the force be with you
+            mfbaa = -mfbaa;
+            mfaba = -mfaba;
+            mfaab = -mfaab;
+            ////////////////////////////////////////////////////////////////////////////////////
+
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            //back
+            ////////////////////////////////////////////////////////////////////////////////////
+            vf::lbm::backwardChimera(mfaaa, mfaab, mfaac, vvz, vz2);
+            vf::lbm::backwardChimera(mfaba, mfabb, mfabc, vvz, vz2);
+            vf::lbm::backwardChimera(mfaca, mfacb, mfacc, vvz, vz2);
+            vf::lbm::backwardChimera(mfbaa, mfbab, mfbac, vvz, vz2);
+            vf::lbm::backwardChimera(mfbba, mfbbb, mfbbc, vvz, vz2);
+            vf::lbm::backwardChimera(mfbca, mfbcb, mfbcc, vvz, vz2);
+            vf::lbm::backwardChimera(mfcaa, mfcab, mfcac, vvz, vz2);
+            vf::lbm::backwardChimera(mfcba, mfcbb, mfcbc, vvz, vz2);
+            vf::lbm::backwardChimera(mfcca, mfccb, mfccc, vvz, vz2);
+
+            vf::lbm::backwardChimera(mfaaa, mfaba, mfaca, vvy, vy2);
+            vf::lbm::backwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
+            vf::lbm::backwardChimera(mfaac, mfabc, mfacc, vvy, vy2);
+            vf::lbm::backwardChimera(mfbaa, mfbba, mfbca, vvy, vy2);
+            vf::lbm::backwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
+            vf::lbm::backwardChimera(mfbac, mfbbc, mfbcc, vvy, vy2);
+            vf::lbm::backwardChimera(mfcaa, mfcba, mfcca, vvy, vy2);
+            vf::lbm::backwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
+            vf::lbm::backwardChimera(mfcac, mfcbc, mfccc, vvy, vy2);
+
+            vf::lbm::backwardChimera(mfaaa, mfbaa, mfcaa, vvx, vx2);
+            vf::lbm::backwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
+            vf::lbm::backwardChimera(mfaac, mfbac, mfcac, vvx, vx2);
+            vf::lbm::backwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
+            vf::lbm::backwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
+            vf::lbm::backwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
+            vf::lbm::backwardChimera(mfaca, mfbca, mfcca, vvx, vx2);
+            vf::lbm::backwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
+            vf::lbm::backwardChimera(mfacc, mfbcc, mfccc, vvx, vx2);
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            //mfcbb += EQcbb;
+            //mfabb += EQabb;
+            //mfbcb += EQbcb;
+            //mfbab += EQbab;
+            //mfbbc += EQbbc;
+            //mfbba += EQbba;
+            //mfccb += EQccb;
+            //mfaab += EQaab;
+            //mfcab += EQcab;
+            //mfacb += EQacb;
+            //mfcbc += EQcbc;
+            //mfaba += EQaba;
+            //mfcba += EQcba;
+            //mfabc += EQabc;
+            //mfbcc += EQbcc;
+            //mfbaa += EQbaa;
+            //mfbca += EQbca;
+            //mfbac += EQbac;
+            //mfbbb += EQbbb;
+            //mfccc += EQccc;
+            //mfaac += EQaac;
+            //mfcac += EQcac;
+            //mfacc += EQacc;
+            //mfcca += EQcca;
+            //mfaaa += EQaaa;
+            //mfcaa += EQcaa;
+            //mfaca += EQaca;
+            ////////////////////////////////////////////////////////////////////////////////////
+            ////Error diffusion
+            real fTEMP = mfbbb + EQbbb;
+            real delta0 = mfbbb - (fTEMP - EQbbb);
+            delta0 *= c1o4;
+            mfbbb = fTEMP;
+
+
+            fTEMP = mfcbb + EQcbb;
+            real deltacbb = mfcbb - (fTEMP - EQcbb);
+            mfcbb = fTEMP;
+            //mfcbb+=EQcbb;
+
+            fTEMP = mfabb + EQabb;
+            real deltaabb = mfabb - (fTEMP - EQabb);
+            mfabb = fTEMP;
+            //mfabb+=EQabb;
+
+            fTEMP = mfbcb + EQbcb;
+            real deltabcb = mfbcb - (fTEMP - EQbcb);
+            mfbcb = fTEMP;
+            //mfbcb+=EQbcb;
+
+            fTEMP = mfbab + EQbab;
+            real deltabab = mfbab - (fTEMP - EQbab);
+            mfbab = fTEMP;
+            //mfbab+=EQbab;
+
+            fTEMP = mfbbc + EQbbc;
+            real deltabbc = mfbbc - (fTEMP - EQbbc);
+            mfbbc = fTEMP;
+            //mfbbc+=EQbbc;
+
+            fTEMP = mfbba + EQbba;
+            real deltabba = mfbba - (fTEMP - EQbba);
+            mfbba = fTEMP;
+            //mfbba+=EQbba;
+
+            EQccb += (delta0 + c1o2*(deltacbb + deltabcb));
+            fTEMP = mfccb + EQccb;
+            real deltaccb = mfccb - (fTEMP - EQccb);
+            mfccb = fTEMP;
+            //mfccb+=EQccb+(delta0+c1o2*(deltacbb+deltabcb));
+
+            EQaab += (delta0 + c1o2*(deltaabb + deltabab));
+            fTEMP = mfaab + EQaab;
+            real deltaaab = mfaab - (fTEMP - EQaab);
+            mfaab = fTEMP;
+            //mfaab+=EQaab+(delta0+c1o2*(deltaabb+deltabab));
+
+            EQcab += (delta0 + c1o2*(deltacbb + deltabab));
+            fTEMP = mfcab + EQcab;
+            real deltacab = mfcab - (fTEMP - EQcab);
+            mfcab = fTEMP;
+            //mfcab+=EQcab+(delta0+c1o2*(deltacbb+deltabab));
+
+            EQacb += (delta0 + c1o2*(deltaabb + deltabcb));
+            fTEMP = mfacb + EQacb;
+            real deltaacb = mfacb - (fTEMP - EQacb);
+            mfacb = fTEMP;
+            //mfacb+=EQacb+(delta0+c1o2*(deltaabb+deltabcb));
+
+            EQcbc += (delta0 + c1o2*(deltacbb + deltabbc));
+            fTEMP = mfcbc + EQcbc;
+            real deltacbc = mfcbc - (fTEMP - EQcbc);
+            mfcbc = fTEMP;
+            //mfcbc+=EQcbc+(delta0+c1o2*(deltacbb+deltabbc));
+
+            EQaba += (delta0 + c1o2*(deltaabb + deltabba));
+            fTEMP = mfaba + EQaba;
+            real deltaaba = mfaba - (fTEMP - EQaba);
+            mfaba = fTEMP;
+            //mfaba+=EQaba+(delta0+c1o2*(deltaabb+deltabba));
+
+            EQcba += (delta0 + c1o2*(deltacbb + deltabba));
+            fTEMP = mfcba + EQcba;
+            real deltacba = mfcba - (fTEMP - EQcba);
+            mfcba = fTEMP;
+            //mfcba+=EQcba+(delta0+c1o2*(deltacbb+deltabba));
+
+            EQabc += (delta0 + c1o2*(deltaabb + deltabbc));
+            fTEMP = mfabc + EQabc;
+            real deltaabc = mfabc - (fTEMP - EQabc);
+            mfabc = fTEMP;
+            //mfabc+=EQabc+(delta0+c1o2*(deltaabb+deltabbc));
+
+            EQbcc += (delta0 + c1o2*(deltabcb + deltabbc));
+            fTEMP = mfbcc + EQbcc;
+            real deltabcc = mfbcc - (fTEMP - EQbcc);
+            mfbcc = fTEMP;
+            //mfbcc+=EQbcc+(delta0+c1o2*(deltabcb+deltabbc));
+
+            EQbaa += (delta0 + c1o2*(deltabab + deltabba));
+            fTEMP = mfbaa + EQbaa;
+            real deltabaa = mfbaa - (fTEMP - EQbaa);
+            mfbaa = fTEMP;
+            //mfbaa+=EQbaa+(delta0+c1o2*(deltabab+deltabba));
+
+            EQbca += (delta0 + c1o2*(deltabcb + deltabba));
+            fTEMP = mfbca + EQbca;
+            real deltabca = mfbca - (fTEMP - EQbca);
+            mfbca = fTEMP;
+            //mfbca+=EQbca+(delta0+c1o2*(deltabcb+deltabba));
+
+            EQbac += (delta0 + c1o2*(deltabab + deltabbc));
+            fTEMP = mfbac + EQbac;
+            real deltabac = mfbac - (fTEMP - EQbac);
+            mfbac = fTEMP;
+            //mfbac+=EQbac+(delta0+c1o2*(deltabab+deltabbc));
+
+            mfccc += EQccc - (delta0 + c1o4*(deltacbb + deltabcb + deltabbc) - c1o2*(deltabcc + deltacbc + deltaccb));
+            mfaac += EQaac - (delta0 + c1o4*(deltaabb + deltabab + deltabbc) - c1o2*(deltabac + deltaabc + deltaaab));
+            mfcac += EQcac - (delta0 + c1o4*(deltacbb + deltabab + deltabbc) - c1o2*(deltabac + deltacbc + deltacab));
+            mfacc += EQacc - (delta0 + c1o4*(deltaabb + deltabcb + deltabbc) - c1o2*(deltabcc + deltaabc + deltaacb));
+            mfcca += EQcca - (delta0 + c1o4*(deltacbb + deltabcb + deltabba) - c1o2*(deltabca + deltacba + deltaccb));
+            mfaaa += EQaaa - (delta0 + c1o4*(deltaabb + deltabab + deltabba) - c1o2*(deltabaa + deltaaba + deltaaab));
+            mfcaa += EQcaa - (delta0 + c1o4*(deltacbb + deltabab + deltabba) - c1o2*(deltabaa + deltacba + deltacab));
+            mfaca += EQaca - (delta0 + c1o4*(deltaabb + deltabcb + deltabba) - c1o2*(deltabca + deltaaba + deltaacb));
+
+
+
+            //////////////////////////////////////////////////////////////////////////////////////
+            ////back
+            //////////////////////////////////////////////////////////////////////////////////////
+            ////mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// Z - Dir
+            //backwardChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, one);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimera(mfaba, mfabb, mfabc, vvz, vz2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c1o3);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimera(mfbaa, mfbab, mfbac, vvz, vz2);
+            ///////////b//////////////////////////////////////////////////////////////////////////
+            //backwardChimera(mfbba, mfbbb, mfbbc, vvz, vz2);
+            ///////////b//////////////////////////////////////////////////////////////////////////
+            //backwardChimera(mfbca, mfbcb, mfbcc, vvz, vz2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c1o3);
+            ///////////c//////////////////////////////////////////////////////////////////////////
+            //backwardChimera(mfcba, mfcbb, mfcbc, vvz, vz2);
+            ///////////c//////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c1o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            ////mit 1/6, 2/3, 1/6, 0, 0, 0, 1/18, 2/9, 1/18   Konditionieren
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// Y - Dir
+            //backwardChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c1o6);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfaab, mfabb, mfacb, vvy, vy2, c2o3);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c1o6);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimera(mfbaa, mfbba, mfbca, vvz, vz2);
+            ///////////b//////////////////////////////////////////////////////////////////////////
+            //backwardChimera(mfbab, mfbbb, mfbcb, vvz, vz2);
+            ///////////b//////////////////////////////////////////////////////////////////////////
+            //backwardChimera(mfbac, mfbbc, mfbcc, vvz, vz2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c1o18);
+            ///////////c//////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfcab, mfcbb, mfccb, vvy, vy2, c2o9);
+            ///////////c//////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c1o18);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            ////mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36 Konditionieren
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// X - Dir
+            //backwardChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o36);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfaba, mfbba, mfcba, vvx, vx2, c1o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c1o36);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfaab, mfbab, mfcab, vvx, vx2, c1o9);
+            /////////////b////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfabb, mfbbb, mfcbb, vvx, vx2, c4o9);
+            /////////////b////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfacb, mfbcb, mfccb, vvx, vx2, c1o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c1o36);
+            /////////////c////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfabc, mfbbc, mfcbc, vvx, vx2, c1o9);
+            /////////////c////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c1o36);
+            //////////////////////////////////////////////////////////////////////////////////////
+
+            ////////////////////////////////////////////////////////////////////////////////////////
+            //real drhoPost =
+            //	((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+            //	(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
+            //		((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
+            //mfbbb += drho - drhoPost;
+            ////////////////////////////////////////////////////////////////////////////////////
+            (D.f[dirE])[k] = mfabb;//(D.f[ dirE   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ dirE   ])[k   ]                                                                     
+            (D.f[dirW])[kw] = mfcbb;//(D.f[ dirW   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ dirW   ])[kw  ]                                                                   
+            (D.f[dirN])[k] = mfbab;//(D.f[ dirN   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ dirN   ])[k   ]
+            (D.f[dirS])[ks] = mfbcb;//(D.f[ dirS   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ dirS   ])[ks  ]
+            (D.f[dirT])[k] = mfbba;//(D.f[ dirT   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ dirT   ])[k   ]
+            (D.f[dirB])[kb] = mfbbc;//(D.f[ dirB   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ dirB   ])[kb  ]
+            (D.f[dirNE])[k] = mfaab;//(D.f[ dirNE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ dirNE  ])[k   ]
+            (D.f[dirSW])[ksw] = mfccb;//(D.f[ dirSW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ dirSW  ])[ksw ]
+            (D.f[dirSE])[ks] = mfacb;//(D.f[ dirSE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ dirSE  ])[ks  ]
+            (D.f[dirNW])[kw] = mfcab;//(D.f[ dirNW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ dirNW  ])[kw  ]
+            (D.f[dirTE])[k] = mfaba;//(D.f[ dirTE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ dirTE  ])[k   ]
+            (D.f[dirBW])[kbw] = mfcbc;//(D.f[ dirBW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ dirBW  ])[kbw ]
+            (D.f[dirBE])[kb] = mfabc;//(D.f[ dirBE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ dirBE  ])[kb  ]
+            (D.f[dirTW])[kw] = mfcba;//(D.f[ dirTW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ dirTW  ])[kw  ]
+            (D.f[dirTN])[k] = mfbaa;//(D.f[ dirTN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ dirTN  ])[k   ]
+            (D.f[dirBS])[kbs] = mfbcc;//(D.f[ dirBS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ dirBS  ])[kbs ]
+            (D.f[dirBN])[kb] = mfbac;//(D.f[ dirBN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ dirBN  ])[kb  ]
+            (D.f[dirTS])[ks] = mfbca;//(D.f[ dirTS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ dirTS  ])[ks  ]
+            (D.f[dirZERO])[k] = mfbbb;//(D.f[ dirZERO])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ dirZERO])[k   ]
+            (D.f[dirTNE])[k] = mfaaa;//(D.f[ dirTNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ dirTNE ])[k   ]
+            (D.f[dirTSE])[ks] = mfaca;//(D.f[ dirTSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ dirTSE ])[ks  ]
+            (D.f[dirBNE])[kb] = mfaac;//(D.f[ dirBNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ dirBNE ])[kb  ]
+            (D.f[dirBSE])[kbs] = mfacc;//(D.f[ dirBSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ dirBSE ])[kbs ]
+            (D.f[dirTNW])[kw] = mfcaa;//(D.f[ dirTNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ dirTNW ])[kw  ]
+            (D.f[dirTSW])[ksw] = mfcca;//(D.f[ dirTSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ dirTSW ])[ksw ]
+            (D.f[dirBNW])[kbw] = mfcac;//(D.f[ dirBNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ dirBNW ])[kbw ]
+            (D.f[dirBSW])[kbsw] = mfccc;//(D.f[ dirBSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ dirBSW ])[kbsw]
+                                        ////////////////////////////////////////////////////////////////////////////////////
+        }
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -1002,758 +958,758 @@ extern "C" __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_
 
 ////////////////////////////////////////////////////////////////////////////////
 extern "C" __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
-	real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* DDStart,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd)
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DDStart,
+    int size_Mat,
+    int level,
+    real* forces,
+    bool EvenOrOdd)
 {
-	////////////////////////////////////////////////////////////////////////////////
-	const unsigned  x = threadIdx.x;  // Globaler x-Index 
-	const unsigned  y = blockIdx.x;   // Globaler y-Index 
-	const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-	const unsigned nx = blockDim.x;
-	const unsigned ny = gridDim.x;
-
-	const unsigned k = nx*(ny*z + y) + x;
-	//////////////////////////////////////////////////////////////////////////
-
-	if (k<size_Mat)
-	{
-		////////////////////////////////////////////////////////////////////////////////
-		unsigned int BC;
-		BC = bcMatD[k];
-
-		if (BC >= GEO_FLUID/*(BC != GEO_SOLID) && (BC != GEO_VOID)*/)
-		{
-			Distributions27 D;
-			if (EvenOrOdd == true)
-			{
-				D.f[dirE] = &DDStart[dirE   *size_Mat];
-				D.f[dirW] = &DDStart[dirW   *size_Mat];
-				D.f[dirN] = &DDStart[dirN   *size_Mat];
-				D.f[dirS] = &DDStart[dirS   *size_Mat];
-				D.f[dirT] = &DDStart[dirT   *size_Mat];
-				D.f[dirB] = &DDStart[dirB   *size_Mat];
-				D.f[dirNE] = &DDStart[dirNE  *size_Mat];
-				D.f[dirSW] = &DDStart[dirSW  *size_Mat];
-				D.f[dirSE] = &DDStart[dirSE  *size_Mat];
-				D.f[dirNW] = &DDStart[dirNW  *size_Mat];
-				D.f[dirTE] = &DDStart[dirTE  *size_Mat];
-				D.f[dirBW] = &DDStart[dirBW  *size_Mat];
-				D.f[dirBE] = &DDStart[dirBE  *size_Mat];
-				D.f[dirTW] = &DDStart[dirTW  *size_Mat];
-				D.f[dirTN] = &DDStart[dirTN  *size_Mat];
-				D.f[dirBS] = &DDStart[dirBS  *size_Mat];
-				D.f[dirBN] = &DDStart[dirBN  *size_Mat];
-				D.f[dirTS] = &DDStart[dirTS  *size_Mat];
-				D.f[dirZERO] = &DDStart[dirZERO*size_Mat];
-				D.f[dirTNE] = &DDStart[dirTNE *size_Mat];
-				D.f[dirTSW] = &DDStart[dirTSW *size_Mat];
-				D.f[dirTSE] = &DDStart[dirTSE *size_Mat];
-				D.f[dirTNW] = &DDStart[dirTNW *size_Mat];
-				D.f[dirBNE] = &DDStart[dirBNE *size_Mat];
-				D.f[dirBSW] = &DDStart[dirBSW *size_Mat];
-				D.f[dirBSE] = &DDStart[dirBSE *size_Mat];
-				D.f[dirBNW] = &DDStart[dirBNW *size_Mat];
-			}
-			else
-			{
-				D.f[dirW] = &DDStart[dirE   *size_Mat];
-				D.f[dirE] = &DDStart[dirW   *size_Mat];
-				D.f[dirS] = &DDStart[dirN   *size_Mat];
-				D.f[dirN] = &DDStart[dirS   *size_Mat];
-				D.f[dirB] = &DDStart[dirT   *size_Mat];
-				D.f[dirT] = &DDStart[dirB   *size_Mat];
-				D.f[dirSW] = &DDStart[dirNE  *size_Mat];
-				D.f[dirNE] = &DDStart[dirSW  *size_Mat];
-				D.f[dirNW] = &DDStart[dirSE  *size_Mat];
-				D.f[dirSE] = &DDStart[dirNW  *size_Mat];
-				D.f[dirBW] = &DDStart[dirTE  *size_Mat];
-				D.f[dirTE] = &DDStart[dirBW  *size_Mat];
-				D.f[dirTW] = &DDStart[dirBE  *size_Mat];
-				D.f[dirBE] = &DDStart[dirTW  *size_Mat];
-				D.f[dirBS] = &DDStart[dirTN  *size_Mat];
-				D.f[dirTN] = &DDStart[dirBS  *size_Mat];
-				D.f[dirTS] = &DDStart[dirBN  *size_Mat];
-				D.f[dirBN] = &DDStart[dirTS  *size_Mat];
-				D.f[dirZERO] = &DDStart[dirZERO*size_Mat];
-				D.f[dirBSW] = &DDStart[dirTNE *size_Mat];
-				D.f[dirBNE] = &DDStart[dirTSW *size_Mat];
-				D.f[dirBNW] = &DDStart[dirTSE *size_Mat];
-				D.f[dirBSE] = &DDStart[dirTNW *size_Mat];
-				D.f[dirTSW] = &DDStart[dirBNE *size_Mat];
-				D.f[dirTNE] = &DDStart[dirBSW *size_Mat];
-				D.f[dirTNW] = &DDStart[dirBSE *size_Mat];
-				D.f[dirTSE] = &DDStart[dirBNW *size_Mat];
-			}
-
-			////////////////////////////////////////////////////////////////////////////////
-			//index
-			//unsigned int kzero= k;
-			//unsigned int ke   = k;
-			unsigned int kw = neighborX[k];
-			//unsigned int kn   = k;
-			unsigned int ks = neighborY[k];
-			//unsigned int kt   = k;
-			unsigned int kb = neighborZ[k];
-			unsigned int ksw = neighborY[kw];
-			//unsigned int kne  = k;
-			//unsigned int kse  = ks;
-			//unsigned int knw  = kw;
-			unsigned int kbw = neighborZ[kw];
-			//unsigned int kte  = k;
-			//unsigned int kbe  = kb;
-			//unsigned int ktw  = kw;
-			unsigned int kbs = neighborZ[ks];
-			//unsigned int ktn  = k;
-			//unsigned int kbn  = kb;
-			//unsigned int kts  = ks;
-			//unsigned int ktse = ks;
-			//unsigned int kbnw = kbw;
-			//unsigned int ktnw = kw;
-			//unsigned int kbse = kbs;
-			//unsigned int ktsw = ksw;
-			//unsigned int kbne = kb;
-			//unsigned int ktne = k;
-			unsigned int kbsw = neighborZ[ksw];
-
-
-
-			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[dirE])[k];//[ke   ];// +  c2over27 ;(D.f[dirE   ])[k  ];//ke
-			real mfabb = (D.f[dirW])[kw];//[kw   ];// +  c2over27 ;(D.f[dirW   ])[kw ];
-			real mfbcb = (D.f[dirN])[k];//[kn   ];// +  c2over27 ;(D.f[dirN   ])[k  ];//kn
-			real mfbab = (D.f[dirS])[ks];//[ks   ];// +  c2over27 ;(D.f[dirS   ])[ks ];
-			real mfbbc = (D.f[dirT])[k];//[kt   ];// +  c2over27 ;(D.f[dirT   ])[k  ];//kt
-			real mfbba = (D.f[dirB])[kb];//[kb   ];// +  c2over27 ;(D.f[dirB   ])[kb ];
-			real mfccb = (D.f[dirNE])[k];//[kne  ];// +  c1over54 ;(D.f[dirNE  ])[k  ];//kne
-			real mfaab = (D.f[dirSW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[dirSW  ])[ksw];
-			real mfcab = (D.f[dirSE])[ks];//[kse  ];// +  c1over54 ;(D.f[dirSE  ])[ks ];//kse
-			real mfacb = (D.f[dirNW])[kw];//[knw  ];// +  c1over54 ;(D.f[dirNW  ])[kw ];//knw
-			real mfcbc = (D.f[dirTE])[k];//[kte  ];// +  c1over54 ;(D.f[dirTE  ])[k  ];//kte
-			real mfaba = (D.f[dirBW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[dirBW  ])[kbw];
-			real mfcba = (D.f[dirBE])[kb];//[kbe  ];// +  c1over54 ;(D.f[dirBE  ])[kb ];//kbe
-			real mfabc = (D.f[dirTW])[kw];//[ktw  ];// +  c1over54 ;(D.f[dirTW  ])[kw ];//ktw
-			real mfbcc = (D.f[dirTN])[k];//[ktn  ];// +  c1over54 ;(D.f[dirTN  ])[k  ];//ktn
-			real mfbaa = (D.f[dirBS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[dirBS  ])[kbs];
-			real mfbca = (D.f[dirBN])[kb];//[kbn  ];// +  c1over54 ;(D.f[dirBN  ])[kb ];//kbn
-			real mfbac = (D.f[dirTS])[ks];//[kts  ];// +  c1over54 ;(D.f[dirTS  ])[ks ];//kts
-			real mfbbb = (D.f[dirZERO])[k];//[kzero];// +  c8over27 ;(D.f[dirZERO])[k  ];//kzero
-			real mfccc = (D.f[dirTNE])[k];//[ktne ];// +  c1over216;(D.f[dirTNE ])[k  ];//ktne
-			real mfaac = (D.f[dirTSW])[ksw];//[ktsw ];// +  c1over216;(D.f[dirTSW ])[ksw];//ktsw
-			real mfcac = (D.f[dirTSE])[ks];//[ktse ];// +  c1over216;(D.f[dirTSE ])[ks ];//ktse
-			real mfacc = (D.f[dirTNW])[kw];//[ktnw ];// +  c1over216;(D.f[dirTNW ])[kw ];//ktnw
-			real mfcca = (D.f[dirBNE])[kb];//[kbne ];// +  c1over216;(D.f[dirBNE ])[kb ];//kbne
-			real mfaaa = (D.f[dirBSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[dirBSW ])[kbsw];
-			real mfcaa = (D.f[dirBSE])[kbs];//[kbse ];// +  c1over216;(D.f[dirBSE ])[kbs];//kbse
-			real mfaca = (D.f[dirBNW])[kbw];//[kbnw ];// +  c1over216;(D.f[dirBNW ])[kbw];//kbnw
-											   ////////////////////////////////////////////////////////////////////////////////////
-			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
-				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
-				((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
-
-			real rho = c1o1 + drho;
-			////////////////////////////////////////////////////////////////////////////////////
-			real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
-				(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
-				(mfcbb - mfabb)) / rho;
-			real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
-				(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
-				(mfbcb - mfbab)) / rho;
-			real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
-				(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
-				(mfbbc - mfbba)) / rho;
-			////////////////////////////////////////////////////////////////////////////////////
-			//the force be with you
-			real fx = forces[0] / (pow((double)c2o1, (double)level)); //zero;//0.0032653/(pow(two,level)); //0.000000005;//(two/1600000.0) / 120.0; //
-			real fy = forces[1] / (pow((double)c2o1, (double)level)); //zero;
-			real fz = forces[2] / (pow((double)c2o1, (double)level)); //zero;
-			vvx += fx*c1o2;
-			vvy += fy*c1o2;
-			vvz += fz*c1o2;
-			////////////////////////////////////////////////////////////////////////////////////
-			//real omega = omega_in;
-			////////////////////////////////////////////////////////////////////////////////////
-			//fast
-			//real oMdrho = c1o1; // comp special
-			//real m0, m1, m2;
-			real vx2;
-			real vy2;
-			real vz2;
-			vx2 = vvx*vvx;
-			vy2 = vvy*vvy;
-			vz2 = vvz*vvz;
-			////////////////////////////////////////////////////////////////////////////////////
-			//real wadjust;
-			//real qudricLimitP = c1o100;// * 0.0001f;
-			//real qudricLimitM = c1o100;// * 0.0001f;
-			//real qudricLimitD = c1o100;// * 0.001f;
-			//real s9 = minusomega;
-			//test
-			//s9 = 0.;
-
-
-			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real EQcbb = c0o1;
-			real EQabb = c0o1;
-			real EQbcb = c0o1;
-			real EQbab = c0o1;
-			real EQbbc = c0o1;
-			real EQbba = c0o1;
-			real EQccb = c0o1;
-			real EQaab = c0o1;
-			real EQcab = c0o1;
-			real EQacb = c0o1;
-			real EQcbc = c0o1;
-			real EQaba = c0o1;
-			real EQcba = c0o1;
-			real EQabc = c0o1;
-			real EQbcc = c0o1;
-			real EQbaa = c0o1;
-			real EQbca = c0o1;
-			real EQbac = c0o1;
-			real EQbbb = c0o1;
-			real EQccc = drho * c1o27;
-			real EQaac = drho * c1o3;
-			real EQcac = drho * c1o9;
-			real EQacc = drho * c1o9;
-			real EQcca = drho * c1o9;
-			real EQaaa = drho;
-			real EQcaa = drho * c1o3;
-			real EQaca = drho * c1o3;
-			////////////////////////////////////////////////////////////////////////////////////
-			backwardChimeraWithK(EQaaa, EQaab, EQaac, vvz, vz2, c1o1);
-			backwardChimeraWithK(EQaca, EQacb, EQacc, vvz, vz2, c1o3);
-			///////////////////////////////////////////////////////////
-			EQcaa = EQaca; EQcab = EQacb; EQcac = EQacc;
-			///////////////////////////////////////////////////////////
-			backwardChimeraWithK(EQcca, EQccb, EQccc, vvz, vz2, c1o9);
-
-			backwardChimeraWithK(EQaaa, EQaba, EQaca, vvy, vy2, c1o6);
-			backwardChimeraWithK(EQaab, EQabb, EQacb, vvy, vy2, c2o3);
-			backwardChimeraWithK(EQaac, EQabc, EQacc, vvy, vy2, c1o6);
-			backwardChimeraWithK(EQcaa, EQcba, EQcca, vvy, vy2, c1o18);
-			backwardChimeraWithK(EQcab, EQcbb, EQccb, vvy, vy2, c2o9);
-			backwardChimeraWithK(EQcac, EQcbc, EQccc, vvy, vy2, c1o18);
-
-			backwardChimeraWithK(EQaaa, EQbaa, EQcaa, vvx, vx2, c1o36);
-			backwardChimeraWithK(EQaab, EQbab, EQcab, vvx, vx2, c1o9);
-			backwardChimeraWithK(EQaac, EQbac, EQcac, vvx, vx2, c1o36);
-			backwardChimeraWithK(EQaba, EQbba, EQcba, vvx, vx2, c1o9);
-			backwardChimeraWithK(EQabb, EQbbb, EQcbb, vvx, vx2, c4o9);
-			backwardChimeraWithK(EQabc, EQbbc, EQcbc, vvx, vx2, c1o9);
-			backwardChimeraWithK(EQaca, EQbca, EQcca, vvx, vx2, c1o36);
-			backwardChimeraWithK(EQacb, EQbcb, EQccb, vvx, vx2, c1o9);
-			backwardChimeraWithK(EQacc, EQbcc, EQccc, vvx, vx2, c1o36);
-
-			////////////////////////////////////////////////////////////////////////////////////
-			//Pre-condition
-			mfcbb -= EQcbb;
-			mfabb -= EQabb;
-			mfbcb -= EQbcb;
-			mfbab -= EQbab;
-			mfbbc -= EQbbc;
-			mfbba -= EQbba;
-			mfccb -= EQccb;
-			mfaab -= EQaab;
-			mfcab -= EQcab;
-			mfacb -= EQacb;
-			mfcbc -= EQcbc;
-			mfaba -= EQaba;
-			mfcba -= EQcba;
-			mfabc -= EQabc;
-			mfbcc -= EQbcc;
-			mfbaa -= EQbaa;
-			mfbca -= EQbca;
-			mfbac -= EQbac;
-			mfbbb -= EQbbb;
-			mfccc -= EQccc;
-			mfaac -= EQaac;
-			mfcac -= EQcac;
-			mfacc -= EQacc;
-			mfcca -= EQcca;
-			mfaaa -= EQaaa;
-			mfcaa -= EQcaa;
-			mfaca -= EQaca;
-
-			////////////////////////////////////////////////////////////////////////////////////
-			//Hin
-			////////////////////////////////////////////////////////////////////////////////////
-			forwardChimera(mfaaa, mfaab, mfaac, vvz, vz2);
-			forwardChimera(mfaba, mfabb, mfabc, vvz, vz2);
-			forwardChimera(mfaca, mfacb, mfacc, vvz, vz2);
-			forwardChimera(mfbaa, mfbab, mfbac, vvz, vz2);
-			forwardChimera(mfbba, mfbbb, mfbbc, vvz, vz2);
-			forwardChimera(mfbca, mfbcb, mfbcc, vvz, vz2);
-			forwardChimera(mfcaa, mfcab, mfcac, vvz, vz2);
-			forwardChimera(mfcba, mfcbb, mfcbc, vvz, vz2);
-			forwardChimera(mfcca, mfccb, mfccc, vvz, vz2);
-
-			forwardChimera(mfaaa, mfaba, mfaca, vvy, vy2);
-			forwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-			forwardChimera(mfaac, mfabc, mfacc, vvy, vy2);
-			forwardChimera(mfbaa, mfbba, mfbca, vvy, vy2);
-			forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
-			forwardChimera(mfbac, mfbbc, mfbcc, vvy, vy2);
-			forwardChimera(mfcaa, mfcba, mfcca, vvy, vy2);
-			forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-			forwardChimera(mfcac, mfcbc, mfccc, vvy, vy2);
-
-			forwardChimera(mfaaa, mfbaa, mfcaa, vvx, vx2);
-			forwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
-			forwardChimera(mfaac, mfbac, mfcac, vvx, vx2);
-			forwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-			forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
-			forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-			forwardChimera(mfaca, mfbca, mfcca, vvx, vx2);
-			forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-			forwardChimera(mfacc, mfbcc, mfccc, vvx, vx2);
-
-			//////////////////////////////////////////////////////////////////////////////////////
-			////Hin
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36  Konditionieren
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// Z - Dir
-			//forwardChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c1o36);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c1o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c1o36);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c1o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c4o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c1o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c1o36);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c1o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c1o36);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// mit  1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// Y - Dir
-			//forwardChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c1o6);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c1o18);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c2o3);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c2o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c1o6);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c1o18);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// mit     1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9		Konditionieren
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// X - Dir
-			//forwardChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, one);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c1o3);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c1o3);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//forwardChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c1o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-
-
-			////////////////////////////////////////////////////////////////////////////////////
-			// Cumulants
-			////////////////////////////////////////////////////////////////////////////////////
-			real OxxPyyPzz = c1o1; //omega; // one;	//set the bulk viscosity one is high / two is very low and zero is (too) high
-
-			////////////////////////////////////////////////////////////
-			//3.
-			//////////////////////////////
-			real OxyyPxzz = c1o1;
-			real OxyyMxzz = c1o1;
-			//real Oxyz = c1o1;
-			////////////////////////////////////////////////////////////
-			//4.
-			//////////////////////////////
-			real O4 = c1o1;
-			////////////////////////////////////////////////////////////
-			//5.
-			//////////////////////////////
-			real O5 = c1o1;
-			////////////////////////////////////////////////////////////
-			//6.
-			//////////////////////////////
-			real O6 = c1o1;
-			////////////////////////////////////////////////////////////
-
-
-			//central moments to cumulants
-			//4.
-			real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) / rho;
-			real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) / rho;
-			real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) / rho;
-
-			real CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9*(drho / rho));
-			real CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9*(drho / rho));
-			real CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9*(drho / rho));
-
-			//5.
-			real CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
-			real CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
-			real CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
-
-			//6.
-
-			real CUMccc = mfccc + ((-c4o1 *  mfbbb * mfbbb
-				- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
-				- c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
-				- c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
-				+ (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
-					+ c2o1 * (mfcaa * mfaca * mfaac)
-					+ c16o1 *  mfbba * mfbab * mfabb) / (rho * rho)
-				- c1o3 * (mfacc + mfcac + mfcca) / rho
-				- c1o9 * (mfcaa + mfaca + mfaac) / rho
-				+ (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
-					+ (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3
-				+ c1o27*((drho * drho - drho) / (rho*rho)));
-
-
-
-
-			//2.
-			// linear combinations
-			real mxxPyyPzz = mfcaa + mfaca + mfaac;
-			real mxxMyy = mfcaa - mfaca;
-			real mxxMzz = mfcaa - mfaac;
-
-			/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			//incl. correction		(hat noch nicht so gut funktioniert...Optimierungsbedarf??)
-			{
-				real dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 *  OxxPyyPzz * (mfaaa - mxxPyyPzz);
-				real dyuy = dxux + omega * c3o2 * mxxMyy;
-				real dzuz = dxux + omega * c3o2 * mxxMzz;
-
-				//relax
-				mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);//-magicBulk*OxxPyyPzz;
-				mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
-				mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
-
-			}
-			/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-			/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			////no correction
-			//mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz);//-magicBulk*OxxPyyPzz;
-			//mxxMyy += -(-omega) * (-mxxMyy);
-			//mxxMzz += -(-omega) * (-mxxMzz);
-			/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			mfabb += omega * (-mfabb);
-			mfbab += omega * (-mfbab);
-			mfbba += omega * (-mfbba);
-
-			//////////////////////////////////////////////////////////////////////////
-
-			// linear combinations back
-			mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-			mfaca = c1o3 * (-c2o1*  mxxMyy + mxxMzz + mxxPyyPzz);
-			mfaac = c1o3 * (mxxMyy - c2o1* mxxMzz + mxxPyyPzz);
-
-			//3.
-			// linear combinations
-
-			real mxxyPyzz = mfcba + mfabc;
-			real mxxyMyzz = mfcba - mfabc;
-
-			real mxxzPyyz = mfcab + mfacb;
-			real mxxzMyyz = mfcab - mfacb;
-
-			real mxyyPxzz = mfbca + mfbac;
-			real mxyyMxzz = mfbca - mfbac;
-
-			//relax
-			//////////////////////////////////////////////////////////////////////////
-			mfbbb += OxyyMxzz * (-mfbbb);
-			mxxyPyzz += OxyyPxzz * (-mxxyPyzz);
-			mxxyMyzz += OxyyMxzz * (-mxxyMyzz);
-			mxxzPyyz += OxyyPxzz * (-mxxzPyyz);
-			mxxzMyyz += OxyyMxzz * (-mxxzMyyz);
-			mxyyPxzz += OxyyPxzz * (-mxyyPxzz);
-			mxyyMxzz += OxyyMxzz * (-mxyyMxzz);
-			//////////////////////////////////////////////////////////////////////////
-
-			mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
-			mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2;
-			mfcab = (mxxzMyyz + mxxzPyyz) * c1o2;
-			mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2;
-			mfbca = (mxyyMxzz + mxyyPxzz) * c1o2;
-			mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
-
-			//4.
-			//////////////////////////////////////////////////////////////////////////
-			CUMacc += O4 * (-CUMacc);
-			CUMcac += O4 * (-CUMcac);
-			CUMcca += O4 * (-CUMcca);
-
-			CUMbbc += O4 * (-CUMbbc);
-			CUMbcb += O4 * (-CUMbcb);
-			CUMcbb += O4 * (-CUMcbb);
-			//////////////////////////////////////////////////////////////////////////
-
-
-			//5.
-			CUMbcc += O5 * (-CUMbcc);
-			CUMcbc += O5 * (-CUMcbc);
-			CUMccb += O5 * (-CUMccb);
-
-			//6.
-			CUMccc += O6 * (-CUMccc);
-
-
-
-			//back cumulants to central moments
-			//4.
-			mfcbb = CUMcbb + ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) / rho;
-			mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) / rho;
-			mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) / rho;
-
-			mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9*(drho / rho));
-			mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9*(drho / rho));
-			mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9*(drho / rho));
-
-			//5.
-			mfbcc = CUMbcc + ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
-			mfcbc = CUMcbc + ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
-			mfccb = CUMccb + ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
-
-			//6.
-			mfccc = CUMccc - ((-c4o1 *  mfbbb * mfbbb
-				- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
-				- c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
-				- c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
-				+ (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
-					+ c2o1 * (mfcaa * mfaca * mfaac)
-					+ c16o1 *  mfbba * mfbab * mfabb) / (rho * rho)
-				- c1o3 * (mfacc + mfcac + mfcca) / rho
-				- c1o9 * (mfcaa + mfaca + mfaac) / rho
-				+ (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
-					+ (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3
-				+ c1o27*((drho * drho - drho) / (rho*rho)));
-
-			////////////////////////////////////////////////////////////////////////////////////
-			//the force be with you
-			mfbaa = -mfbaa;
-			mfaba = -mfaba;
-			mfaab = -mfaab;
-			////////////////////////////////////////////////////////////////////////////////////
-
-
-			////////////////////////////////////////////////////////////////////////////////////
-			//back
-			////////////////////////////////////////////////////////////////////////////////////
-			backwardChimera(mfaaa, mfaab, mfaac, vvz, vz2);
-			backwardChimera(mfaba, mfabb, mfabc, vvz, vz2);
-			backwardChimera(mfaca, mfacb, mfacc, vvz, vz2);
-			backwardChimera(mfbaa, mfbab, mfbac, vvz, vz2);
-			backwardChimera(mfbba, mfbbb, mfbbc, vvz, vz2);
-			backwardChimera(mfbca, mfbcb, mfbcc, vvz, vz2);
-			backwardChimera(mfcaa, mfcab, mfcac, vvz, vz2);
-			backwardChimera(mfcba, mfcbb, mfcbc, vvz, vz2);
-			backwardChimera(mfcca, mfccb, mfccc, vvz, vz2);
-
-			backwardChimera(mfaaa, mfaba, mfaca, vvy, vy2);
-			backwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-			backwardChimera(mfaac, mfabc, mfacc, vvy, vy2);
-			backwardChimera(mfbaa, mfbba, mfbca, vvy, vy2);
-			backwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
-			backwardChimera(mfbac, mfbbc, mfbcc, vvy, vy2);
-			backwardChimera(mfcaa, mfcba, mfcca, vvy, vy2);
-			backwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-			backwardChimera(mfcac, mfcbc, mfccc, vvy, vy2);
-
-			backwardChimera(mfaaa, mfbaa, mfcaa, vvx, vx2);
-			backwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
-			backwardChimera(mfaac, mfbac, mfcac, vvx, vx2);
-			backwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-			backwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
-			backwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-			backwardChimera(mfaca, mfbca, mfcca, vvx, vx2);
-			backwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-			backwardChimera(mfacc, mfbcc, mfccc, vvx, vx2);
-
-			////////////////////////////////////////////////////////////////////////////////////
-			mfcbb+=EQcbb;
-			mfabb+=EQabb;
-			mfbcb+=EQbcb;
-			mfbab+=EQbab;
-			mfbbc+=EQbbc;
-			mfbba+=EQbba;
-			mfccb+=EQccb;
-			mfaab+=EQaab;
-			mfcab+=EQcab;
-			mfacb+=EQacb;
-			mfcbc+=EQcbc;
-			mfaba+=EQaba;
-			mfcba+=EQcba;
-			mfabc+=EQabc;
-			mfbcc+=EQbcc;
-			mfbaa+=EQbaa;
-			mfbca+=EQbca;
-			mfbac+=EQbac;
-			mfbbb+=EQbbb;
-			mfccc+=EQccc;
-			mfaac+=EQaac;
-			mfcac+=EQcac;
-			mfacc+=EQacc;
-			mfcca+=EQcca;
-			mfaaa+=EQaaa;
-			mfcaa+=EQcaa;
-			mfaca+=EQaca;
-
-
-			//////////////////////////////////////////////////////////////////////////////////////
-			////back
-			//////////////////////////////////////////////////////////////////////////////////////
-			////mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// Z - Dir
-			//backwardChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, one);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimera(mfaba, mfabb, mfabc, vvz, vz2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c1o3);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimera(mfbaa, mfbab, mfbac, vvz, vz2);
-			///////////b//////////////////////////////////////////////////////////////////////////
-			//backwardChimera(mfbba, mfbbb, mfbbc, vvz, vz2);
-			///////////b//////////////////////////////////////////////////////////////////////////
-			//backwardChimera(mfbca, mfbcb, mfbcc, vvz, vz2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c1o3);
-			///////////c//////////////////////////////////////////////////////////////////////////
-			//backwardChimera(mfcba, mfcbb, mfcbc, vvz, vz2);
-			///////////c//////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c1o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			////mit 1/6, 2/3, 1/6, 0, 0, 0, 1/18, 2/9, 1/18   Konditionieren
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// Y - Dir
-			//backwardChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c1o6);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfaab, mfabb, mfacb, vvy, vy2, c2o3);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c1o6);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimera(mfbaa, mfbba, mfbca, vvz, vz2);
-			///////////b//////////////////////////////////////////////////////////////////////////
-			//backwardChimera(mfbab, mfbbb, mfbcb, vvz, vz2);
-			///////////b//////////////////////////////////////////////////////////////////////////
-			//backwardChimera(mfbac, mfbbc, mfbcc, vvz, vz2);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c1o18);
-			///////////c//////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfcab, mfcbb, mfccb, vvy, vy2, c2o9);
-			///////////c//////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c1o18);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			////mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36 Konditionieren
-			//////////////////////////////////////////////////////////////////////////////////////
-			//// X - Dir
-			//backwardChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o36);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfaba, mfbba, mfcba, vvx, vx2, c1o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c1o36);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfaab, mfbab, mfcab, vvx, vx2, c1o9);
-			/////////////b////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfabb, mfbbb, mfcbb, vvx, vx2, c4o9);
-			/////////////b////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfacb, mfbcb, mfccb, vvx, vx2, c1o9);
-			//////////////////////////////////////////////////////////////////////////////////////
-			//////////////////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c1o36);
-			/////////////c////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfabc, mfbbc, mfcbc, vvx, vx2, c1o9);
-			/////////////c////////////////////////////////////////////////////////////////////////
-			//backwardChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c1o36);
-			//////////////////////////////////////////////////////////////////////////////////////
-
-			//////////////////////////////////////////////////////////////////////////////////////
-			real drhoPost =
-				((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
-				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
-					((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
-			mfbbb += drho - drhoPost;
-			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[dirE])[k] = mfabb;//(D.f[ dirE   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ dirE   ])[k   ]                                                                     
-			(D.f[dirW])[kw] = mfcbb;//(D.f[ dirW   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ dirW   ])[kw  ]                                                                   
-			(D.f[dirN])[k] = mfbab;//(D.f[ dirN   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ dirN   ])[k   ]
-			(D.f[dirS])[ks] = mfbcb;//(D.f[ dirS   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ dirS   ])[ks  ]
-			(D.f[dirT])[k] = mfbba;//(D.f[ dirT   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ dirT   ])[k   ]
-			(D.f[dirB])[kb] = mfbbc;//(D.f[ dirB   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ dirB   ])[kb  ]
-			(D.f[dirNE])[k] = mfaab;//(D.f[ dirNE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ dirNE  ])[k   ]
-			(D.f[dirSW])[ksw] = mfccb;//(D.f[ dirSW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ dirSW  ])[ksw ]
-			(D.f[dirSE])[ks] = mfacb;//(D.f[ dirSE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ dirSE  ])[ks  ]
-			(D.f[dirNW])[kw] = mfcab;//(D.f[ dirNW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ dirNW  ])[kw  ]
-			(D.f[dirTE])[k] = mfaba;//(D.f[ dirTE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ dirTE  ])[k   ]
-			(D.f[dirBW])[kbw] = mfcbc;//(D.f[ dirBW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ dirBW  ])[kbw ]
-			(D.f[dirBE])[kb] = mfabc;//(D.f[ dirBE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ dirBE  ])[kb  ]
-			(D.f[dirTW])[kw] = mfcba;//(D.f[ dirTW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ dirTW  ])[kw  ]
-			(D.f[dirTN])[k] = mfbaa;//(D.f[ dirTN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ dirTN  ])[k   ]
-			(D.f[dirBS])[kbs] = mfbcc;//(D.f[ dirBS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ dirBS  ])[kbs ]
-			(D.f[dirBN])[kb] = mfbac;//(D.f[ dirBN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ dirBN  ])[kb  ]
-			(D.f[dirTS])[ks] = mfbca;//(D.f[ dirTS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ dirTS  ])[ks  ]
-			(D.f[dirZERO])[k] = mfbbb;//(D.f[ dirZERO])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ dirZERO])[k   ]
-			(D.f[dirTNE])[k] = mfaaa;//(D.f[ dirTNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ dirTNE ])[k   ]
-			(D.f[dirTSE])[ks] = mfaca;//(D.f[ dirTSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ dirTSE ])[ks  ]
-			(D.f[dirBNE])[kb] = mfaac;//(D.f[ dirBNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ dirBNE ])[kb  ]
-			(D.f[dirBSE])[kbs] = mfacc;//(D.f[ dirBSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ dirBSE ])[kbs ]
-			(D.f[dirTNW])[kw] = mfcaa;//(D.f[ dirTNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ dirTNW ])[kw  ]
-			(D.f[dirTSW])[ksw] = mfcca;//(D.f[ dirTSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ dirTSW ])[ksw ]
-			(D.f[dirBNW])[kbw] = mfcac;//(D.f[ dirBNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ dirBNW ])[kbw ]
-			(D.f[dirBSW])[kbsw] = mfccc;//(D.f[ dirBSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ dirBSW ])[kbsw]
-			////////////////////////////////////////////////////////////////////////////////////
-		}
-	}
+    ////////////////////////////////////////////////////////////////////////////////
+    const unsigned  x = threadIdx.x;  // Globaler x-Index 
+    const unsigned  y = blockIdx.x;   // Globaler y-Index 
+    const unsigned  z = blockIdx.y;   // Globaler z-Index 
+
+    const unsigned nx = blockDim.x;
+    const unsigned ny = gridDim.x;
+
+    const unsigned k = nx*(ny*z + y) + x;
+    //////////////////////////////////////////////////////////////////////////
+
+    if (k<size_Mat)
+    {
+        ////////////////////////////////////////////////////////////////////////////////
+        unsigned int BC;
+        BC = bcMatD[k];
+
+        if (BC >= GEO_FLUID/*(BC != GEO_SOLID) && (BC != GEO_VOID)*/)
+        {
+            Distributions27 D;
+            if (EvenOrOdd == true)
+            {
+                D.f[dirE] = &DDStart[dirE   *size_Mat];
+                D.f[dirW] = &DDStart[dirW   *size_Mat];
+                D.f[dirN] = &DDStart[dirN   *size_Mat];
+                D.f[dirS] = &DDStart[dirS   *size_Mat];
+                D.f[dirT] = &DDStart[dirT   *size_Mat];
+                D.f[dirB] = &DDStart[dirB   *size_Mat];
+                D.f[dirNE] = &DDStart[dirNE  *size_Mat];
+                D.f[dirSW] = &DDStart[dirSW  *size_Mat];
+                D.f[dirSE] = &DDStart[dirSE  *size_Mat];
+                D.f[dirNW] = &DDStart[dirNW  *size_Mat];
+                D.f[dirTE] = &DDStart[dirTE  *size_Mat];
+                D.f[dirBW] = &DDStart[dirBW  *size_Mat];
+                D.f[dirBE] = &DDStart[dirBE  *size_Mat];
+                D.f[dirTW] = &DDStart[dirTW  *size_Mat];
+                D.f[dirTN] = &DDStart[dirTN  *size_Mat];
+                D.f[dirBS] = &DDStart[dirBS  *size_Mat];
+                D.f[dirBN] = &DDStart[dirBN  *size_Mat];
+                D.f[dirTS] = &DDStart[dirTS  *size_Mat];
+                D.f[dirZERO] = &DDStart[dirZERO*size_Mat];
+                D.f[dirTNE] = &DDStart[dirTNE *size_Mat];
+                D.f[dirTSW] = &DDStart[dirTSW *size_Mat];
+                D.f[dirTSE] = &DDStart[dirTSE *size_Mat];
+                D.f[dirTNW] = &DDStart[dirTNW *size_Mat];
+                D.f[dirBNE] = &DDStart[dirBNE *size_Mat];
+                D.f[dirBSW] = &DDStart[dirBSW *size_Mat];
+                D.f[dirBSE] = &DDStart[dirBSE *size_Mat];
+                D.f[dirBNW] = &DDStart[dirBNW *size_Mat];
+            }
+            else
+            {
+                D.f[dirW] = &DDStart[dirE   *size_Mat];
+                D.f[dirE] = &DDStart[dirW   *size_Mat];
+                D.f[dirS] = &DDStart[dirN   *size_Mat];
+                D.f[dirN] = &DDStart[dirS   *size_Mat];
+                D.f[dirB] = &DDStart[dirT   *size_Mat];
+                D.f[dirT] = &DDStart[dirB   *size_Mat];
+                D.f[dirSW] = &DDStart[dirNE  *size_Mat];
+                D.f[dirNE] = &DDStart[dirSW  *size_Mat];
+                D.f[dirNW] = &DDStart[dirSE  *size_Mat];
+                D.f[dirSE] = &DDStart[dirNW  *size_Mat];
+                D.f[dirBW] = &DDStart[dirTE  *size_Mat];
+                D.f[dirTE] = &DDStart[dirBW  *size_Mat];
+                D.f[dirTW] = &DDStart[dirBE  *size_Mat];
+                D.f[dirBE] = &DDStart[dirTW  *size_Mat];
+                D.f[dirBS] = &DDStart[dirTN  *size_Mat];
+                D.f[dirTN] = &DDStart[dirBS  *size_Mat];
+                D.f[dirTS] = &DDStart[dirBN  *size_Mat];
+                D.f[dirBN] = &DDStart[dirTS  *size_Mat];
+                D.f[dirZERO] = &DDStart[dirZERO*size_Mat];
+                D.f[dirBSW] = &DDStart[dirTNE *size_Mat];
+                D.f[dirBNE] = &DDStart[dirTSW *size_Mat];
+                D.f[dirBNW] = &DDStart[dirTSE *size_Mat];
+                D.f[dirBSE] = &DDStart[dirTNW *size_Mat];
+                D.f[dirTSW] = &DDStart[dirBNE *size_Mat];
+                D.f[dirTNE] = &DDStart[dirBSW *size_Mat];
+                D.f[dirTNW] = &DDStart[dirBSE *size_Mat];
+                D.f[dirTSE] = &DDStart[dirBNW *size_Mat];
+            }
+
+            ////////////////////////////////////////////////////////////////////////////////
+            //index
+            //unsigned int kzero= k;
+            //unsigned int ke   = k;
+            unsigned int kw = neighborX[k];
+            //unsigned int kn   = k;
+            unsigned int ks = neighborY[k];
+            //unsigned int kt   = k;
+            unsigned int kb = neighborZ[k];
+            unsigned int ksw = neighborY[kw];
+            //unsigned int kne  = k;
+            //unsigned int kse  = ks;
+            //unsigned int knw  = kw;
+            unsigned int kbw = neighborZ[kw];
+            //unsigned int kte  = k;
+            //unsigned int kbe  = kb;
+            //unsigned int ktw  = kw;
+            unsigned int kbs = neighborZ[ks];
+            //unsigned int ktn  = k;
+            //unsigned int kbn  = kb;
+            //unsigned int kts  = ks;
+            //unsigned int ktse = ks;
+            //unsigned int kbnw = kbw;
+            //unsigned int ktnw = kw;
+            //unsigned int kbse = kbs;
+            //unsigned int ktsw = ksw;
+            //unsigned int kbne = kb;
+            //unsigned int ktne = k;
+            unsigned int kbsw = neighborZ[ksw];
+
+
+
+            //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            real mfcbb = (D.f[dirE])[k];//[ke   ];// +  c2over27 ;(D.f[dirE   ])[k  ];//ke
+            real mfabb = (D.f[dirW])[kw];//[kw   ];// +  c2over27 ;(D.f[dirW   ])[kw ];
+            real mfbcb = (D.f[dirN])[k];//[kn   ];// +  c2over27 ;(D.f[dirN   ])[k  ];//kn
+            real mfbab = (D.f[dirS])[ks];//[ks   ];// +  c2over27 ;(D.f[dirS   ])[ks ];
+            real mfbbc = (D.f[dirT])[k];//[kt   ];// +  c2over27 ;(D.f[dirT   ])[k  ];//kt
+            real mfbba = (D.f[dirB])[kb];//[kb   ];// +  c2over27 ;(D.f[dirB   ])[kb ];
+            real mfccb = (D.f[dirNE])[k];//[kne  ];// +  c1over54 ;(D.f[dirNE  ])[k  ];//kne
+            real mfaab = (D.f[dirSW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[dirSW  ])[ksw];
+            real mfcab = (D.f[dirSE])[ks];//[kse  ];// +  c1over54 ;(D.f[dirSE  ])[ks ];//kse
+            real mfacb = (D.f[dirNW])[kw];//[knw  ];// +  c1over54 ;(D.f[dirNW  ])[kw ];//knw
+            real mfcbc = (D.f[dirTE])[k];//[kte  ];// +  c1over54 ;(D.f[dirTE  ])[k  ];//kte
+            real mfaba = (D.f[dirBW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[dirBW  ])[kbw];
+            real mfcba = (D.f[dirBE])[kb];//[kbe  ];// +  c1over54 ;(D.f[dirBE  ])[kb ];//kbe
+            real mfabc = (D.f[dirTW])[kw];//[ktw  ];// +  c1over54 ;(D.f[dirTW  ])[kw ];//ktw
+            real mfbcc = (D.f[dirTN])[k];//[ktn  ];// +  c1over54 ;(D.f[dirTN  ])[k  ];//ktn
+            real mfbaa = (D.f[dirBS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[dirBS  ])[kbs];
+            real mfbca = (D.f[dirBN])[kb];//[kbn  ];// +  c1over54 ;(D.f[dirBN  ])[kb ];//kbn
+            real mfbac = (D.f[dirTS])[ks];//[kts  ];// +  c1over54 ;(D.f[dirTS  ])[ks ];//kts
+            real mfbbb = (D.f[dirZERO])[k];//[kzero];// +  c8over27 ;(D.f[dirZERO])[k  ];//kzero
+            real mfccc = (D.f[dirTNE])[k];//[ktne ];// +  c1over216;(D.f[dirTNE ])[k  ];//ktne
+            real mfaac = (D.f[dirTSW])[ksw];//[ktsw ];// +  c1over216;(D.f[dirTSW ])[ksw];//ktsw
+            real mfcac = (D.f[dirTSE])[ks];//[ktse ];// +  c1over216;(D.f[dirTSE ])[ks ];//ktse
+            real mfacc = (D.f[dirTNW])[kw];//[ktnw ];// +  c1over216;(D.f[dirTNW ])[kw ];//ktnw
+            real mfcca = (D.f[dirBNE])[kb];//[kbne ];// +  c1over216;(D.f[dirBNE ])[kb ];//kbne
+            real mfaaa = (D.f[dirBSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[dirBSW ])[kbsw];
+            real mfcaa = (D.f[dirBSE])[kbs];//[kbse ];// +  c1over216;(D.f[dirBSE ])[kbs];//kbse
+            real mfaca = (D.f[dirBNW])[kbw];//[kbnw ];// +  c1over216;(D.f[dirBNW ])[kbw];//kbnw
+                                               ////////////////////////////////////////////////////////////////////////////////////
+            real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+                (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
+                ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
+
+            real rho = c1o1 + drho;
+            ////////////////////////////////////////////////////////////////////////////////////
+            real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+                (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
+                (mfcbb - mfabb)) / rho;
+            real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+                (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
+                (mfbcb - mfbab)) / rho;
+            real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+                (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
+                (mfbbc - mfbba)) / rho;
+            ////////////////////////////////////////////////////////////////////////////////////
+            //the force be with you
+            real fx = forces[0] / (pow((double)c2o1, (double)level)); //zero;//0.0032653/(pow(two,level)); //0.000000005;//(two/1600000.0) / 120.0; //
+            real fy = forces[1] / (pow((double)c2o1, (double)level)); //zero;
+            real fz = forces[2] / (pow((double)c2o1, (double)level)); //zero;
+            vvx += fx*c1o2;
+            vvy += fy*c1o2;
+            vvz += fz*c1o2;
+            ////////////////////////////////////////////////////////////////////////////////////
+            //real omega = omega_in;
+            ////////////////////////////////////////////////////////////////////////////////////
+            //fast
+            //real oMdrho = c1o1; // comp special
+            //real m0, m1, m2;
+            real vx2;
+            real vy2;
+            real vz2;
+            vx2 = vvx*vvx;
+            vy2 = vvy*vvy;
+            vz2 = vvz*vvz;
+            ////////////////////////////////////////////////////////////////////////////////////
+            //real wadjust;
+            //real qudricLimitP = c1o100;// * 0.0001f;
+            //real qudricLimitM = c1o100;// * 0.0001f;
+            //real qudricLimitD = c1o100;// * 0.001f;
+            //real s9 = minusomega;
+            //test
+            //s9 = 0.;
+
+
+            //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            real EQcbb = c0o1;
+            real EQabb = c0o1;
+            real EQbcb = c0o1;
+            real EQbab = c0o1;
+            real EQbbc = c0o1;
+            real EQbba = c0o1;
+            real EQccb = c0o1;
+            real EQaab = c0o1;
+            real EQcab = c0o1;
+            real EQacb = c0o1;
+            real EQcbc = c0o1;
+            real EQaba = c0o1;
+            real EQcba = c0o1;
+            real EQabc = c0o1;
+            real EQbcc = c0o1;
+            real EQbaa = c0o1;
+            real EQbca = c0o1;
+            real EQbac = c0o1;
+            real EQbbb = c0o1;
+            real EQccc = drho * c1o27;
+            real EQaac = drho * c1o3;
+            real EQcac = drho * c1o9;
+            real EQacc = drho * c1o9;
+            real EQcca = drho * c1o9;
+            real EQaaa = drho;
+            real EQcaa = drho * c1o3;
+            real EQaca = drho * c1o3;
+            ////////////////////////////////////////////////////////////////////////////////////
+            vf::lbm::backwardChimeraWithK(EQaaa, EQaab, EQaac, vvz, vz2, c1o1);
+            vf::lbm::backwardChimeraWithK(EQaca, EQacb, EQacc, vvz, vz2, c1o3);
+            ///////////////////////////////////////////////////////////
+            EQcaa = EQaca; EQcab = EQacb; EQcac = EQacc;
+            ///////////////////////////////////////////////////////////
+            vf::lbm::backwardChimeraWithK(EQcca, EQccb, EQccc, vvz, vz2, c1o9);
+
+            vf::lbm::backwardChimeraWithK(EQaaa, EQaba, EQaca, vvy, vy2, c1o6);
+            vf::lbm::backwardChimeraWithK(EQaab, EQabb, EQacb, vvy, vy2, c2o3);
+            vf::lbm::backwardChimeraWithK(EQaac, EQabc, EQacc, vvy, vy2, c1o6);
+            vf::lbm::backwardChimeraWithK(EQcaa, EQcba, EQcca, vvy, vy2, c1o18);
+            vf::lbm::backwardChimeraWithK(EQcab, EQcbb, EQccb, vvy, vy2, c2o9);
+            vf::lbm::backwardChimeraWithK(EQcac, EQcbc, EQccc, vvy, vy2, c1o18);
+
+            vf::lbm::backwardChimeraWithK(EQaaa, EQbaa, EQcaa, vvx, vx2, c1o36);
+            vf::lbm::backwardChimeraWithK(EQaab, EQbab, EQcab, vvx, vx2, c1o9);
+            vf::lbm::backwardChimeraWithK(EQaac, EQbac, EQcac, vvx, vx2, c1o36);
+            vf::lbm::backwardChimeraWithK(EQaba, EQbba, EQcba, vvx, vx2, c1o9);
+            vf::lbm::backwardChimeraWithK(EQabb, EQbbb, EQcbb, vvx, vx2, c4o9);
+            vf::lbm::backwardChimeraWithK(EQabc, EQbbc, EQcbc, vvx, vx2, c1o9);
+            vf::lbm::backwardChimeraWithK(EQaca, EQbca, EQcca, vvx, vx2, c1o36);
+            vf::lbm::backwardChimeraWithK(EQacb, EQbcb, EQccb, vvx, vx2, c1o9);
+            vf::lbm::backwardChimeraWithK(EQacc, EQbcc, EQccc, vvx, vx2, c1o36);
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            //Pre-condition
+            mfcbb -= EQcbb;
+            mfabb -= EQabb;
+            mfbcb -= EQbcb;
+            mfbab -= EQbab;
+            mfbbc -= EQbbc;
+            mfbba -= EQbba;
+            mfccb -= EQccb;
+            mfaab -= EQaab;
+            mfcab -= EQcab;
+            mfacb -= EQacb;
+            mfcbc -= EQcbc;
+            mfaba -= EQaba;
+            mfcba -= EQcba;
+            mfabc -= EQabc;
+            mfbcc -= EQbcc;
+            mfbaa -= EQbaa;
+            mfbca -= EQbca;
+            mfbac -= EQbac;
+            mfbbb -= EQbbb;
+            mfccc -= EQccc;
+            mfaac -= EQaac;
+            mfcac -= EQcac;
+            mfacc -= EQacc;
+            mfcca -= EQcca;
+            mfaaa -= EQaaa;
+            mfcaa -= EQcaa;
+            mfaca -= EQaca;
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            //Hin
+            ////////////////////////////////////////////////////////////////////////////////////
+            vf::lbm::forwardChimera(mfaaa, mfaab, mfaac, vvz, vz2);
+            vf::lbm::forwardChimera(mfaba, mfabb, mfabc, vvz, vz2);
+            vf::lbm::forwardChimera(mfaca, mfacb, mfacc, vvz, vz2);
+            vf::lbm::forwardChimera(mfbaa, mfbab, mfbac, vvz, vz2);
+            vf::lbm::forwardChimera(mfbba, mfbbb, mfbbc, vvz, vz2);
+            vf::lbm::forwardChimera(mfbca, mfbcb, mfbcc, vvz, vz2);
+            vf::lbm::forwardChimera(mfcaa, mfcab, mfcac, vvz, vz2);
+            vf::lbm::forwardChimera(mfcba, mfcbb, mfcbc, vvz, vz2);
+            vf::lbm::forwardChimera(mfcca, mfccb, mfccc, vvz, vz2);
+
+            vf::lbm::forwardChimera(mfaaa, mfaba, mfaca, vvy, vy2);
+            vf::lbm::forwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
+            vf::lbm::forwardChimera(mfaac, mfabc, mfacc, vvy, vy2);
+            vf::lbm::forwardChimera(mfbaa, mfbba, mfbca, vvy, vy2);
+            vf::lbm::forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
+            vf::lbm::forwardChimera(mfbac, mfbbc, mfbcc, vvy, vy2);
+            vf::lbm::forwardChimera(mfcaa, mfcba, mfcca, vvy, vy2);
+            vf::lbm::forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
+            vf::lbm::forwardChimera(mfcac, mfcbc, mfccc, vvy, vy2);
+
+            vf::lbm::forwardChimera(mfaaa, mfbaa, mfcaa, vvx, vx2);
+            vf::lbm::forwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
+            vf::lbm::forwardChimera(mfaac, mfbac, mfcac, vvx, vx2);
+            vf::lbm::forwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
+            vf::lbm::forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
+            vf::lbm::forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
+            vf::lbm::forwardChimera(mfaca, mfbca, mfcca, vvx, vx2);
+            vf::lbm::forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
+            vf::lbm::forwardChimera(mfacc, mfbcc, mfccc, vvx, vx2);
+
+            //////////////////////////////////////////////////////////////////////////////////////
+            ////Hin
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36  Konditionieren
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// Z - Dir
+            //forwardChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c1o36);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c1o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c1o36);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c1o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c4o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c1o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c1o36);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c1o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c1o36);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// mit  1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// Y - Dir
+            //forwardChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c1o6);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c1o18);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c2o3);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c2o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c1o6);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c1o18);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// mit     1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9		Konditionieren
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// X - Dir
+            //forwardChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, one);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c1o3);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c1o3);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //forwardChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c1o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            // Cumulants
+            ////////////////////////////////////////////////////////////////////////////////////
+            real OxxPyyPzz = c1o1; //omega; // one;	//set the bulk viscosity one is high / two is very low and zero is (too) high
+
+            ////////////////////////////////////////////////////////////
+            //3.
+            //////////////////////////////
+            real OxyyPxzz = c1o1;
+            real OxyyMxzz = c1o1;
+            //real Oxyz = c1o1;
+            ////////////////////////////////////////////////////////////
+            //4.
+            //////////////////////////////
+            real O4 = c1o1;
+            ////////////////////////////////////////////////////////////
+            //5.
+            //////////////////////////////
+            real O5 = c1o1;
+            ////////////////////////////////////////////////////////////
+            //6.
+            //////////////////////////////
+            real O6 = c1o1;
+            ////////////////////////////////////////////////////////////
+
+
+            //central moments to cumulants
+            //4.
+            real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) / rho;
+            real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) / rho;
+            real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) / rho;
+
+            real CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9*(drho / rho));
+            real CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9*(drho / rho));
+            real CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9*(drho / rho));
+
+            //5.
+            real CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
+            real CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
+            real CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
+
+            //6.
+
+            real CUMccc = mfccc + ((-c4o1 *  mfbbb * mfbbb
+                - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
+                - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+                - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
+                + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+                    + c2o1 * (mfcaa * mfaca * mfaac)
+                    + c16o1 *  mfbba * mfbab * mfabb) / (rho * rho)
+                - c1o3 * (mfacc + mfcac + mfcca) / rho
+                - c1o9 * (mfcaa + mfaca + mfaac) / rho
+                + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
+                    + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3
+                + c1o27*((drho * drho - drho) / (rho*rho)));
+
+
+
+
+            //2.
+            // linear combinations
+            real mxxPyyPzz = mfcaa + mfaca + mfaac;
+            real mxxMyy = mfcaa - mfaca;
+            real mxxMzz = mfcaa - mfaac;
+
+            /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            //incl. correction		(hat noch nicht so gut funktioniert...Optimierungsbedarf??)
+            {
+                real dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 *  OxxPyyPzz * (mfaaa - mxxPyyPzz);
+                real dyuy = dxux + omega * c3o2 * mxxMyy;
+                real dzuz = dxux + omega * c3o2 * mxxMzz;
+
+                //relax
+                mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);//-magicBulk*OxxPyyPzz;
+                mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
+                mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
+
+            }
+            /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+            /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            ////no correction
+            //mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz);//-magicBulk*OxxPyyPzz;
+            //mxxMyy += -(-omega) * (-mxxMyy);
+            //mxxMzz += -(-omega) * (-mxxMzz);
+            /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            mfabb += omega * (-mfabb);
+            mfbab += omega * (-mfbab);
+            mfbba += omega * (-mfbba);
+
+            //////////////////////////////////////////////////////////////////////////
+
+            // linear combinations back
+            mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
+            mfaca = c1o3 * (-c2o1*  mxxMyy + mxxMzz + mxxPyyPzz);
+            mfaac = c1o3 * (mxxMyy - c2o1* mxxMzz + mxxPyyPzz);
+
+            //3.
+            // linear combinations
+
+            real mxxyPyzz = mfcba + mfabc;
+            real mxxyMyzz = mfcba - mfabc;
+
+            real mxxzPyyz = mfcab + mfacb;
+            real mxxzMyyz = mfcab - mfacb;
+
+            real mxyyPxzz = mfbca + mfbac;
+            real mxyyMxzz = mfbca - mfbac;
+
+            //relax
+            //////////////////////////////////////////////////////////////////////////
+            mfbbb += OxyyMxzz * (-mfbbb);
+            mxxyPyzz += OxyyPxzz * (-mxxyPyzz);
+            mxxyMyzz += OxyyMxzz * (-mxxyMyzz);
+            mxxzPyyz += OxyyPxzz * (-mxxzPyyz);
+            mxxzMyyz += OxyyMxzz * (-mxxzMyyz);
+            mxyyPxzz += OxyyPxzz * (-mxyyPxzz);
+            mxyyMxzz += OxyyMxzz * (-mxyyMxzz);
+            //////////////////////////////////////////////////////////////////////////
+
+            mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
+            mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2;
+            mfcab = (mxxzMyyz + mxxzPyyz) * c1o2;
+            mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2;
+            mfbca = (mxyyMxzz + mxyyPxzz) * c1o2;
+            mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
+
+            //4.
+            //////////////////////////////////////////////////////////////////////////
+            CUMacc += O4 * (-CUMacc);
+            CUMcac += O4 * (-CUMcac);
+            CUMcca += O4 * (-CUMcca);
+
+            CUMbbc += O4 * (-CUMbbc);
+            CUMbcb += O4 * (-CUMbcb);
+            CUMcbb += O4 * (-CUMcbb);
+            //////////////////////////////////////////////////////////////////////////
+
+
+            //5.
+            CUMbcc += O5 * (-CUMbcc);
+            CUMcbc += O5 * (-CUMcbc);
+            CUMccb += O5 * (-CUMccb);
+
+            //6.
+            CUMccc += O6 * (-CUMccc);
+
+
+
+            //back cumulants to central moments
+            //4.
+            mfcbb = CUMcbb + ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) / rho;
+            mfbcb = CUMbcb + ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) / rho;
+            mfbbc = CUMbbc + ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) / rho;
+
+            mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) / rho - c1o9*(drho / rho));
+            mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) / rho - c1o9*(drho / rho));
+            mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) / rho - c1o9*(drho / rho));
+
+            //5.
+            mfbcc = CUMbcc + ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) / rho;
+            mfcbc = CUMcbc + ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) / rho;
+            mfccb = CUMccb + ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) / rho;
+
+            //6.
+            mfccc = CUMccc - ((-c4o1 *  mfbbb * mfbbb
+                - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
+                - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+                - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) / rho
+                + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+                    + c2o1 * (mfcaa * mfaca * mfaac)
+                    + c16o1 *  mfbba * mfbab * mfabb) / (rho * rho)
+                - c1o3 * (mfacc + mfcac + mfcca) / rho
+                - c1o9 * (mfcaa + mfaca + mfaac) / rho
+                + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
+                    + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) / (rho * rho) * c2o3
+                + c1o27*((drho * drho - drho) / (rho*rho)));
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            //the force be with you
+            mfbaa = -mfbaa;
+            mfaba = -mfaba;
+            mfaab = -mfaab;
+            ////////////////////////////////////////////////////////////////////////////////////
+
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            //back
+            ////////////////////////////////////////////////////////////////////////////////////
+            vf::lbm::backwardChimera(mfaaa, mfaab, mfaac, vvz, vz2);
+            vf::lbm::backwardChimera(mfaba, mfabb, mfabc, vvz, vz2);
+            vf::lbm::backwardChimera(mfaca, mfacb, mfacc, vvz, vz2);
+            vf::lbm::backwardChimera(mfbaa, mfbab, mfbac, vvz, vz2);
+            vf::lbm::backwardChimera(mfbba, mfbbb, mfbbc, vvz, vz2);
+            vf::lbm::backwardChimera(mfbca, mfbcb, mfbcc, vvz, vz2);
+            vf::lbm::backwardChimera(mfcaa, mfcab, mfcac, vvz, vz2);
+            vf::lbm::backwardChimera(mfcba, mfcbb, mfcbc, vvz, vz2);
+            vf::lbm::backwardChimera(mfcca, mfccb, mfccc, vvz, vz2);
+
+            vf::lbm::backwardChimera(mfaaa, mfaba, mfaca, vvy, vy2);
+            vf::lbm::backwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
+            vf::lbm::backwardChimera(mfaac, mfabc, mfacc, vvy, vy2);
+            vf::lbm::backwardChimera(mfbaa, mfbba, mfbca, vvy, vy2);
+            vf::lbm::backwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
+            vf::lbm::backwardChimera(mfbac, mfbbc, mfbcc, vvy, vy2);
+            vf::lbm::backwardChimera(mfcaa, mfcba, mfcca, vvy, vy2);
+            vf::lbm::backwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
+            vf::lbm::backwardChimera(mfcac, mfcbc, mfccc, vvy, vy2);
+
+            vf::lbm::backwardChimera(mfaaa, mfbaa, mfcaa, vvx, vx2);
+            vf::lbm::backwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
+            vf::lbm::backwardChimera(mfaac, mfbac, mfcac, vvx, vx2);
+            vf::lbm::backwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
+            vf::lbm::backwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
+            vf::lbm::backwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
+            vf::lbm::backwardChimera(mfaca, mfbca, mfcca, vvx, vx2);
+            vf::lbm::backwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
+            vf::lbm::backwardChimera(mfacc, mfbcc, mfccc, vvx, vx2);
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            mfcbb+=EQcbb;
+            mfabb+=EQabb;
+            mfbcb+=EQbcb;
+            mfbab+=EQbab;
+            mfbbc+=EQbbc;
+            mfbba+=EQbba;
+            mfccb+=EQccb;
+            mfaab+=EQaab;
+            mfcab+=EQcab;
+            mfacb+=EQacb;
+            mfcbc+=EQcbc;
+            mfaba+=EQaba;
+            mfcba+=EQcba;
+            mfabc+=EQabc;
+            mfbcc+=EQbcc;
+            mfbaa+=EQbaa;
+            mfbca+=EQbca;
+            mfbac+=EQbac;
+            mfbbb+=EQbbb;
+            mfccc+=EQccc;
+            mfaac+=EQaac;
+            mfcac+=EQcac;
+            mfacc+=EQacc;
+            mfcca+=EQcca;
+            mfaaa+=EQaaa;
+            mfcaa+=EQcaa;
+            mfaca+=EQaca;
+
+
+            //////////////////////////////////////////////////////////////////////////////////////
+            ////back
+            //////////////////////////////////////////////////////////////////////////////////////
+            ////mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// Z - Dir
+            //backwardChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, one);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimera(mfaba, mfabb, mfabc, vvz, vz2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c1o3);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimera(mfbaa, mfbab, mfbac, vvz, vz2);
+            ///////////b//////////////////////////////////////////////////////////////////////////
+            //backwardChimera(mfbba, mfbbb, mfbbc, vvz, vz2);
+            ///////////b//////////////////////////////////////////////////////////////////////////
+            //backwardChimera(mfbca, mfbcb, mfbcc, vvz, vz2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c1o3);
+            ///////////c//////////////////////////////////////////////////////////////////////////
+            //backwardChimera(mfcba, mfcbb, mfcbc, vvz, vz2);
+            ///////////c//////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c1o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            ////mit 1/6, 2/3, 1/6, 0, 0, 0, 1/18, 2/9, 1/18   Konditionieren
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// Y - Dir
+            //backwardChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c1o6);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfaab, mfabb, mfacb, vvy, vy2, c2o3);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c1o6);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimera(mfbaa, mfbba, mfbca, vvz, vz2);
+            ///////////b//////////////////////////////////////////////////////////////////////////
+            //backwardChimera(mfbab, mfbbb, mfbcb, vvz, vz2);
+            ///////////b//////////////////////////////////////////////////////////////////////////
+            //backwardChimera(mfbac, mfbbc, mfbcc, vvz, vz2);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c1o18);
+            ///////////c//////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfcab, mfcbb, mfccb, vvy, vy2, c2o9);
+            ///////////c//////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c1o18);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            ////mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36 Konditionieren
+            //////////////////////////////////////////////////////////////////////////////////////
+            //// X - Dir
+            //backwardChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o36);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfaba, mfbba, mfcba, vvx, vx2, c1o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c1o36);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfaab, mfbab, mfcab, vvx, vx2, c1o9);
+            /////////////b////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfabb, mfbbb, mfcbb, vvx, vx2, c4o9);
+            /////////////b////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfacb, mfbcb, mfccb, vvx, vx2, c1o9);
+            //////////////////////////////////////////////////////////////////////////////////////
+            //////////////////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c1o36);
+            /////////////c////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfabc, mfbbc, mfcbc, vvx, vx2, c1o9);
+            /////////////c////////////////////////////////////////////////////////////////////////
+            //backwardChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c1o36);
+            //////////////////////////////////////////////////////////////////////////////////////
+
+            //////////////////////////////////////////////////////////////////////////////////////
+            real drhoPost =
+                ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+                (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
+                    ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
+            mfbbb += drho - drhoPost;
+            ////////////////////////////////////////////////////////////////////////////////////
+            (D.f[dirE])[k] = mfabb;//(D.f[ dirE   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ dirE   ])[k   ]                                                                     
+            (D.f[dirW])[kw] = mfcbb;//(D.f[ dirW   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ dirW   ])[kw  ]                                                                   
+            (D.f[dirN])[k] = mfbab;//(D.f[ dirN   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ dirN   ])[k   ]
+            (D.f[dirS])[ks] = mfbcb;//(D.f[ dirS   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ dirS   ])[ks  ]
+            (D.f[dirT])[k] = mfbba;//(D.f[ dirT   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ dirT   ])[k   ]
+            (D.f[dirB])[kb] = mfbbc;//(D.f[ dirB   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ dirB   ])[kb  ]
+            (D.f[dirNE])[k] = mfaab;//(D.f[ dirNE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ dirNE  ])[k   ]
+            (D.f[dirSW])[ksw] = mfccb;//(D.f[ dirSW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ dirSW  ])[ksw ]
+            (D.f[dirSE])[ks] = mfacb;//(D.f[ dirSE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ dirSE  ])[ks  ]
+            (D.f[dirNW])[kw] = mfcab;//(D.f[ dirNW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ dirNW  ])[kw  ]
+            (D.f[dirTE])[k] = mfaba;//(D.f[ dirTE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ dirTE  ])[k   ]
+            (D.f[dirBW])[kbw] = mfcbc;//(D.f[ dirBW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ dirBW  ])[kbw ]
+            (D.f[dirBE])[kb] = mfabc;//(D.f[ dirBE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ dirBE  ])[kb  ]
+            (D.f[dirTW])[kw] = mfcba;//(D.f[ dirTW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ dirTW  ])[kw  ]
+            (D.f[dirTN])[k] = mfbaa;//(D.f[ dirTN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ dirTN  ])[k   ]
+            (D.f[dirBS])[kbs] = mfbcc;//(D.f[ dirBS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ dirBS  ])[kbs ]
+            (D.f[dirBN])[kb] = mfbac;//(D.f[ dirBN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ dirBN  ])[kb  ]
+            (D.f[dirTS])[ks] = mfbca;//(D.f[ dirTS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ dirTS  ])[ks  ]
+            (D.f[dirZERO])[k] = mfbbb;//(D.f[ dirZERO])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ dirZERO])[k   ]
+            (D.f[dirTNE])[k] = mfaaa;//(D.f[ dirTNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ dirTNE ])[k   ]
+            (D.f[dirTSE])[ks] = mfaca;//(D.f[ dirTSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ dirTSE ])[ks  ]
+            (D.f[dirBNE])[kb] = mfaac;//(D.f[ dirBNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ dirBNE ])[kb  ]
+            (D.f[dirBSE])[kbs] = mfacc;//(D.f[ dirBSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ dirBSE ])[kbs ]
+            (D.f[dirTNW])[kw] = mfcaa;//(D.f[ dirTNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ dirTNW ])[kw  ]
+            (D.f[dirTSW])[ksw] = mfcca;//(D.f[ dirTSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ dirTSW ])[ksw ]
+            (D.f[dirBNW])[kbw] = mfcac;//(D.f[ dirBNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ dirBNW ])[kbw ]
+            (D.f[dirBSW])[kbsw] = mfccc;//(D.f[ dirBSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ dirBSW ])[kbsw]
+            ////////////////////////////////////////////////////////////////////////////////////
+        }
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -1798,483 +1754,483 @@ extern "C" __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
 
 ////////////////////////////////////////////////////////////////////////////////
 extern "C" __global__ void Cumulant_One_chim_Comp_SP_27(
-	real omega,
-	unsigned int* bcMatD,
-	unsigned int* neighborX,
-	unsigned int* neighborY,
-	unsigned int* neighborZ,
-	real* DDStart,
-	int size_Mat,
-	int level,
-	real* forces,
-	bool EvenOrOdd)
+    real omega,
+    unsigned int* bcMatD,
+    unsigned int* neighborX,
+    unsigned int* neighborY,
+    unsigned int* neighborZ,
+    real* DDStart,
+    int size_Mat,
+    int level,
+    real* forces,
+    bool EvenOrOdd)
 {
-	////////////////////////////////////////////////////////////////////////////////
-	const unsigned  x = threadIdx.x;  // Globaler x-Index 
-	const unsigned  y = blockIdx.x;   // Globaler y-Index 
-	const unsigned  z = blockIdx.y;   // Globaler z-Index 
-
-	const unsigned nx = blockDim.x;
-	const unsigned ny = gridDim.x;
-
-	const unsigned k = nx*(ny*z + y) + x;
-	//////////////////////////////////////////////////////////////////////////
-
-	if (k<size_Mat)
-	{
-		////////////////////////////////////////////////////////////////////////////////
-		unsigned int BC;
-		BC = bcMatD[k];
-
-		if (BC >= GEO_FLUID/*(BC != GEO_SOLID) && (BC != GEO_VOID)*/)
-		{
-			Distributions27 D;
-			if (EvenOrOdd == true)
-			{
-				D.f[dirE   ] = &DDStart[dirE   *size_Mat];
-				D.f[dirW   ] = &DDStart[dirW   *size_Mat];
-				D.f[dirN   ] = &DDStart[dirN   *size_Mat];
-				D.f[dirS   ] = &DDStart[dirS   *size_Mat];
-				D.f[dirT   ] = &DDStart[dirT   *size_Mat];
-				D.f[dirB   ] = &DDStart[dirB   *size_Mat];
-				D.f[dirNE  ] = &DDStart[dirNE  *size_Mat];
-				D.f[dirSW  ] = &DDStart[dirSW  *size_Mat];
-				D.f[dirSE  ] = &DDStart[dirSE  *size_Mat];
-				D.f[dirNW  ] = &DDStart[dirNW  *size_Mat];
-				D.f[dirTE  ] = &DDStart[dirTE  *size_Mat];
-				D.f[dirBW  ] = &DDStart[dirBW  *size_Mat];
-				D.f[dirBE  ] = &DDStart[dirBE  *size_Mat];
-				D.f[dirTW  ] = &DDStart[dirTW  *size_Mat];
-				D.f[dirTN  ] = &DDStart[dirTN  *size_Mat];
-				D.f[dirBS  ] = &DDStart[dirBS  *size_Mat];
-				D.f[dirBN  ] = &DDStart[dirBN  *size_Mat];
-				D.f[dirTS  ] = &DDStart[dirTS  *size_Mat];
-				D.f[dirZERO] = &DDStart[dirZERO*size_Mat];
-				D.f[dirTNE ] = &DDStart[dirTNE *size_Mat];
-				D.f[dirTSW ] = &DDStart[dirTSW *size_Mat];
-				D.f[dirTSE ] = &DDStart[dirTSE *size_Mat];
-				D.f[dirTNW ] = &DDStart[dirTNW *size_Mat];
-				D.f[dirBNE ] = &DDStart[dirBNE *size_Mat];
-				D.f[dirBSW ] = &DDStart[dirBSW *size_Mat];
-				D.f[dirBSE ] = &DDStart[dirBSE *size_Mat];
-				D.f[dirBNW ] = &DDStart[dirBNW *size_Mat];
-			}
-			else
-			{
-				D.f[dirW   ] = &DDStart[dirE   *size_Mat];
-				D.f[dirE   ] = &DDStart[dirW   *size_Mat];
-				D.f[dirS   ] = &DDStart[dirN   *size_Mat];
-				D.f[dirN   ] = &DDStart[dirS   *size_Mat];
-				D.f[dirB   ] = &DDStart[dirT   *size_Mat];
-				D.f[dirT   ] = &DDStart[dirB   *size_Mat];
-				D.f[dirSW  ] = &DDStart[dirNE  *size_Mat];
-				D.f[dirNE  ] = &DDStart[dirSW  *size_Mat];
-				D.f[dirNW  ] = &DDStart[dirSE  *size_Mat];
-				D.f[dirSE  ] = &DDStart[dirNW  *size_Mat];
-				D.f[dirBW  ] = &DDStart[dirTE  *size_Mat];
-				D.f[dirTE  ] = &DDStart[dirBW  *size_Mat];
-				D.f[dirTW  ] = &DDStart[dirBE  *size_Mat];
-				D.f[dirBE  ] = &DDStart[dirTW  *size_Mat];
-				D.f[dirBS  ] = &DDStart[dirTN  *size_Mat];
-				D.f[dirTN  ] = &DDStart[dirBS  *size_Mat];
-				D.f[dirTS  ] = &DDStart[dirBN  *size_Mat];
-				D.f[dirBN  ] = &DDStart[dirTS  *size_Mat];
-				D.f[dirZERO] = &DDStart[dirZERO*size_Mat];
-				D.f[dirBSW ] = &DDStart[dirTNE *size_Mat];
-				D.f[dirBNE ] = &DDStart[dirTSW *size_Mat];
-				D.f[dirBNW ] = &DDStart[dirTSE *size_Mat];
-				D.f[dirBSE ] = &DDStart[dirTNW *size_Mat];
-				D.f[dirTSW ] = &DDStart[dirBNE *size_Mat];
-				D.f[dirTNE ] = &DDStart[dirBSW *size_Mat];
-				D.f[dirTNW ] = &DDStart[dirBSE *size_Mat];
-				D.f[dirTSE ] = &DDStart[dirBNW *size_Mat];
-			}
-			////////////////////////////////////////////////////////////////////////////////
-			//index
-			unsigned int kw = neighborX[k];
-			unsigned int ks = neighborY[k];
-			unsigned int kb = neighborZ[k];
-			unsigned int ksw = neighborY[kw];
-			unsigned int kbw = neighborZ[kw];
-			unsigned int kbs = neighborZ[ks];
-			unsigned int kbsw = neighborZ[ksw];
-			////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[dirE   ])[k   ];
-			real mfabb = (D.f[dirW   ])[kw  ];
-			real mfbcb = (D.f[dirN   ])[k   ];
-			real mfbab = (D.f[dirS   ])[ks  ];
-			real mfbbc = (D.f[dirT   ])[k   ];
-			real mfbba = (D.f[dirB   ])[kb  ];
-			real mfccb = (D.f[dirNE  ])[k   ];
-			real mfaab = (D.f[dirSW  ])[ksw ];
-			real mfcab = (D.f[dirSE  ])[ks  ];
-			real mfacb = (D.f[dirNW  ])[kw  ];
-			real mfcbc = (D.f[dirTE  ])[k   ];
-			real mfaba = (D.f[dirBW  ])[kbw ];
-			real mfcba = (D.f[dirBE  ])[kb  ];
-			real mfabc = (D.f[dirTW  ])[kw  ];
-			real mfbcc = (D.f[dirTN  ])[k   ];
-			real mfbaa = (D.f[dirBS  ])[kbs ];
-			real mfbca = (D.f[dirBN  ])[kb  ];
-			real mfbac = (D.f[dirTS  ])[ks  ];
-			real mfbbb = (D.f[dirZERO])[k   ];
-			real mfccc = (D.f[dirTNE ])[k   ];
-			real mfaac = (D.f[dirTSW ])[ksw ];
-			real mfcac = (D.f[dirTSE ])[ks  ];
-			real mfacc = (D.f[dirTNW ])[kw  ];
-			real mfcca = (D.f[dirBNE ])[kb  ];
-			real mfaaa = (D.f[dirBSW ])[kbsw];
-			real mfcaa = (D.f[dirBSE ])[kbs ];
-			real mfaca = (D.f[dirBNW ])[kbw ];
-			////////////////////////////////////////////////////////////////////////////////////
-			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
-				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
-				((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
-
-			real rho = c1o1 + drho;
-			real OOrho = c1o1 / rho;
-			////////////////////////////////////////////////////////////////////////////////////
-			real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
-				(((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
-				(mfcbb - mfabb)) * OOrho;
-			real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
-				(((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
-				(mfbcb - mfbab)) * OOrho;
-			real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
-				(((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
-				(mfbbc - mfbba)) * OOrho;
-			////////////////////////////////////////////////////////////////////////////////////
-			//the force be with you
-			real fx = forces[0] / (pow((double)c2o1, (double)level)); //zero;//0.0032653/(pow(two,level)); //0.000000005;//(two/1600000.0) / 120.0; //
-			real fy = forces[1] / (pow((double)c2o1, (double)level)); //zero;
-			real fz = forces[2] / (pow((double)c2o1, (double)level)); //zero;
-			vvx += fx*c1o2;
-			vvy += fy*c1o2;
-			vvz += fz*c1o2;
-			////////////////////////////////////////////////////////////////////////////////////
-			//real omega = omega_in;
-			////////////////////////////////////////////////////////////////////////////////////
-			//fast
-			//real oMdrho = c1o1; // comp special
-			//real m0, m1, m2;
-			real vx2;
-			real vy2;
-			real vz2;
-			vx2 = vvx*vvx;
-			vy2 = vvy*vvy;
-			vz2 = vvz*vvz;
-			////////////////////////////////////////////////////////////////////////////////////
-			//real wadjust;
-			//real qudricLimitP = c1o100;// * 0.0001f;
-			//real qudricLimitM = c1o100;// * 0.0001f;
-			//real qudricLimitD = c1o100;// * 0.001f;
-			////////////////////////////////////////////////////////////////////////////////////
-			//Hin
-			////////////////////////////////////////////////////////////////////////////////////
-			// mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36  Konditionieren
-			////////////////////////////////////////////////////////////////////////////////////
-			// Z - Dir
-			forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, 36.0f, c1o36);
-			forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, 9.0f , c1o9 );
-			forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, 36.0f, c1o36);
-			forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, 9.0f , c1o9 );
-			forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, 2.25f, c4o9 );
-			forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, 9.0f , c1o9 );
-			forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, 36.0f, c1o36);
-			forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, 9.0f , c1o9 );
-			forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, 36.0f, c1o36);
-
-			////////////////////////////////////////////////////////////////////////////////////
-			// mit  1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren
-			////////////////////////////////////////////////////////////////////////////////////
-			// Y - Dir
-			forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, 6.0f , c1o6 );
-			forwardChimera(     mfaab, mfabb, mfacb, vvy, vy2);
-			forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, 18.0f, c1o18);
-			forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, 1.5f , c2o3 );
-			forwardChimera(     mfbab, mfbbb, mfbcb, vvy, vy2);
-			forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, 4.5f , c2o9 );
-			forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, 6.0f , c1o6 );
-			forwardChimera(     mfcab, mfcbb, mfccb, vvy, vy2);
-			forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, 18.0f, c1o18);
-
-			////////////////////////////////////////////////////////////////////////////////////
-			// mit     1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9		Konditionieren
-			////////////////////////////////////////////////////////////////////////////////////
-			// X - Dir
-			forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
-			forwardChimera(     mfaba, mfbba, mfcba, vvx, vx2);
-			forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, 3.0f, c1o3);
-			forwardChimera(     mfaab, mfbab, mfcab, vvx, vx2);
-			forwardChimera(     mfabb, mfbbb, mfcbb, vvx, vx2);
-			forwardChimera(     mfacb, mfbcb, mfccb, vvx, vx2);
-			forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, 3.0f, c1o3);
-			forwardChimera(     mfabc, mfbbc, mfcbc, vvx, vx2);
-			forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, 9.0f, c1o9);
-
-			////////////////////////////////////////////////////////////////////////////////////
-			// Cumulants
-			////////////////////////////////////////////////////////////////////////////////////
-			real OxxPyyPzz = c1o1;
-			////////////////////////////////////////////////////////////
-			//3.
-			//////////////////////////////
-			real OxyyPxzz = c1o1;
-			real OxyyMxzz = c1o1;
-			//real Oxyz = c1o1;
-			////////////////////////////////////////////////////////////
-			//4.
-			//////////////////////////////
-			real O4 = c1o1;
-			////////////////////////////////////////////////////////////
-			//5.
-			//////////////////////////////
-			real O5 = c1o1;
-			////////////////////////////////////////////////////////////
-			//6.
-			//////////////////////////////
-			real O6 = c1o1;
-			////////////////////////////////////////////////////////////
-
-
-			//central moments to cumulants
-			//4.
-			real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) * OOrho;
-			real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) * OOrho;
-			real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) * OOrho;
-
-			real CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9*(drho * OOrho));
-			real CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9*(drho * OOrho));
-			real CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9*(drho * OOrho));
-
-			//5.
-			real CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) * OOrho;
-			real CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) * OOrho;
-			real CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) * OOrho;
-
-			//6.
-			real CUMccc = mfccc + ((-c4o1 *  mfbbb * mfbbb
-				- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
-				- c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
-				- c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho
-				+ (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
-				+ c2o1 * (mfcaa * mfaca * mfaac)
-				+ c16o1 *  mfbba * mfbab * mfabb) * OOrho * OOrho
-				- c1o3 * (mfacc + mfcac + mfcca) * OOrho
-				- c1o9 * (mfcaa + mfaca + mfaac) * OOrho
-				+ (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
-				+ (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) * OOrho * OOrho  * c2o3
-				+ c1o27*((drho * drho - drho) * OOrho * OOrho ));
-
-
-			//2.
-			// linear combinations
-			real mxxPyyPzz = mfcaa + mfaca + mfaac;
-			real mxxMyy = mfcaa - mfaca;
-			real mxxMzz = mfcaa - mfaac;
-
-			/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			//incl. correction		(hat noch nicht so gut funktioniert...Optimierungsbedarf??)
-			{
-				real dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 *  OxxPyyPzz * (mfaaa - mxxPyyPzz);
-				real dyuy = dxux + omega * c3o2 * mxxMyy;
-				real dzuz = dxux + omega * c3o2 * mxxMzz;
-
-				//relax
-				mxxPyyPzz += OxxPyyPzz*(mfaaa  - mxxPyyPzz)- c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);//-magicBulk*OxxPyyPzz;
-				mxxMyy    += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
-				mxxMzz    += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
-
-			}
-			/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-			/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			////no correction
-			//mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz);//-magicBulk*OxxPyyPzz;
-			//mxxMyy += -(-omega) * (-mxxMyy);
-			//mxxMzz += -(-omega) * (-mxxMzz);
-			/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			mfabb += omega * (-mfabb);
-			mfbab += omega * (-mfbab);
-			mfbba += omega * (-mfbba);
-
-			//////////////////////////////////////////////////////////////////////////
-
-			// linear combinations back
-			mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-			mfaca = c1o3 * (-c2o1*  mxxMyy + mxxMzz + mxxPyyPzz);
-			mfaac = c1o3 * (mxxMyy - c2o1* mxxMzz + mxxPyyPzz);
-
-			//3.
-			// linear combinations
-
-			real mxxyPyzz = mfcba + mfabc;
-			real mxxyMyzz = mfcba - mfabc;
-
-			real mxxzPyyz = mfcab + mfacb;
-			real mxxzMyyz = mfcab - mfacb;
-
-			real mxyyPxzz = mfbca + mfbac;
-			real mxyyMxzz = mfbca - mfbac;
-
-			//relax
-			//////////////////////////////////////////////////////////////////////////
-			mfbbb     += OxyyMxzz * (-mfbbb);
-			mxxyPyzz  += OxyyPxzz * (-mxxyPyzz);
-			mxxyMyzz  += OxyyMxzz * (-mxxyMyzz);
-			mxxzPyyz  += OxyyPxzz * (-mxxzPyyz);
-			mxxzMyyz  += OxyyMxzz * (-mxxzMyyz);
-			mxyyPxzz  += OxyyPxzz * (-mxyyPxzz);
-			mxyyMxzz  += OxyyMxzz * (-mxyyMxzz);
-			//////////////////////////////////////////////////////////////////////////
-
-			mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
-			mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2;
-			mfcab = (mxxzMyyz + mxxzPyyz) * c1o2;
-			mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2;
-			mfbca = (mxyyMxzz + mxyyPxzz) * c1o2;
-			mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
-
-			//4.
-			//////////////////////////////////////////////////////////////////////////
-			CUMacc += O4 * (-CUMacc);
-			CUMcac += O4 * (-CUMcac);
-			CUMcca += O4 * (-CUMcca);
-
-			CUMbbc += O4 * (-CUMbbc);
-			CUMbcb += O4 * (-CUMbcb);
-			CUMcbb += O4 * (-CUMcbb);
-			//////////////////////////////////////////////////////////////////////////
-
-
-			//5.
-			CUMbcc += O5 * (-CUMbcc);
-			CUMcbc += O5 * (-CUMcbc);
-			CUMccb += O5 * (-CUMccb);
-
-			//6.
-			CUMccc += O6 * (-CUMccc);
-
-
-
-			//back cumulants to central moments
-			//4.
-			mfcbb = CUMcbb + c1o3*((c3o1*mfcaa + c1o1) * mfabb + c6o1 * mfbba * mfbab) * OOrho; 
-			mfbcb = CUMbcb + c1o3*((c3o1*mfaca + c1o1) * mfbab + c6o1 * mfbba * mfabb) * OOrho;
-			mfbbc = CUMbbc + c1o3*((c3o1*mfaac + c1o1) * mfbba + c6o1 * mfbab * mfabb) * OOrho;
-
-			mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba)*c9o1 + c3o1 * (mfcaa + mfaca)) * OOrho - (drho * OOrho))*c1o9;
-			mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab)*c9o1 + c3o1 * (mfcaa + mfaac)) * OOrho - (drho * OOrho))*c1o9;
-			mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb)*c9o1 + c3o1 * (mfaac + mfaca)) * OOrho - (drho * OOrho))*c1o9;
-
-			//5.
-			mfbcc = CUMbcc + c1o3 *(c3o1*(mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + (mfbca + mfbac)) * OOrho;
-			mfcbc = CUMcbc + c1o3 *(c3o1*(mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + (mfcba + mfabc)) * OOrho;
-			mfccb = CUMccb + c1o3 *(c3o1*(mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) +  (mfacb + mfcab)) * OOrho;
-
-			//6.
-			mfccc = 
-				CUMccc - ((-c4o1 *  mfbbb * mfbbb
-				- (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
-				- c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
-				- c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho
-				+ (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
-				+ c2o1 * (mfcaa * mfaca * mfaac)
-				+ c16o1 *  mfbba * mfbab * mfabb) * OOrho * OOrho
-				- c1o3 * (mfacc + mfcac + mfcca) * OOrho
-				- c1o9 * (mfcaa + mfaca + mfaac) * OOrho
-				+ (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
-				+ (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3
-				+ c1o27*((drho * drho - drho) * OOrho * OOrho ));
-
-			////////////////////////////////////////////////////////////////////////////////////
-			//the force be with you
-			mfbaa = -mfbaa;
-			mfaba = -mfaba;
-			mfaab = -mfaab;
-			////////////////////////////////////////////////////////////////////////////////////
-
-
-			////////////////////////////////////////////////////////////////////////////////////
-			//back
-			////////////////////////////////////////////////////////////////////////////////////
-			//mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
-			////////////////////////////////////////////////////////////////////////////////////
-			// X - Dir
-			backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
-			backwardChimera(			mfaba, mfbba, mfcba, vvx, vx2);
-			backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, 3.0f, c1o3);
-			backwardChimera(			mfaab, mfbab, mfcab, vvx, vx2);
-			backwardChimera(			mfabb, mfbbb, mfcbb, vvx, vx2);
-			backwardChimera(			mfacb, mfbcb, mfccb, vvx, vx2);
-			backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, 3.0f, c1o3);
-			backwardChimera(			mfabc, mfbbc, mfcbc, vvx, vx2);
-			backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, 9.0f, c1o9);
-
-			////////////////////////////////////////////////////////////////////////////////////
-			//mit 1/6, 2/3, 1/6, 0, 0, 0, 1/18, 2/9, 1/18   Konditionieren
-			////////////////////////////////////////////////////////////////////////////////////
-			// Y - Dir
-			backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, 6.0f , c1o6 );
-			backwardChimera(			mfaab, mfabb, mfacb, vvy, vy2);
-			backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, 18.0f, c1o18);
-			backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, 1.5f , c2o3 );
-			backwardChimera(			mfbab, mfbbb, mfbcb, vvy, vy2);
-			backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, 4.5f , c2o9 );
-			backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, 6.0f , c1o6 );
-			backwardChimera(			mfcab, mfcbb, mfccb, vvy, vy2);
-			backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, 18.0f, c1o18);
-
-			////////////////////////////////////////////////////////////////////////////////////
-			//mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36 Konditionieren
-			////////////////////////////////////////////////////////////////////////////////////
-			// Z - Dir
-			backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, 36.0f, c1o36);
-			backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, 9.0f , c1o9 );
-			backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, 36.0f, c1o36);
-			backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, 9.0f , c1o9 );
-			backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, 2.25f, c4o9 );
-			backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, 9.0f , c1o9 );
-			backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, 36.0f, c1o36);
-			backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, 9.0f , c1o9 );
-			backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, 36.0f, c1o36);
-
-			//////////////////////////////////////////////////////////////////////////////////////
-			real drhoPost =
-				((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
-				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
-					((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
-			mfbbb += drho - drhoPost;
-			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[dirE   ])[k   ] = mfabb;                                                                   
-			(D.f[dirW   ])[kw  ] = mfcbb;                                                                 
-			(D.f[dirN   ])[k   ] = mfbab;
-			(D.f[dirS   ])[ks  ] = mfbcb;
-			(D.f[dirT   ])[k   ] = mfbba;
-			(D.f[dirB   ])[kb  ] = mfbbc;
-			(D.f[dirNE  ])[k   ] = mfaab;
-			(D.f[dirSW  ])[ksw ] = mfccb;
-			(D.f[dirSE  ])[ks  ] = mfacb;
-			(D.f[dirNW  ])[kw  ] = mfcab;
-			(D.f[dirTE  ])[k   ] = mfaba;
-			(D.f[dirBW  ])[kbw ] = mfcbc;
-			(D.f[dirBE  ])[kb  ] = mfabc;
-			(D.f[dirTW  ])[kw  ] = mfcba;
-			(D.f[dirTN  ])[k   ] = mfbaa;
-			(D.f[dirBS  ])[kbs ] = mfbcc;
-			(D.f[dirBN  ])[kb  ] = mfbac;
-			(D.f[dirTS  ])[ks  ] = mfbca;
-			(D.f[dirZERO])[k   ] = mfbbb;
-			(D.f[dirTNE ])[k   ] = mfaaa;
-			(D.f[dirTSE ])[ks  ] = mfaca;
-			(D.f[dirBNE ])[kb  ] = mfaac;
-			(D.f[dirBSE ])[kbs ] = mfacc;
-			(D.f[dirTNW ])[kw  ] = mfcaa;
-			(D.f[dirTSW ])[ksw ] = mfcca;
-			(D.f[dirBNW ])[kbw ] = mfcac;
-			(D.f[dirBSW ])[kbsw] = mfccc;
-		}
-	}
+    ////////////////////////////////////////////////////////////////////////////////
+    const unsigned  x = threadIdx.x;  // Globaler x-Index 
+    const unsigned  y = blockIdx.x;   // Globaler y-Index 
+    const unsigned  z = blockIdx.y;   // Globaler z-Index 
+
+    const unsigned nx = blockDim.x;
+    const unsigned ny = gridDim.x;
+
+    const unsigned k = nx*(ny*z + y) + x;
+    //////////////////////////////////////////////////////////////////////////
+
+    if (k<size_Mat)
+    {
+        ////////////////////////////////////////////////////////////////////////////////
+        unsigned int BC;
+        BC = bcMatD[k];
+
+        if (BC >= GEO_FLUID/*(BC != GEO_SOLID) && (BC != GEO_VOID)*/)
+        {
+            Distributions27 D;
+            if (EvenOrOdd == true)
+            {
+                D.f[dirE   ] = &DDStart[dirE   *size_Mat];
+                D.f[dirW   ] = &DDStart[dirW   *size_Mat];
+                D.f[dirN   ] = &DDStart[dirN   *size_Mat];
+                D.f[dirS   ] = &DDStart[dirS   *size_Mat];
+                D.f[dirT   ] = &DDStart[dirT   *size_Mat];
+                D.f[dirB   ] = &DDStart[dirB   *size_Mat];
+                D.f[dirNE  ] = &DDStart[dirNE  *size_Mat];
+                D.f[dirSW  ] = &DDStart[dirSW  *size_Mat];
+                D.f[dirSE  ] = &DDStart[dirSE  *size_Mat];
+                D.f[dirNW  ] = &DDStart[dirNW  *size_Mat];
+                D.f[dirTE  ] = &DDStart[dirTE  *size_Mat];
+                D.f[dirBW  ] = &DDStart[dirBW  *size_Mat];
+                D.f[dirBE  ] = &DDStart[dirBE  *size_Mat];
+                D.f[dirTW  ] = &DDStart[dirTW  *size_Mat];
+                D.f[dirTN  ] = &DDStart[dirTN  *size_Mat];
+                D.f[dirBS  ] = &DDStart[dirBS  *size_Mat];
+                D.f[dirBN  ] = &DDStart[dirBN  *size_Mat];
+                D.f[dirTS  ] = &DDStart[dirTS  *size_Mat];
+                D.f[dirZERO] = &DDStart[dirZERO*size_Mat];
+                D.f[dirTNE ] = &DDStart[dirTNE *size_Mat];
+                D.f[dirTSW ] = &DDStart[dirTSW *size_Mat];
+                D.f[dirTSE ] = &DDStart[dirTSE *size_Mat];
+                D.f[dirTNW ] = &DDStart[dirTNW *size_Mat];
+                D.f[dirBNE ] = &DDStart[dirBNE *size_Mat];
+                D.f[dirBSW ] = &DDStart[dirBSW *size_Mat];
+                D.f[dirBSE ] = &DDStart[dirBSE *size_Mat];
+                D.f[dirBNW ] = &DDStart[dirBNW *size_Mat];
+            }
+            else
+            {
+                D.f[dirW   ] = &DDStart[dirE   *size_Mat];
+                D.f[dirE   ] = &DDStart[dirW   *size_Mat];
+                D.f[dirS   ] = &DDStart[dirN   *size_Mat];
+                D.f[dirN   ] = &DDStart[dirS   *size_Mat];
+                D.f[dirB   ] = &DDStart[dirT   *size_Mat];
+                D.f[dirT   ] = &DDStart[dirB   *size_Mat];
+                D.f[dirSW  ] = &DDStart[dirNE  *size_Mat];
+                D.f[dirNE  ] = &DDStart[dirSW  *size_Mat];
+                D.f[dirNW  ] = &DDStart[dirSE  *size_Mat];
+                D.f[dirSE  ] = &DDStart[dirNW  *size_Mat];
+                D.f[dirBW  ] = &DDStart[dirTE  *size_Mat];
+                D.f[dirTE  ] = &DDStart[dirBW  *size_Mat];
+                D.f[dirTW  ] = &DDStart[dirBE  *size_Mat];
+                D.f[dirBE  ] = &DDStart[dirTW  *size_Mat];
+                D.f[dirBS  ] = &DDStart[dirTN  *size_Mat];
+                D.f[dirTN  ] = &DDStart[dirBS  *size_Mat];
+                D.f[dirTS  ] = &DDStart[dirBN  *size_Mat];
+                D.f[dirBN  ] = &DDStart[dirTS  *size_Mat];
+                D.f[dirZERO] = &DDStart[dirZERO*size_Mat];
+                D.f[dirBSW ] = &DDStart[dirTNE *size_Mat];
+                D.f[dirBNE ] = &DDStart[dirTSW *size_Mat];
+                D.f[dirBNW ] = &DDStart[dirTSE *size_Mat];
+                D.f[dirBSE ] = &DDStart[dirTNW *size_Mat];
+                D.f[dirTSW ] = &DDStart[dirBNE *size_Mat];
+                D.f[dirTNE ] = &DDStart[dirBSW *size_Mat];
+                D.f[dirTNW ] = &DDStart[dirBSE *size_Mat];
+                D.f[dirTSE ] = &DDStart[dirBNW *size_Mat];
+            }
+            ////////////////////////////////////////////////////////////////////////////////
+            //index
+            unsigned int kw = neighborX[k];
+            unsigned int ks = neighborY[k];
+            unsigned int kb = neighborZ[k];
+            unsigned int ksw = neighborY[kw];
+            unsigned int kbw = neighborZ[kw];
+            unsigned int kbs = neighborZ[ks];
+            unsigned int kbsw = neighborZ[ksw];
+            ////////////////////////////////////////////////////////////////////////////////////
+            real mfcbb = (D.f[dirE   ])[k   ];
+            real mfabb = (D.f[dirW   ])[kw  ];
+            real mfbcb = (D.f[dirN   ])[k   ];
+            real mfbab = (D.f[dirS   ])[ks  ];
+            real mfbbc = (D.f[dirT   ])[k   ];
+            real mfbba = (D.f[dirB   ])[kb  ];
+            real mfccb = (D.f[dirNE  ])[k   ];
+            real mfaab = (D.f[dirSW  ])[ksw ];
+            real mfcab = (D.f[dirSE  ])[ks  ];
+            real mfacb = (D.f[dirNW  ])[kw  ];
+            real mfcbc = (D.f[dirTE  ])[k   ];
+            real mfaba = (D.f[dirBW  ])[kbw ];
+            real mfcba = (D.f[dirBE  ])[kb  ];
+            real mfabc = (D.f[dirTW  ])[kw  ];
+            real mfbcc = (D.f[dirTN  ])[k   ];
+            real mfbaa = (D.f[dirBS  ])[kbs ];
+            real mfbca = (D.f[dirBN  ])[kb  ];
+            real mfbac = (D.f[dirTS  ])[ks  ];
+            real mfbbb = (D.f[dirZERO])[k   ];
+            real mfccc = (D.f[dirTNE ])[k   ];
+            real mfaac = (D.f[dirTSW ])[ksw ];
+            real mfcac = (D.f[dirTSE ])[ks  ];
+            real mfacc = (D.f[dirTNW ])[kw  ];
+            real mfcca = (D.f[dirBNE ])[kb  ];
+            real mfaaa = (D.f[dirBSW ])[kbsw];
+            real mfcaa = (D.f[dirBSE ])[kbs ];
+            real mfaca = (D.f[dirBNW ])[kbw ];
+            ////////////////////////////////////////////////////////////////////////////////////
+            real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+                (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
+                ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
+
+            real rho = c1o1 + drho;
+            real OOrho = c1o1 / rho;
+            ////////////////////////////////////////////////////////////////////////////////////
+            real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
+                (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) +
+                (mfcbb - mfabb)) * OOrho;
+            real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
+                (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) +
+                (mfbcb - mfbab)) * OOrho;
+            real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
+                (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) +
+                (mfbbc - mfbba)) * OOrho;
+            ////////////////////////////////////////////////////////////////////////////////////
+            //the force be with you
+            real fx = forces[0] / (pow((double)c2o1, (double)level)); //zero;//0.0032653/(pow(two,level)); //0.000000005;//(two/1600000.0) / 120.0; //
+            real fy = forces[1] / (pow((double)c2o1, (double)level)); //zero;
+            real fz = forces[2] / (pow((double)c2o1, (double)level)); //zero;
+            vvx += fx*c1o2;
+            vvy += fy*c1o2;
+            vvz += fz*c1o2;
+            ////////////////////////////////////////////////////////////////////////////////////
+            //real omega = omega_in;
+            ////////////////////////////////////////////////////////////////////////////////////
+            //fast
+            //real oMdrho = c1o1; // comp special
+            //real m0, m1, m2;
+            real vx2;
+            real vy2;
+            real vz2;
+            vx2 = vvx*vvx;
+            vy2 = vvy*vvy;
+            vz2 = vvz*vvz;
+            ////////////////////////////////////////////////////////////////////////////////////
+            //real wadjust;
+            //real qudricLimitP = c1o100;// * 0.0001f;
+            //real qudricLimitM = c1o100;// * 0.0001f;
+            //real qudricLimitD = c1o100;// * 0.001f;
+            ////////////////////////////////////////////////////////////////////////////////////
+            //Hin
+            ////////////////////////////////////////////////////////////////////////////////////
+            // mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36  Konditionieren
+            ////////////////////////////////////////////////////////////////////////////////////
+            // Z - Dir
+            vf::lbm::forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, 36.0f, c1o36);
+            vf::lbm::forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, 9.0f , c1o9 );
+            vf::lbm::forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, 36.0f, c1o36);
+            vf::lbm::forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, 9.0f , c1o9 );
+            vf::lbm::forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, 2.25f, c4o9 );
+            vf::lbm::forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, 9.0f , c1o9 );
+            vf::lbm::forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, 36.0f, c1o36);
+            vf::lbm::forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, 9.0f , c1o9 );
+            vf::lbm::forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, 36.0f, c1o36);
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            // mit  1/6, 0, 1/18, 2/3, 0, 2/9, 1/6, 0, 1/18 Konditionieren
+            ////////////////////////////////////////////////////////////////////////////////////
+            // Y - Dir
+            vf::lbm::forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, 6.0f , c1o6 );
+            vf::lbm::forwardChimera(     mfaab, mfabb, mfacb, vvy, vy2);
+            vf::lbm::forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, 18.0f, c1o18);
+            vf::lbm::forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, 1.5f , c2o3 );
+            vf::lbm::forwardChimera(     mfbab, mfbbb, mfbcb, vvy, vy2);
+            vf::lbm::forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, 4.5f , c2o9 );
+            vf::lbm::forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, 6.0f , c1o6 );
+            vf::lbm::forwardChimera(     mfcab, mfcbb, mfccb, vvy, vy2);
+            vf::lbm::forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, 18.0f, c1o18);
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            // mit     1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9		Konditionieren
+            ////////////////////////////////////////////////////////////////////////////////////
+            // X - Dir
+            vf::lbm::forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
+            vf::lbm::forwardChimera(     mfaba, mfbba, mfcba, vvx, vx2);
+            vf::lbm::forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, 3.0f, c1o3);
+            vf::lbm::forwardChimera(     mfaab, mfbab, mfcab, vvx, vx2);
+            vf::lbm::forwardChimera(     mfabb, mfbbb, mfcbb, vvx, vx2);
+            vf::lbm::forwardChimera(     mfacb, mfbcb, mfccb, vvx, vx2);
+            vf::lbm::forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, 3.0f, c1o3);
+            vf::lbm::forwardChimera(     mfabc, mfbbc, mfcbc, vvx, vx2);
+            vf::lbm::forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, 9.0f, c1o9);
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            // Cumulants
+            ////////////////////////////////////////////////////////////////////////////////////
+            real OxxPyyPzz = c1o1;
+            ////////////////////////////////////////////////////////////
+            //3.
+            //////////////////////////////
+            real OxyyPxzz = c1o1;
+            real OxyyMxzz = c1o1;
+            //real Oxyz = c1o1;
+            ////////////////////////////////////////////////////////////
+            //4.
+            //////////////////////////////
+            real O4 = c1o1;
+            ////////////////////////////////////////////////////////////
+            //5.
+            //////////////////////////////
+            real O5 = c1o1;
+            ////////////////////////////////////////////////////////////
+            //6.
+            //////////////////////////////
+            real O6 = c1o1;
+            ////////////////////////////////////////////////////////////
+
+
+            //central moments to cumulants
+            //4.
+            real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) * OOrho;
+            real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) * OOrho;
+            real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) * OOrho;
+
+            real CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9*(drho * OOrho));
+            real CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9*(drho * OOrho));
+            real CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9*(drho * OOrho));
+
+            //5.
+            real CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)) * OOrho;
+            real CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)) * OOrho;
+            real CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)) * OOrho;
+
+            //6.
+            real CUMccc = mfccc + ((-c4o1 *  mfbbb * mfbbb
+                - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
+                - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+                - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho
+                + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+                + c2o1 * (mfcaa * mfaca * mfaac)
+                + c16o1 *  mfbba * mfbab * mfabb) * OOrho * OOrho
+                - c1o3 * (mfacc + mfcac + mfcca) * OOrho
+                - c1o9 * (mfcaa + mfaca + mfaac) * OOrho
+                + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
+                + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) * OOrho * OOrho  * c2o3
+                + c1o27*((drho * drho - drho) * OOrho * OOrho ));
+
+
+            //2.
+            // linear combinations
+            real mxxPyyPzz = mfcaa + mfaca + mfaac;
+            real mxxMyy = mfcaa - mfaca;
+            real mxxMzz = mfcaa - mfaac;
+
+            /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            //incl. correction		(hat noch nicht so gut funktioniert...Optimierungsbedarf??)
+            {
+                real dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 *  OxxPyyPzz * (mfaaa - mxxPyyPzz);
+                real dyuy = dxux + omega * c3o2 * mxxMyy;
+                real dzuz = dxux + omega * c3o2 * mxxMzz;
+
+                //relax
+                mxxPyyPzz += OxxPyyPzz*(mfaaa  - mxxPyyPzz)- c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);//-magicBulk*OxxPyyPzz;
+                mxxMyy    += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
+                mxxMzz    += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
+
+            }
+            /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+            /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            ////no correction
+            //mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz);//-magicBulk*OxxPyyPzz;
+            //mxxMyy += -(-omega) * (-mxxMyy);
+            //mxxMzz += -(-omega) * (-mxxMzz);
+            /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            mfabb += omega * (-mfabb);
+            mfbab += omega * (-mfbab);
+            mfbba += omega * (-mfbba);
+
+            //////////////////////////////////////////////////////////////////////////
+
+            // linear combinations back
+            mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
+            mfaca = c1o3 * (-c2o1*  mxxMyy + mxxMzz + mxxPyyPzz);
+            mfaac = c1o3 * (mxxMyy - c2o1* mxxMzz + mxxPyyPzz);
+
+            //3.
+            // linear combinations
+
+            real mxxyPyzz = mfcba + mfabc;
+            real mxxyMyzz = mfcba - mfabc;
+
+            real mxxzPyyz = mfcab + mfacb;
+            real mxxzMyyz = mfcab - mfacb;
+
+            real mxyyPxzz = mfbca + mfbac;
+            real mxyyMxzz = mfbca - mfbac;
+
+            //relax
+            //////////////////////////////////////////////////////////////////////////
+            mfbbb     += OxyyMxzz * (-mfbbb);
+            mxxyPyzz  += OxyyPxzz * (-mxxyPyzz);
+            mxxyMyzz  += OxyyMxzz * (-mxxyMyzz);
+            mxxzPyyz  += OxyyPxzz * (-mxxzPyyz);
+            mxxzMyyz  += OxyyMxzz * (-mxxzMyyz);
+            mxyyPxzz  += OxyyPxzz * (-mxyyPxzz);
+            mxyyMxzz  += OxyyMxzz * (-mxyyMxzz);
+            //////////////////////////////////////////////////////////////////////////
+
+            mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
+            mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2;
+            mfcab = (mxxzMyyz + mxxzPyyz) * c1o2;
+            mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2;
+            mfbca = (mxyyMxzz + mxyyPxzz) * c1o2;
+            mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
+
+            //4.
+            //////////////////////////////////////////////////////////////////////////
+            CUMacc += O4 * (-CUMacc);
+            CUMcac += O4 * (-CUMcac);
+            CUMcca += O4 * (-CUMcca);
+
+            CUMbbc += O4 * (-CUMbbc);
+            CUMbcb += O4 * (-CUMbcb);
+            CUMcbb += O4 * (-CUMcbb);
+            //////////////////////////////////////////////////////////////////////////
+
+
+            //5.
+            CUMbcc += O5 * (-CUMbcc);
+            CUMcbc += O5 * (-CUMcbc);
+            CUMccb += O5 * (-CUMccb);
+
+            //6.
+            CUMccc += O6 * (-CUMccc);
+
+
+
+            //back cumulants to central moments
+            //4.
+            mfcbb = CUMcbb + c1o3*((c3o1*mfcaa + c1o1) * mfabb + c6o1 * mfbba * mfbab) * OOrho; 
+            mfbcb = CUMbcb + c1o3*((c3o1*mfaca + c1o1) * mfbab + c6o1 * mfbba * mfabb) * OOrho;
+            mfbbc = CUMbbc + c1o3*((c3o1*mfaac + c1o1) * mfbba + c6o1 * mfbab * mfabb) * OOrho;
+
+            mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba)*c9o1 + c3o1 * (mfcaa + mfaca)) * OOrho - (drho * OOrho))*c1o9;
+            mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab)*c9o1 + c3o1 * (mfcaa + mfaac)) * OOrho - (drho * OOrho))*c1o9;
+            mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb)*c9o1 + c3o1 * (mfaac + mfaca)) * OOrho - (drho * OOrho))*c1o9;
+
+            //5.
+            mfbcc = CUMbcc + c1o3 *(c3o1*(mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + (mfbca + mfbac)) * OOrho;
+            mfcbc = CUMcbc + c1o3 *(c3o1*(mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + (mfcba + mfabc)) * OOrho;
+            mfccb = CUMccb + c1o3 *(c3o1*(mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) +  (mfacb + mfcab)) * OOrho;
+
+            //6.
+            mfccc = 
+                CUMccc - ((-c4o1 *  mfbbb * mfbbb
+                - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
+                - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+                - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho
+                + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+                + c2o1 * (mfcaa * mfaca * mfaac)
+                + c16o1 *  mfbba * mfbab * mfabb) * OOrho * OOrho
+                - c1o3 * (mfacc + mfcac + mfcca) * OOrho
+                - c1o9 * (mfcaa + mfaca + mfaac) * OOrho
+                + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
+                + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3
+                + c1o27*((drho * drho - drho) * OOrho * OOrho ));
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            //the force be with you
+            mfbaa = -mfbaa;
+            mfaba = -mfaba;
+            mfaab = -mfaab;
+            ////////////////////////////////////////////////////////////////////////////////////
+
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            //back
+            ////////////////////////////////////////////////////////////////////////////////////
+            //mit 1, 0, 1/3, 0, 0, 0, 1/3, 0, 1/9   Konditionieren
+            ////////////////////////////////////////////////////////////////////////////////////
+            // X - Dir
+            vf::lbm::backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
+            vf::lbm::backwardChimera(			mfaba, mfbba, mfcba, vvx, vx2);
+            vf::lbm::backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, 3.0f, c1o3);
+            vf::lbm::backwardChimera(			mfaab, mfbab, mfcab, vvx, vx2);
+            vf::lbm::backwardChimera(			mfabb, mfbbb, mfcbb, vvx, vx2);
+            vf::lbm::backwardChimera(			mfacb, mfbcb, mfccb, vvx, vx2);
+            vf::lbm::backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, 3.0f, c1o3);
+            vf::lbm::backwardChimera(			mfabc, mfbbc, mfcbc, vvx, vx2);
+            vf::lbm::backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, 9.0f, c1o9);
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            //mit 1/6, 2/3, 1/6, 0, 0, 0, 1/18, 2/9, 1/18   Konditionieren
+            ////////////////////////////////////////////////////////////////////////////////////
+            // Y - Dir
+            vf::lbm::backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, 6.0f , c1o6 );
+            vf::lbm::backwardChimera(			mfaab, mfabb, mfacb, vvy, vy2);
+            vf::lbm::backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, 18.0f, c1o18);
+            vf::lbm::backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, 1.5f , c2o3 );
+            vf::lbm::backwardChimera(			mfbab, mfbbb, mfbcb, vvy, vy2);
+            vf::lbm::backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, 4.5f , c2o9 );
+            vf::lbm::backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, 6.0f , c1o6 );
+            vf::lbm::backwardChimera(			mfcab, mfcbb, mfccb, vvy, vy2);
+            vf::lbm::backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, 18.0f, c1o18);
+
+            ////////////////////////////////////////////////////////////////////////////////////
+            //mit 1/36, 1/9, 1/36, 1/9, 4/9, 1/9, 1/36, 1/9, 1/36 Konditionieren
+            ////////////////////////////////////////////////////////////////////////////////////
+            // Z - Dir
+            vf::lbm::backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, 36.0f, c1o36);
+            vf::lbm::backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, 9.0f , c1o9 );
+            vf::lbm::backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, 36.0f, c1o36);
+            vf::lbm::backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, 9.0f , c1o9 );
+            vf::lbm::backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, 2.25f, c4o9 );
+            vf::lbm::backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, 9.0f , c1o9 );
+            vf::lbm::backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, 36.0f, c1o36);
+            vf::lbm::backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, 9.0f , c1o9 );
+            vf::lbm::backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, 36.0f, c1o36);
+
+            //////////////////////////////////////////////////////////////////////////////////////
+            real drhoPost =
+                ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
+                (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
+                    ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
+            mfbbb += drho - drhoPost;
+            ////////////////////////////////////////////////////////////////////////////////////
+            (D.f[dirE   ])[k   ] = mfabb;                                                                   
+            (D.f[dirW   ])[kw  ] = mfcbb;                                                                 
+            (D.f[dirN   ])[k   ] = mfbab;
+            (D.f[dirS   ])[ks  ] = mfbcb;
+            (D.f[dirT   ])[k   ] = mfbba;
+            (D.f[dirB   ])[kb  ] = mfbbc;
+            (D.f[dirNE  ])[k   ] = mfaab;
+            (D.f[dirSW  ])[ksw ] = mfccb;
+            (D.f[dirSE  ])[ks  ] = mfacb;
+            (D.f[dirNW  ])[kw  ] = mfcab;
+            (D.f[dirTE  ])[k   ] = mfaba;
+            (D.f[dirBW  ])[kbw ] = mfcbc;
+            (D.f[dirBE  ])[kb  ] = mfabc;
+            (D.f[dirTW  ])[kw  ] = mfcba;
+            (D.f[dirTN  ])[k   ] = mfbaa;
+            (D.f[dirBS  ])[kbs ] = mfbcc;
+            (D.f[dirBN  ])[kb  ] = mfbac;
+            (D.f[dirTS  ])[ks  ] = mfbca;
+            (D.f[dirZERO])[k   ] = mfbbb;
+            (D.f[dirTNE ])[k   ] = mfaaa;
+            (D.f[dirTSE ])[ks  ] = mfaca;
+            (D.f[dirBNE ])[kb  ] = mfaac;
+            (D.f[dirBSE ])[kbs ] = mfacc;
+            (D.f[dirTNW ])[kw  ] = mfcaa;
+            (D.f[dirTSW ])[ksw ] = mfcca;
+            (D.f[dirBNW ])[kbw ] = mfcac;
+            (D.f[dirBSW ])[kbsw] = mfccc;
+        }
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////
 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
index 3589bcea15dd73771ff0f7579401e65da7010389..dfdbac44d197e55e3e78eb794692fd9443cb7ab6 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
@@ -131,6 +131,7 @@ extern "C" void KernelKumNewSP27(   unsigned int numberOfThreads,
 									int size_Mat,
 									bool EvenOrOdd);
 
+
 extern "C" void CumulantOnePreconditionedErrorDiffusionChimCompSP27(
 	unsigned int numberOfThreads,
 	real s9,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
index b41724df488f7aea97ed5e9e2bcf9282ff6bc370..288db43e7bcd36dc4d187982b86178d345601094 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
@@ -140,7 +140,6 @@ extern "C" __global__ void LB_Kernel_Kum_Comp_SP_27(real s9,
 													int size_Mat,
 													bool EvenOrOdd);
 
-
 extern "C" __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
 	real omega,
 	unsigned int* bcMatD,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
index a203cca15bfd0fccac53d1d86555a023063ea86a..8b80e5ea9dbed3deef4c4332b2d43bf62ba9e48b 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
@@ -353,6 +353,7 @@ extern "C" void KernelKumNewCompSP27(unsigned int numberOfThreads,
 		//													EvenOrOdd); 
 		//getLastCudaError("LB_Kernel_Kum_New_Comp_SP_27 execution failed"); 
 }
+
 //////////////////////////////////////////////////////////////////////////
 extern "C" void CumulantOnePreconditionedErrorDiffusionChimCompSP27(unsigned int numberOfThreads,
 																	real s9,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h
index 8932dadb3f92256cba279121193f94dbabdc0a6d..213127e64c5e7bfe2b3d4c9d1154c11d374f9552 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernel.h
@@ -1,26 +1,19 @@
-#ifndef KERNEL_H
-#define KERNEL_H
+#ifndef GPU_KERNEL_H
+#define GPU_KERNEL_H
 
-#include <DataTypes.h>
-
-#include <cuda_runtime.h>
-#include <helper_functions.h>
-#include <helper_cuda.h>
+#include <vector>
 
 #include "Kernel/Utilities/KernelGroup.h"
 #include "PreProcessor/PreProcessorType.h"
 
-#include <vector>
-
-
 class Kernel
 {
 public:
-    virtual ~Kernel() = default;
-	virtual void run() = 0;
+    virtual ~Kernel()  = default;
+    virtual void run() = 0;
 
-	virtual bool checkParameter() = 0;
-	virtual std::vector<PreProcessorType> getPreProcessorTypes() = 0;
-	virtual KernelGroup getKernelGroup() = 0;
+    virtual bool checkParameter()                                = 0;
+    virtual std::vector<PreProcessorType> getPreProcessorTypes() = 0;
+    virtual KernelGroup getKernelGroup()                         = 0;
 };
-#endif
\ No newline at end of file
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
index 6d06d4c9175c7935bb609c2e342b29c50c42846d..5e4c5aa08e37e88008da13466bfeed6893ec94f6 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
+++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
@@ -2,26 +2,26 @@
 
 #include "Kernel/Utilities/CheckParameterStrategy/CheckParameterStrategy.h"
 
-bool KernelImp::checkParameter()
-{
-	return checkStrategy->checkParameter(para);
+bool KernelImp::checkParameter() 
+{ 
+    return checkStrategy->checkParameter(para);
 }
 
-std::vector<PreProcessorType> KernelImp::getPreProcessorTypes()
-{
-	return myPreProcessorTypes;
+std::vector<PreProcessorType> KernelImp::getPreProcessorTypes() 
+{ 
+    return myPreProcessorTypes;
 }
 
-KernelGroup KernelImp::getKernelGroup()
-{
-	return myKernelGroup;
+KernelGroup KernelImp::getKernelGroup() 
+{ 
+    return myKernelGroup; 
 }
 
 void KernelImp::setCheckParameterStrategy(std::shared_ptr<CheckParameterStrategy> strategy)
 {
-	this->checkStrategy = strategy;
+    this->checkStrategy = strategy;
 }
 
-KernelImp::KernelImp()
-{
-}
+KernelImp::KernelImp(std::shared_ptr<Parameter> para, int level) : para(para), level(level) {}
+
+KernelImp::KernelImp() {}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
index f902a7aaec82acd1eb0a55e97f5a4f31f4334575..08b71d42e79564d9eac887289a1ae36824095c46 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
@@ -5,28 +5,33 @@
 
 #include <memory>
 
+#include "Utilities/CudaGrid.h"
+
 class CheckParameterStrategy;
 class Parameter;
 
 class KernelImp : public Kernel
 {
 public:
-	virtual void run() = 0;
+    virtual void run() = 0;
 
-	bool checkParameter();
-	std::vector<PreProcessorType> getPreProcessorTypes();
-	KernelGroup getKernelGroup();
+    bool checkParameter();
+    std::vector<PreProcessorType> getPreProcessorTypes();
+    KernelGroup getKernelGroup();
 
-	void setCheckParameterStrategy(std::shared_ptr<CheckParameterStrategy> strategy);
+    void setCheckParameterStrategy(std::shared_ptr<CheckParameterStrategy> strategy);
 
 protected:
-	KernelImp();
+    KernelImp(std::shared_ptr<Parameter> para, int level);
+    KernelImp();
 
-	std::shared_ptr< Parameter> para;
-	std::shared_ptr<CheckParameterStrategy> checkStrategy;
-	int level;
-	std::vector<PreProcessorType> myPreProcessorTypes;
-	KernelGroup myKernelGroup;
+    std::shared_ptr<Parameter> para;
+    std::shared_ptr<CheckParameterStrategy> checkStrategy;
+    int level;
+    std::vector<PreProcessorType> myPreProcessorTypes;
+    KernelGroup myKernelGroup;
 
+    vf::gpu::CudaGrid cudaGrid;
 };
-#endif
\ No newline at end of file
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu
new file mode 100644
index 0000000000000000000000000000000000000000..4c82851996646590d8c246df5f940b58a308d52c
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.cu
@@ -0,0 +1,56 @@
+#include "BGKUnified.h"
+
+#include <stdexcept>
+
+#include "Parameter/Parameter.h"
+#include "../RunLBMKernel.cuh"
+#include "Kernel/Utilities/CudaGrid.h"
+
+#include <lbm/BGK.h>
+
+
+namespace vf
+{
+namespace gpu
+{
+
+
+BGKUnified::BGKUnified(std::shared_ptr<Parameter> para, int level) 
+    : KernelImp(para, level)
+{
+#ifndef BUILD_CUDA_LTO
+    throw std::invalid_argument("To use the BGKUnified kernel, pass -DBUILD_CUDA_LTO=ON to cmake. Requires: CUDA 11.2 & cc 5.0");
+#endif
+
+    myPreProcessorTypes.push_back(InitCompSP27);
+
+    myKernelGroup = BasicKernel;
+
+    this->cudaGrid = CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP);
+}
+
+
+void BGKUnified::run()
+{
+    GPUKernelParameter kernelParameter{ para->getParD(level)->omega,
+                                                 para->getParD(level)->geoSP,
+                                                 para->getParD(level)->neighborX_SP,
+                                                 para->getParD(level)->neighborY_SP,
+                                                 para->getParD(level)->neighborZ_SP,
+                                                 para->getParD(level)->d0SP.f[0],
+                                                 (int)para->getParD(level)->size_Mat_SP,
+                                                 nullptr, /* forces not used in bgk kernel */
+                                                 para->getParD(level)->evenOrOdd };
+
+    auto lambda = [] __device__(lbm::KernelParameter parameter) {
+        return lbm::bgk(parameter);
+    };
+
+    runKernel<<<cudaGrid.grid, cudaGrid.threads>>>(lambda, kernelParameter);
+
+    getLastCudaError("LB_Kernel_BGKUnified execution failed");
+}
+
+
+}
+}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h
new file mode 100644
index 0000000000000000000000000000000000000000..762eaaa5935bd01fa6ae002521a40e45cd239dfd
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h
@@ -0,0 +1,22 @@
+#ifndef GPU_BGKUnified_H
+#define GPU_BGKUnified_H
+
+#include "Kernel/KernelImp.h"
+
+namespace vf
+{
+namespace gpu
+{
+
+class BGKUnified : public KernelImp
+{
+public:
+    BGKUnified(std::shared_ptr<Parameter> para, int level);
+
+    void run();
+};
+
+}
+}
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cuh
index 6a8eede33bc210eacad184afb42a7011dd684708..208fbec553507812bfe4339577734292b248c027 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cuh
@@ -14,4 +14,4 @@ extern "C" __global__ void LB_Kernel_CumulantK15Comp(	real omega,
 														int level,
 														real* forces,
 														bool EvenOrOdd);
-#endif 
\ No newline at end of file
+#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
new file mode 100644
index 0000000000000000000000000000000000000000..b6f5d21ccf909f1ce3bcf11a4558f4771d87d021
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.cu
@@ -0,0 +1,54 @@
+#include "CumulantK15Unified.h"
+
+#include <stdexcept>
+
+#include "../RunLBMKernel.cuh"
+
+#include "Parameter/Parameter.h"
+
+#include <lbm/CumulantChimera.h>
+
+namespace vf
+{
+namespace gpu
+{
+
+CumulantK15Unified::CumulantK15Unified(std::shared_ptr<Parameter> para, int level)
+    : KernelImp(para, level)
+{
+#ifndef BUILD_CUDA_LTO
+    throw std::invalid_argument(
+        "To use the CumulantK15Unified kernel, pass -DBUILD_CUDA_LTO=ON to cmake. Requires: CUDA 11.2 & cc 5.0");
+#endif
+
+    myPreProcessorTypes.push_back(InitCompSP27);
+
+    myKernelGroup = BasicKernel;
+
+    this->cudaGrid = CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP);
+}
+
+void CumulantK15Unified::run()
+{
+    GPUKernelParameter kernelParameter{ para->getParD(level)->omega,
+                                                 para->getParD(level)->geoSP,
+                                                 para->getParD(level)->neighborX_SP,
+                                                 para->getParD(level)->neighborY_SP,
+                                                 para->getParD(level)->neighborZ_SP,
+                                                 para->getParD(level)->d0SP.f[0],
+                                                 (int)para->getParD(level)->size_Mat_SP,
+                                                 para->getParD(level)->forcing,
+                                                 para->getParD(level)->evenOrOdd };
+
+    auto lambda = [] __device__(lbm::KernelParameter parameter) {
+        return lbm::cumulantChimera(parameter, lbm::setRelaxationRatesK15);
+    };
+
+    vf::gpu::runKernel<<<cudaGrid.grid, cudaGrid.threads>>>(lambda, kernelParameter);
+
+    getLastCudaError("LB_Kernel_CumulantK15Unified execution failed");
+}
+
+
+}
+}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.h
new file mode 100644
index 0000000000000000000000000000000000000000..8756253950484e00773af89327589c3d8f157729
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.h
@@ -0,0 +1,21 @@
+#ifndef CUMULANT_K15_UNIFIED_COMP_H
+#define CUMULANT_K15_UNIFIED_COMP_H
+
+#include "Kernel/KernelImp.h"
+
+namespace vf
+{
+namespace gpu
+{
+class CumulantK15Unified : public KernelImp
+{
+public:
+    CumulantK15Unified(std::shared_ptr<Parameter> para, int level);
+    
+    void run();
+};
+
+}
+}
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
new file mode 100644
index 0000000000000000000000000000000000000000..989fce0c5e797ef90d644845f6c502bee700f6e1
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.cu
@@ -0,0 +1,56 @@
+#include "CumulantK17Unified.h"
+
+#include <stdexcept>
+
+#include "Parameter/Parameter.h"
+#include "../RunLBMKernel.cuh"
+#include "Kernel/Utilities/CudaGrid.h"
+
+#include <lbm/CumulantChimera.h>
+
+namespace vf
+{
+namespace gpu
+{
+
+
+CumulantK17Unified::CumulantK17Unified(std::shared_ptr<Parameter> para, int level)
+    : KernelImp(para, level)
+{
+#ifndef BUILD_CUDA_LTO
+    throw std::invalid_argument("To use the CumulantK17Unified kernel, pass -DBUILD_CUDA_LTO=ON to cmake. Requires: CUDA 11.2 & cc 5.0");
+#endif
+
+    myPreProcessorTypes.push_back(InitCompSP27);
+
+    myKernelGroup = BasicKernel;
+
+    this->cudaGrid = CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->size_Mat_SP);
+}
+
+
+
+void CumulantK17Unified::run()
+{
+    GPUKernelParameter kernelParameter{ para->getParD(level)->omega,
+                                                 para->getParD(level)->geoSP,
+                                                 para->getParD(level)->neighborX_SP,
+                                                 para->getParD(level)->neighborY_SP,
+                                                 para->getParD(level)->neighborZ_SP,
+                                                 para->getParD(level)->d0SP.f[0],
+                                                 (int)para->getParD(level)->size_Mat_SP,
+                                                 para->getParD(level)->forcing,
+                                                 para->getParD(level)->evenOrOdd };
+
+    auto lambda = [] __device__(lbm::KernelParameter parameter) {
+        return lbm::cumulantChimera(parameter, lbm::setRelaxationRatesK17);
+    };
+
+    runKernel<<<cudaGrid.grid, cudaGrid.threads>>>(lambda, kernelParameter);
+
+    getLastCudaError("LB_Kernel_CumulantK17Unified execution failed");
+}
+
+
+}
+}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h
new file mode 100644
index 0000000000000000000000000000000000000000..af8470b717ad7a98e7a3fcd507976353d9e8bd41
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h
@@ -0,0 +1,23 @@
+#ifndef CUMULANT_K17_UNIFIED_H
+#define CUMULANT_K17_UNIFIED_H
+
+#include "Kernel/KernelImp.h"
+
+namespace vf
+{
+namespace gpu
+{
+
+
+class CumulantK17Unified : public KernelImp
+{
+public:
+    CumulantK17Unified(std::shared_ptr<Parameter> para, int level);
+
+    void run();
+};
+
+}
+}
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/FluidFlowCompStrategy.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/FluidFlowCompStrategy.h
index 5bdf2e9bb0f00e3e2ae3642baa42545079f93c2c..c9a6675bd7a4b82442c55b23bac6f9b49b811938 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/FluidFlowCompStrategy.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/FluidFlowCompStrategy.h
@@ -15,4 +15,4 @@ private:
     FluidFlowCompStrategy();
 
 };
-#endif 
\ No newline at end of file
+#endif 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..b4097851b251b7447f6ce06856d0b9187999a20b
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/RunLBMKernel.cuh
@@ -0,0 +1,59 @@
+#ifndef GPU_CUMULANT_KERNEL_H
+#define GPU_CUMULANT_KERNEL_H
+
+
+#include <DataTypes.h>
+#include <cuda_runtime.h>
+
+#include <lbm/KernelParameter.h>
+
+#include "Kernel/Utilities/DistributionHelper.cuh"
+
+namespace vf
+{
+namespace gpu
+{
+
+
+struct GPUKernelParameter
+{
+    real omega;
+    unsigned int* typeOfGridNode;
+    unsigned int* neighborX;
+    unsigned int* neighborY;
+    unsigned int* neighborZ;
+    real* distributions;
+    int size_Mat;
+    real* forces;
+    bool isEvenTimestep;
+};
+
+template<typename KernelFunctor>
+__global__ void runKernel(KernelFunctor kernel, GPUKernelParameter kernelParameter)
+{
+    const uint k = getNodeIndex();
+    const uint nodeType = kernelParameter.typeOfGridNode[k];
+
+    if (!isValidFluidNode(k, kernelParameter.size_Mat, nodeType))
+        return;
+
+    DistributionWrapper distributionWrapper {
+        kernelParameter.distributions,
+        kernelParameter.size_Mat,
+        kernelParameter.isEvenTimestep,
+        k,
+        kernelParameter.neighborX,
+        kernelParameter.neighborY,
+        kernelParameter.neighborZ
+    };
+
+    lbm::KernelParameter parameter {distributionWrapper.distribution, kernelParameter.omega, kernelParameter.forces};
+    kernel(parameter);
+
+    distributionWrapper.write();
+}
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fa17bf449915eba509dbabbe71f556c19fa43bcf
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.cpp
@@ -0,0 +1,27 @@
+#include "CudaGrid.h"
+
+
+
+namespace vf
+{
+namespace gpu
+{
+
+CudaGrid::CudaGrid(unsigned int numberOfThreads, unsigned int size_matrix)
+{
+    int Grid = (size_matrix / numberOfThreads) + 1;
+    int Grid1, Grid2;
+    if (Grid > 512) {
+        Grid1 = 512;
+        Grid2 = (Grid / Grid1) + 1;
+    } else {
+        Grid1 = 1;
+        Grid2 = Grid;
+    }
+    
+    grid = dim3(Grid1, Grid2);
+    threads = dim3(numberOfThreads, 1, 1);
+}
+
+}
+}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.h b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.h
new file mode 100644
index 0000000000000000000000000000000000000000..27a18a58843b0de064009ab0f837518e3bb44b9d
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/CudaGrid.h
@@ -0,0 +1,25 @@
+#ifndef GPU_CUDA_GRID_H
+#define GPU_CUDA_GRID_H
+
+
+#include <cuda_runtime.h>
+
+namespace vf
+{
+namespace gpu
+{
+
+
+struct CudaGrid 
+{
+    dim3 threads;
+    dim3 grid;
+
+    CudaGrid(unsigned int numberOfThreads, unsigned int size_matrix);
+    CudaGrid() = default;
+};
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu
new file mode 100644
index 0000000000000000000000000000000000000000..bbb01d95410612d36d55f1e0113175a8741b9ade
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu
@@ -0,0 +1,169 @@
+#include "DistributionHelper.cuh"
+
+#include <cuda_runtime.h>
+
+#include "LBM/D3Q27.h"
+
+#include <lbm/constants/NumericConstants.h>
+#include <lbm/constants/D3Q27.h>
+
+namespace vf
+{
+namespace gpu
+{
+
+/// Build the per-direction base pointers into the flat distribution array.
+/// Direction d occupies the contiguous slice [d * size_Mat, (d + 1) * size_Mat)
+/// of "distributions". On odd timesteps every direction points at the slice of
+/// its OPPOSITE direction, as required by the esoteric-twist storage scheme;
+/// dirREST is its own opposite and stays in place.
+__device__ __host__ DistributionReferences27 getDistributionReferences27(real *distributions, unsigned int size_Mat, bool isEvenTimestep)
+{
+    DistributionReferences27 distribution_references;
+
+    if (isEvenTimestep) {
+        // Even timestep: each direction reads/writes its own slice.
+        distribution_references.f[dirE]    = &distributions[dirE * size_Mat];
+        distribution_references.f[dirW]    = &distributions[dirW * size_Mat];
+        distribution_references.f[dirN]    = &distributions[dirN * size_Mat];
+        distribution_references.f[dirS]    = &distributions[dirS * size_Mat];
+        distribution_references.f[dirT]    = &distributions[dirT * size_Mat];
+        distribution_references.f[dirB]    = &distributions[dirB * size_Mat];
+        distribution_references.f[dirNE]   = &distributions[dirNE * size_Mat];
+        distribution_references.f[dirSW]   = &distributions[dirSW * size_Mat];
+        distribution_references.f[dirSE]   = &distributions[dirSE * size_Mat];
+        distribution_references.f[dirNW]   = &distributions[dirNW * size_Mat];
+        distribution_references.f[dirTE]   = &distributions[dirTE * size_Mat];
+        distribution_references.f[dirBW]   = &distributions[dirBW * size_Mat];
+        distribution_references.f[dirBE]   = &distributions[dirBE * size_Mat];
+        distribution_references.f[dirTW]   = &distributions[dirTW * size_Mat];
+        distribution_references.f[dirTN]   = &distributions[dirTN * size_Mat];
+        distribution_references.f[dirBS]   = &distributions[dirBS * size_Mat];
+        distribution_references.f[dirBN]   = &distributions[dirBN * size_Mat];
+        distribution_references.f[dirTS]   = &distributions[dirTS * size_Mat];
+        distribution_references.f[dirREST] = &distributions[dirREST * size_Mat];
+        distribution_references.f[dirTNE]  = &distributions[dirTNE * size_Mat];
+        distribution_references.f[dirTSW]  = &distributions[dirTSW * size_Mat];
+        distribution_references.f[dirTSE]  = &distributions[dirTSE * size_Mat];
+        distribution_references.f[dirTNW]  = &distributions[dirTNW * size_Mat];
+        distribution_references.f[dirBNE]  = &distributions[dirBNE * size_Mat];
+        distribution_references.f[dirBSW]  = &distributions[dirBSW * size_Mat];
+        distribution_references.f[dirBSE]  = &distributions[dirBSE * size_Mat];
+        distribution_references.f[dirBNW]  = &distributions[dirBNW * size_Mat];
+    } else {
+        // Odd timestep: pairwise swapped slices (E<->W, N<->S, T<->B and
+        // analogously for all diagonal directions).
+        distribution_references.f[dirW]    = &distributions[dirE * size_Mat];
+        distribution_references.f[dirE]    = &distributions[dirW * size_Mat];
+        distribution_references.f[dirS]    = &distributions[dirN * size_Mat];
+        distribution_references.f[dirN]    = &distributions[dirS * size_Mat];
+        distribution_references.f[dirB]    = &distributions[dirT * size_Mat];
+        distribution_references.f[dirT]    = &distributions[dirB * size_Mat];
+        distribution_references.f[dirSW]   = &distributions[dirNE * size_Mat];
+        distribution_references.f[dirNE]   = &distributions[dirSW * size_Mat];
+        distribution_references.f[dirNW]   = &distributions[dirSE * size_Mat];
+        distribution_references.f[dirSE]   = &distributions[dirNW * size_Mat];
+        distribution_references.f[dirBW]   = &distributions[dirTE * size_Mat];
+        distribution_references.f[dirTE]   = &distributions[dirBW * size_Mat];
+        distribution_references.f[dirTW]   = &distributions[dirBE * size_Mat];
+        distribution_references.f[dirBE]   = &distributions[dirTW * size_Mat];
+        distribution_references.f[dirBS]   = &distributions[dirTN * size_Mat];
+        distribution_references.f[dirTN]   = &distributions[dirBS * size_Mat];
+        distribution_references.f[dirTS]   = &distributions[dirBN * size_Mat];
+        distribution_references.f[dirBN]   = &distributions[dirTS * size_Mat];
+        distribution_references.f[dirREST] = &distributions[dirREST * size_Mat];
+        distribution_references.f[dirBSW]  = &distributions[dirTNE * size_Mat];
+        distribution_references.f[dirBNE]  = &distributions[dirTSW * size_Mat];
+        distribution_references.f[dirBNW]  = &distributions[dirTSE * size_Mat];
+        distribution_references.f[dirBSE]  = &distributions[dirTNW * size_Mat];
+        distribution_references.f[dirTSW]  = &distributions[dirBNE * size_Mat];
+        distribution_references.f[dirTNE]  = &distributions[dirBSW * size_Mat];
+        distribution_references.f[dirTNW]  = &distributions[dirBSE * size_Mat];
+        distribution_references.f[dirTSE]  = &distributions[dirBNW * size_Mat];
+    }
+    return distribution_references;
+}
+
+/// Gathers the timestep-dependent direction pointers and the neighbor indices
+/// needed to address node k's 27 distributions, then immediately loads them
+/// into the member "distribution" via read().
+/// Note: members initialize in their declaration order (k, kw, ks, kb, ksw,
+/// kbw, kbs, kbsw — see DistributionHelper.cuh), so ksw/kbw/kbs/kbsw may
+/// safely derive from the already-initialized kw/ks/ksw.
+__device__ DistributionWrapper::DistributionWrapper(real *distributions, unsigned int size_Mat, bool isEvenTimestep,
+                                                    uint k, uint *neighborX, uint *neighborY, uint *neighborZ)
+    : distribution_references(getDistributionReferences27(distributions, size_Mat, isEvenTimestep)), k(k), kw(neighborX[k]), ks(neighborY[k]),
+      kb(neighborZ[k]), ksw(neighborY[kw]), kbw(neighborZ[kw]), kbs(neighborZ[ks]), kbsw(neighborZ[ksw])
+{
+    read();
+}
+
+/// Load the 27 distributions belonging to node k from the global arrays into
+/// the local "distribution" struct. Directions with a negative component are
+/// fetched through the corresponding neighbor index (kw, ks, kb, ksw, ...),
+/// matching the esoteric-twist addressing set up in the constructor.
+__device__ void DistributionWrapper::read()
+{
+    distribution.f[vf::lbm::dir::PZZ] = (distribution_references.f[dirE])[k];
+    distribution.f[vf::lbm::dir::MZZ] = (distribution_references.f[dirW])[kw];
+    distribution.f[vf::lbm::dir::ZPZ] = (distribution_references.f[dirN])[k];
+    distribution.f[vf::lbm::dir::ZMZ] = (distribution_references.f[dirS])[ks];
+    distribution.f[vf::lbm::dir::ZZP] = (distribution_references.f[dirT])[k];
+    distribution.f[vf::lbm::dir::ZZM] = (distribution_references.f[dirB])[kb];
+    distribution.f[vf::lbm::dir::PPZ] = (distribution_references.f[dirNE])[k];
+    distribution.f[vf::lbm::dir::MMZ] = (distribution_references.f[dirSW])[ksw];
+    distribution.f[vf::lbm::dir::PMZ] = (distribution_references.f[dirSE])[ks];
+    distribution.f[vf::lbm::dir::MPZ] = (distribution_references.f[dirNW])[kw];
+    distribution.f[vf::lbm::dir::PZP] = (distribution_references.f[dirTE])[k];
+    distribution.f[vf::lbm::dir::MZM] = (distribution_references.f[dirBW])[kbw];
+    distribution.f[vf::lbm::dir::PZM] = (distribution_references.f[dirBE])[kb];
+    distribution.f[vf::lbm::dir::MZP] = (distribution_references.f[dirTW])[kw];
+    distribution.f[vf::lbm::dir::ZPP] = (distribution_references.f[dirTN])[k];
+    distribution.f[vf::lbm::dir::ZMM] = (distribution_references.f[dirBS])[kbs];
+    distribution.f[vf::lbm::dir::ZPM] = (distribution_references.f[dirBN])[kb];
+    distribution.f[vf::lbm::dir::ZMP] = (distribution_references.f[dirTS])[ks];
+    distribution.f[vf::lbm::dir::PPP] = (distribution_references.f[dirTNE])[k];
+    distribution.f[vf::lbm::dir::MPP] = (distribution_references.f[dirTNW])[kw];
+    distribution.f[vf::lbm::dir::PMP] = (distribution_references.f[dirTSE])[ks];
+    distribution.f[vf::lbm::dir::MMP] = (distribution_references.f[dirTSW])[ksw];
+    distribution.f[vf::lbm::dir::PPM] = (distribution_references.f[dirBNE])[kb];
+    distribution.f[vf::lbm::dir::MPM] = (distribution_references.f[dirBNW])[kbw];
+    distribution.f[vf::lbm::dir::PMM] = (distribution_references.f[dirBSE])[kbs];
+    distribution.f[vf::lbm::dir::MMM] = (distribution_references.f[dirBSW])[kbsw];
+    distribution.f[vf::lbm::dir::ZZZ] = (distribution_references.f[dirREST])[k];
+}
+
+/// Store the (post-collision) values from "distribution" back into the global
+/// arrays, using exactly the same pointer/index pairing as read() so each
+/// value returns to the slot it was loaded from.
+__device__ void DistributionWrapper::write()
+{
+    (distribution_references.f[dirE])[k]      = distribution.f[vf::lbm::dir::PZZ];
+    (distribution_references.f[dirW])[kw]     = distribution.f[vf::lbm::dir::MZZ];
+    (distribution_references.f[dirN])[k]      = distribution.f[vf::lbm::dir::ZPZ];
+    (distribution_references.f[dirS])[ks]     = distribution.f[vf::lbm::dir::ZMZ];
+    (distribution_references.f[dirT])[k]      = distribution.f[vf::lbm::dir::ZZP];
+    (distribution_references.f[dirB])[kb]     = distribution.f[vf::lbm::dir::ZZM];
+    (distribution_references.f[dirNE])[k]     = distribution.f[vf::lbm::dir::PPZ];
+    (distribution_references.f[dirSW])[ksw]   = distribution.f[vf::lbm::dir::MMZ];
+    (distribution_references.f[dirSE])[ks]    = distribution.f[vf::lbm::dir::PMZ];
+    (distribution_references.f[dirNW])[kw]    = distribution.f[vf::lbm::dir::MPZ];
+    (distribution_references.f[dirTE])[k]     = distribution.f[vf::lbm::dir::PZP];
+    (distribution_references.f[dirBW])[kbw]   = distribution.f[vf::lbm::dir::MZM];
+    (distribution_references.f[dirBE])[kb]    = distribution.f[vf::lbm::dir::PZM];
+    (distribution_references.f[dirTW])[kw]    = distribution.f[vf::lbm::dir::MZP];
+    (distribution_references.f[dirTN])[k]     = distribution.f[vf::lbm::dir::ZPP];
+    (distribution_references.f[dirBS])[kbs]   = distribution.f[vf::lbm::dir::ZMM];
+    (distribution_references.f[dirBN])[kb]    = distribution.f[vf::lbm::dir::ZPM];
+    (distribution_references.f[dirTS])[ks]    = distribution.f[vf::lbm::dir::ZMP];
+    (distribution_references.f[dirTNE])[k]    = distribution.f[vf::lbm::dir::PPP];
+    (distribution_references.f[dirTNW])[kw]   = distribution.f[vf::lbm::dir::MPP];
+    (distribution_references.f[dirTSE])[ks]   = distribution.f[vf::lbm::dir::PMP];
+    (distribution_references.f[dirTSW])[ksw]  = distribution.f[vf::lbm::dir::MMP];
+    (distribution_references.f[dirBNE])[kb]   = distribution.f[vf::lbm::dir::PPM];
+    (distribution_references.f[dirBNW])[kbw]  = distribution.f[vf::lbm::dir::MPM];
+    (distribution_references.f[dirBSE])[kbs]  = distribution.f[vf::lbm::dir::PMM];
+    (distribution_references.f[dirBSW])[kbsw] = distribution.f[vf::lbm::dir::MMM];
+    (distribution_references.f[dirREST])[k]   = distribution.f[vf::lbm::dir::ZZZ];
+}
+
+/// Flatten the launch coordinates (threadIdx.x, blockIdx.x, blockIdx.y) into
+/// a linear 1D node index, consistent with the block/grid layout produced by
+/// CudaGrid (threads in x, blocks spread over grid x/y).
+__device__ unsigned int getNodeIndex()
+{
+    const unsigned threadInBlock = threadIdx.x;
+    const unsigned blockX        = blockIdx.x;
+    const unsigned blockY        = blockIdx.y;
+
+    const unsigned threadsPerBlock = blockDim.x;
+    const unsigned blocksPerRow    = gridDim.x;
+
+    // row-major over (blockY, blockX), then thread within the block
+    return (blockY * blocksPerRow + blockX) * threadsPerBlock + threadInBlock;
+}
+
+/// A thread may run collision only when its index is inside the matrix (the
+/// padded grid launches surplus threads) and the node is a fluid node or one
+/// of the three porous-media node types.
+__device__ bool isValidFluidNode(uint k, int size_Mat, uint nodeType)
+{
+    if (!(k < size_Mat))
+        return false;
+
+    const bool isFluid       = (nodeType == GEO_FLUID);
+    const bool isPorousMedia = (nodeType == GEO_PM_0) || (nodeType == GEO_PM_1) || (nodeType == GEO_PM_2);
+    return isFluid || isPorousMedia;
+}
+
+
+} // namespace gpu
+} // namespace vf
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..935030701924233d959fb69b74a7c3087feb0834
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh
@@ -0,0 +1,98 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __         
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
+//      \    \  |    |   ________________________________________________________________    
+//       \    \ |    |  |  ______________________________________________________________|   
+//        \    \|    |  |  |         __          __     __     __     ______      _______    
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file DistributionHelper.cuh
+//! \ingroup GPU
+//! \author Martin Schoenherr, Soeren Peters
+//=======================================================================================
+#ifndef DISTRIBUTUION_HELPER_CUH
+#define DISTRIBUTUION_HELPER_CUH
+
+#include "LBM/LB.h" 
+
+#include <lbm/KernelParameter.h>
+
+namespace vf
+{
+namespace gpu
+{
+
+/**
+*  Get references to the 27 lattice directions.
+*  @param distributions 1D real* array containing all data (number of elements = 27 * matrix_size)
+*  @param matrix_size number of discretization nodes
+*  @param isEvenTimestep the stored data layout depends on the timestep (esoteric-twist algorithm)
+*  @return a data struct containing the addresses of the 27 directions within the 1D distribution array
+*/
+__device__ __host__ DistributionReferences27 getDistributionReferences27(real* distributions, unsigned int matrix_size, bool isEvenTimestep);
+
+
+/**
+*  Holds the references to all directions and the concrete distributions for a single node.
+*  After instantiation the distributions are read to the member "distribution" from "distribution_references".
+*  After computation the data can be written back to "distribution_references".
+*/
+struct DistributionWrapper
+{
+    // Collects the direction pointers for the current timestep, computes the
+    // neighbor indices of node k and calls read() (see DistributionHelper.cu).
+    __device__ DistributionWrapper(
+        real* distributions,
+        unsigned int size_Mat,
+        bool isEvenTimestep,
+        uint k,
+        uint* neighborX,
+        uint* neighborY,
+        uint* neighborZ);
+
+    // Load the 27 distributions of node k into "distribution".
+    __device__ void read();
+
+    // Store "distribution" back through "distribution_references".
+    __device__ void write();
+
+    // origin distributions to read from and write to after computation
+    DistributionReferences27 distribution_references;
+
+    // distribution pass to kernel computation
+    vf::lbm::Distribution27 distribution;
+
+    // node index and derived neighbor indices; initialization order is the
+    // declaration order below (the constructor relies on k -> kw/ks/kb -> ksw)
+    const uint k;    // current node
+    const uint kw;   // = neighborX[k]
+    const uint ks;   // = neighborY[k]
+    const uint kb;   // = neighborZ[k]
+    const uint ksw;  // = neighborY[kw]
+    const uint kbw;  // = neighborZ[kw]
+    const uint kbs;  // = neighborZ[ks]
+    const uint kbsw; // = neighborZ[ksw]
+};
+
+__device__ unsigned int getNodeIndex();
+
+__device__ bool isValidFluidNode(uint k, int size_Mat, uint nodeType);
+
+}
+}
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelperTests.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelperTests.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..46d3fe890fd2f412251044cce6ade51f08ba0185
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelperTests.cpp
@@ -0,0 +1,93 @@
+#include <gmock/gmock.h>
+
+#include "DistributionHelper.cuh"
+
+#include "LBM/D3Q27.h"
+
+
+// Matcher factory: compares "real" values with the gmock matcher that fits
+// the configured floating-point precision (double vs. float build).
+auto RealEq = [](auto value) { 
+#ifdef VF_DOUBLE_ACCURACY
+    return testing::DoubleEq(value); 
+#else 
+    return testing::FloatEq(value);
+#endif
+};
+
+
+// With size_Mat == 1 each direction's slice is the single element
+// distributions_in[d]; on an even timestep every returned pointer must
+// therefore dereference to the value stored for its OWN direction.
+TEST(DistributionHelperTests, getPointerToDistribution_WhenEvenTimeStep_ShouldBeEqualToInput)
+{
+    real distributions_in[27];
+    for (int i = 0; i < 27; i++)
+        distributions_in[i] = i;
+    const uint size_Mat = 1;
+    const bool isEvenTimeStep = true;
+
+    // Distributions27 is a typedef alias of DistributionReferences27 (LB.h).
+    Distributions27 distribution_out = vf::gpu::getDistributionReferences27(distributions_in, size_Mat, isEvenTimeStep);
+
+    EXPECT_THAT(*distribution_out.f[dirE], RealEq(distributions_in[dirE]));
+    EXPECT_THAT(*distribution_out.f[dirW], RealEq(distributions_in[dirW]));
+    EXPECT_THAT(*distribution_out.f[dirN], RealEq(distributions_in[dirN]));
+    EXPECT_THAT(*distribution_out.f[dirS], RealEq(distributions_in[dirS]));
+    EXPECT_THAT(*distribution_out.f[dirT], RealEq(distributions_in[dirT]));
+    EXPECT_THAT(*distribution_out.f[dirB], RealEq(distributions_in[dirB]));
+    EXPECT_THAT(*distribution_out.f[dirNE], RealEq(distributions_in[dirNE]));
+    EXPECT_THAT(*distribution_out.f[dirSW], RealEq(distributions_in[dirSW]));
+    EXPECT_THAT(*distribution_out.f[dirSE], RealEq(distributions_in[dirSE]));
+    EXPECT_THAT(*distribution_out.f[dirNW], RealEq(distributions_in[dirNW]));
+    EXPECT_THAT(*distribution_out.f[dirTE], RealEq(distributions_in[dirTE]));
+    EXPECT_THAT(*distribution_out.f[dirBW], RealEq(distributions_in[dirBW]));
+    EXPECT_THAT(*distribution_out.f[dirBE], RealEq(distributions_in[dirBE]));
+    EXPECT_THAT(*distribution_out.f[dirTW], RealEq(distributions_in[dirTW]));
+    EXPECT_THAT(*distribution_out.f[dirTN], RealEq(distributions_in[dirTN]));
+    EXPECT_THAT(*distribution_out.f[dirBS], RealEq(distributions_in[dirBS]));
+    EXPECT_THAT(*distribution_out.f[dirBN], RealEq(distributions_in[dirBN]));
+    EXPECT_THAT(*distribution_out.f[dirTS], RealEq(distributions_in[dirTS]));
+    EXPECT_THAT(*distribution_out.f[dirREST], RealEq(distributions_in[dirREST]));
+    EXPECT_THAT(*distribution_out.f[dirTNE], RealEq(distributions_in[dirTNE]));
+    EXPECT_THAT(*distribution_out.f[dirTSW], RealEq(distributions_in[dirTSW]));
+    EXPECT_THAT(*distribution_out.f[dirTSE], RealEq(distributions_in[dirTSE]));
+    EXPECT_THAT(*distribution_out.f[dirTNW], RealEq(distributions_in[dirTNW]));
+    EXPECT_THAT(*distribution_out.f[dirBNE], RealEq(distributions_in[dirBNE]));
+    EXPECT_THAT(*distribution_out.f[dirBSW], RealEq(distributions_in[dirBSW]));
+    EXPECT_THAT(*distribution_out.f[dirBSE], RealEq(distributions_in[dirBSE]));
+    EXPECT_THAT(*distribution_out.f[dirBNW], RealEq(distributions_in[dirBNW]));
+}
+
+// Odd timestep: the esoteric-twist layout stores each distribution in the
+// slot of its opposite direction, so the returned pointers must be pairwise
+// swapped (e.g. f[dirW] dereferences to the value stored under dirE).
+TEST(DistributionHelperTests, getPointerToDistribution_WhenOddTimeStep_ShouldBeSwapped)
+{
+    real distributions_in[27];
+    for (int i = 0; i < 27; i++)
+        distributions_in[i] = i;
+    // uint (was: int) — consistent with the even-timestep test above and with
+    // the "unsigned int" parameter of getDistributionReferences27.
+    const uint size_Mat = 1;
+    const bool isEvenTimeStep = false;
+
+    Distributions27 distribution_out = vf::gpu::getDistributionReferences27(distributions_in, size_Mat, isEvenTimeStep);
+
+    EXPECT_THAT(*distribution_out.f[dirW], RealEq(distributions_in[dirE]));
+    EXPECT_THAT(*distribution_out.f[dirE], RealEq(distributions_in[dirW]));
+    EXPECT_THAT(*distribution_out.f[dirS], RealEq(distributions_in[dirN]));
+    EXPECT_THAT(*distribution_out.f[dirN], RealEq(distributions_in[dirS]));
+    EXPECT_THAT(*distribution_out.f[dirB], RealEq(distributions_in[dirT]));
+    EXPECT_THAT(*distribution_out.f[dirT], RealEq(distributions_in[dirB]));
+    EXPECT_THAT(*distribution_out.f[dirSW], RealEq(distributions_in[dirNE]));
+    EXPECT_THAT(*distribution_out.f[dirNE], RealEq(distributions_in[dirSW]));
+    EXPECT_THAT(*distribution_out.f[dirNW], RealEq(distributions_in[dirSE]));
+    EXPECT_THAT(*distribution_out.f[dirSE], RealEq(distributions_in[dirNW]));
+    EXPECT_THAT(*distribution_out.f[dirBW], RealEq(distributions_in[dirTE]));
+    EXPECT_THAT(*distribution_out.f[dirTE], RealEq(distributions_in[dirBW]));
+    EXPECT_THAT(*distribution_out.f[dirTW], RealEq(distributions_in[dirBE]));
+    EXPECT_THAT(*distribution_out.f[dirBE], RealEq(distributions_in[dirTW]));
+    EXPECT_THAT(*distribution_out.f[dirBS], RealEq(distributions_in[dirTN]));
+    EXPECT_THAT(*distribution_out.f[dirTN], RealEq(distributions_in[dirBS]));
+    EXPECT_THAT(*distribution_out.f[dirTS], RealEq(distributions_in[dirBN]));
+    EXPECT_THAT(*distribution_out.f[dirBN], RealEq(distributions_in[dirTS]));
+    EXPECT_THAT(*distribution_out.f[dirREST], RealEq(distributions_in[dirREST]));
+    EXPECT_THAT(*distribution_out.f[dirBSW], RealEq(distributions_in[dirTNE]));
+    EXPECT_THAT(*distribution_out.f[dirBNE], RealEq(distributions_in[dirTSW]));
+    EXPECT_THAT(*distribution_out.f[dirBNW], RealEq(distributions_in[dirTSE]));
+    EXPECT_THAT(*distribution_out.f[dirBSE], RealEq(distributions_in[dirTNW]));
+    EXPECT_THAT(*distribution_out.f[dirTSW], RealEq(distributions_in[dirBNE]));
+    EXPECT_THAT(*distribution_out.f[dirTNE], RealEq(distributions_in[dirBSW]));
+    EXPECT_THAT(*distribution_out.f[dirTNW], RealEq(distributions_in[dirBSE]));
+    EXPECT_THAT(*distribution_out.f[dirTSE], RealEq(distributions_in[dirBNW]));
+}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
index b0d167974275a227b99da6a0ac8e32e1f2976c13..5f63df1c9afc17a62a9a47ce82401ebba4453872 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
@@ -4,16 +4,19 @@
 
 //LBM kernel (compressible)
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27.h"
+#include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKUnified/BGKUnified.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp.h"
+#include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp.h"
+#include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Unified/CumulantK15Unified.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27.h"
@@ -96,13 +99,17 @@ void KernelFactoryImp::setKernelAtLevel(std::vector<std::shared_ptr<Kernel>> ker
 
 std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter> para, std::string kernel, int level)
 {
+    printf("Instantiating Kernel: %s\n", kernel.c_str());
 	std::shared_ptr<KernelImp> newKernel;
 	std::shared_ptr<CheckParameterStrategy> checkStrategy;
 
-	if (       kernel == "BGKCompSP27") {
-        newKernel     = BGKCompSP27::getNewInstance(para, level);				// compressible
-        checkStrategy = FluidFlowCompStrategy::getInstance();     //	   ||
-    } else if (kernel == "BGKPlusCompSP27") {									//     \/
+    if (kernel == "BGKCompSP27") {
+        newKernel     = BGKCompSP27::getNewInstance(para, level);   // compressible
+        checkStrategy = FluidFlowCompStrategy::getInstance();       //      ||
+    } else if (kernel == "BGKUnified") {                            //      \/
+        newKernel     = std::make_shared<vf::gpu::BGKUnified>(para, level);
+        checkStrategy = FluidFlowCompStrategy::getInstance();
+    } else if (kernel == "BGKPlusCompSP27") {
         newKernel     = BGKPlusCompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
     } else if (kernel == "MRTCompSP27") {
@@ -117,6 +124,12 @@ std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter>
     } else if (kernel == "CumulantK17Comp") {
         newKernel     = CumulantK17Comp::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
+    } else if (kernel == "CumulantK15Unified") {
+        newKernel     = std::make_shared<vf::gpu::CumulantK15Unified>(para, level);
+        checkStrategy = FluidFlowCompStrategy::getInstance();
+    } else if (kernel == "CumulantK17Unified") {
+        newKernel     = std::make_shared<vf::gpu::CumulantK17Unified>(para, level);
+        checkStrategy = FluidFlowCompStrategy::getInstance();
     } else if (kernel == "CumulantK17BulkComp") {
         newKernel     = CumulantK17BulkComp::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
diff --git a/src/gpu/VirtualFluids_GPU/LBM/D3Q27.h b/src/gpu/VirtualFluids_GPU/LBM/D3Q27.h
index 546dccd463a96ad3173517d09f222acde11e2448..5e769b603e7c6677b7f9e1189f5c4fe27051f795 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/D3Q27.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/D3Q27.h
@@ -21,6 +21,7 @@
 #define dirBN   /*f17*/  16
 #define dirTS   /*f18*/  17
 #define dirZERO /*f0 */  18
+#define dirREST /*f0 */  18
 
 #define dirTNE    /*f */  19
 #define dirBNE    /*f */  20
diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h
index f5f91738739b95f897a1192cb7c1d77a6cfea9cb..52f932c7b751b0d31e9bf733a4f3b5c439a6755f 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/LB.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h
@@ -144,7 +144,7 @@ typedef struct  Distri19{
 // Distribution functions f 27
 typedef struct  Distri27{
    real* f[27];
-} Distributions27;
+} Distributions27, DistributionReferences27;
 
 //Q for second order BCs
 typedef struct QforBC{
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index f61bd5571f60b5feb771f351a7877cb15a5842cb..9a4cd41cea8996db4a8e3ed92b44121c3d549f45 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -1,4 +1,10 @@
 #include "Simulation.h"
+
+#include <stdio.h>
+#include <vector>
+
+#include <helper_timer.h>
+
 #include "LBM/LB.h"
 #include "Communication/Communicator.h"
 #include "Communication/ExchangeData27.h"
@@ -85,12 +91,13 @@ void Simulation::init(SPtr<Parameter> para, SPtr<GridProvider> gridProvider, std
    para->setNumprocs(comm->getNummberOfProcess());
    devCheck(comm->mapCudaDevice(para->getMyID(), para->getNumprocs(), para->getDevices(), para->getMaxDev()));
 
+   para->initParameter();
+
    gridProvider->allocAndCopyForcing();
    gridProvider->allocAndCopyQuadricLimiters();
    gridProvider->setDimensions();
    gridProvider->setBoundingBox();
 
-   para->initParameter();
    para->setRe(para->getVelocity() * (real)1.0 / para->getViscosity());
    para->setPhi((real) 0.0);
    para->setlimitOfNodesForVTK(30000000); //max 30 Million nodes per VTK file
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.h b/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
index ae46034ce3e6080e0b9f03a92dd0326d640c6f32..d6c6702c4db8a671d4f6dbfea4c90cdf8f48356d 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
@@ -5,13 +5,12 @@
 #include <vector>
 #include <PointerDefinitions.h>
 
-
-
 #include "Output/LogWriter.hpp"
 #include "GPU/KineticEnergyAnalyzer.h"
 #include "GPU/EnstrophyAnalyzer.h"
 #include "Utilities/Buffer2D.hpp"
 #include "LBM/LB.h"
+
 #include "VirtualFluids_GPU_export.h"
 
 namespace vf
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
index 25946704edcad12513d1b8a2c70cd551eb1dd619..ccb13e83bcc68b9c9d85725666a02ea1c23ec293 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
@@ -186,6 +186,7 @@ struct ParameterStruct{
 	//velocities to fit the force
 	real *VxForce, *VyForce, *VzForce;
 	//////////////////////////////////////////////////////////////////////////
+	real *forcing;
 
 	//Measure Points/////////
 	std::vector<MeasurePoints> MP; 
diff --git a/src/gpu/VirtualFluids_GPU/Restart/RestartObjectTests.cpp b/src/gpu/VirtualFluids_GPU/Restart/RestartObjectTests.cpp
index a9b485ab25871066c201e631b97022bc6f7e08fb..c30f514c60ba1af2fb852ed8a68b36121ebced04 100644
--- a/src/gpu/VirtualFluids_GPU/Restart/RestartObjectTests.cpp
+++ b/src/gpu/VirtualFluids_GPU/Restart/RestartObjectTests.cpp
@@ -8,7 +8,7 @@ void saveAndLoad()
 {
     std::shared_ptr<RestartObject> write_object = std::make_shared<Type>();
 
-    write_object->fs = std::vector<std::vector<float>> {
+    write_object->fs = std::vector<std::vector<real>> {
                 { 1,2,3 },
                 { 4,5,6 }
             };
diff --git a/src/lbm/BGK.cpp b/src/lbm/BGK.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fa3af6777a0492687768dd4945cbf1e9b186f514
--- /dev/null
+++ b/src/lbm/BGK.cpp
@@ -0,0 +1,140 @@
+#include "BGK.h"
+
+
+#include <basics/Core/DataTypes.h>
+#include <basics/Core/RealConstants.h>
+
+#include "constants/NumericConstants.h"
+#include "constants/D3Q27.h"
+
+#include "MacroscopicQuantities.h"
+
+namespace vf
+{
+namespace lbm
+{
+
+using namespace constant;
+
+
+
+__host__ __device__ void bgk(KernelParameter parameter)
+{
+    auto& distribution = parameter.distribution;
+    const auto omega = parameter.omega;
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Read distributions: style of reading and writing the distributions from/to 
+    //! stored arrays dependent on timestep is based on the esoteric twist algorithm
+    //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+    //!
+    real mfcbb = distribution.f[dir::PZZ];
+    real mfabb = distribution.f[dir::MZZ];
+    real mfbcb = distribution.f[dir::ZPZ];
+    real mfbab = distribution.f[dir::ZMZ];
+    real mfbbc = distribution.f[dir::ZZP];
+    real mfbba = distribution.f[dir::ZZM];
+    real mfccb = distribution.f[dir::PPZ];
+    real mfaab = distribution.f[dir::MMZ];
+    real mfcab = distribution.f[dir::PMZ];
+    real mfacb = distribution.f[dir::MPZ];
+    real mfcbc = distribution.f[dir::PZP];
+    real mfaba = distribution.f[dir::MZM];
+    real mfcba = distribution.f[dir::PZM];
+    real mfabc = distribution.f[dir::MZP];
+    real mfbcc = distribution.f[dir::ZPP];
+    real mfbaa = distribution.f[dir::ZMM];
+    real mfbca = distribution.f[dir::ZPM];
+    real mfbac = distribution.f[dir::ZMP];
+    real mfccc = distribution.f[dir::PPP];
+    real mfacc = distribution.f[dir::MPP];
+    real mfcac = distribution.f[dir::PMP];
+    real mfaac = distribution.f[dir::MMP];
+    real mfcca = distribution.f[dir::PPM];
+    real mfaca = distribution.f[dir::MPM];
+    real mfcaa = distribution.f[dir::PMM];
+    real mfaaa = distribution.f[dir::MMM];
+    real mfbbb = distribution.f[dir::ZZZ];
+
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Acquire macroscopic quantities
+    const real drho = getDensity(distribution.f);
+    const real rho = c1o1 + drho;
+    const real OOrho = constant::c1o1 / (constant::c1o1 + drho);    
+
+    const real vvx = getIncompressibleVelocityX1(distribution.f) * OOrho;
+    const real vvy = getIncompressibleVelocityX2(distribution.f) * OOrho;
+    const real vvz = getIncompressibleVelocityX3(distribution.f) * OOrho;
+
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - BGK computation
+    const real cusq = c3o2*(vvx*vvx + vvy*vvy + vvz*vvz);
+
+    mfbbb = mfbbb  *(c1o1 + (-omega)) - (-omega)*   c8o27*  (drho - rho * cusq);
+    mfcbb = mfcbb  *(c1o1 + (-omega)) - (-omega)*   c2o27*  (drho + rho * (c3o1*(vvx)+c9o2*(vvx)*(vvx)-cusq));
+    mfabb = mfabb  *(c1o1 + (-omega)) - (-omega)*   c2o27*  (drho + rho * (c3o1*(-vvx) + c9o2*(-vvx)*(-vvx) - cusq));
+    mfbcb = mfbcb  *(c1o1 + (-omega)) - (-omega)*   c2o27*  (drho + rho * (c3o1*(vvy)+c9o2*(vvy)*(vvy)-cusq));
+    mfbab = mfbab  *(c1o1 + (-omega)) - (-omega)*   c2o27*  (drho + rho * (c3o1*(-vvy) + c9o2*(-vvy)*(-vvy) - cusq));
+    mfbbc = mfbbc  *(c1o1 + (-omega)) - (-omega)*   c2o27*  (drho + rho * (c3o1*(vvz)+c9o2*(vvz)*(vvz)-cusq));
+    mfbba = mfbba  *(c1o1 + (-omega)) - (-omega)*   c2o27*  (drho + rho * (c3o1*(-vvz) + c9o2*(-vvz)*(-vvz) - cusq));
+    mfccb = mfccb  *(c1o1 + (-omega)) - (-omega)*   c1o54*  (drho + rho * (c3o1*(vvx + vvy) + c9o2*(vvx + vvy)*(vvx + vvy) - cusq));
+    mfaab = mfaab  *(c1o1 + (-omega)) - (-omega)*   c1o54*  (drho + rho * (c3o1*(-vvx - vvy) + c9o2*(-vvx - vvy)*(-vvx - vvy) - cusq));
+    mfcab = mfcab  *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(vvx - vvy) + c9o2*(vvx - vvy)*(vvx - vvy) - cusq));
+    mfacb = mfacb  *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(-vvx + vvy) + c9o2*(-vvx + vvy)*(-vvx + vvy) - cusq));
+    mfcbc = mfcbc  *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(vvx + vvz) + c9o2*(vvx + vvz)*(vvx + vvz) - cusq));
+    mfaba = mfaba  *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(-vvx - vvz) + c9o2*(-vvx - vvz)*(-vvx - vvz) - cusq));
+    mfcba = mfcba  *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(vvx - vvz) + c9o2*(vvx - vvz)*(vvx - vvz) - cusq));
+    mfabc = mfabc  *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(-vvx + vvz) + c9o2*(-vvx + vvz)*(-vvx + vvz) - cusq));
+    mfbcc = mfbcc  *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(vvy + vvz) + c9o2*(vvy + vvz)*(vvy + vvz) - cusq));
+    mfbaa = mfbaa  *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(-vvy - vvz) + c9o2*(-vvy - vvz)*(-vvy - vvz) - cusq));
+    mfbca = mfbca  *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(vvy - vvz) + c9o2*(vvy - vvz)*(vvy - vvz) - cusq));
+    mfbac = mfbac  *(c1o1 + (-omega)) - (-omega)*    c1o54* (drho + rho * (c3o1*(-vvy + vvz) + c9o2*(-vvy + vvz)*(-vvy + vvz) - cusq));
+    mfccc = mfccc  *(c1o1 + (-omega)) - (-omega)*    c1o216*(drho + rho * (c3o1*(vvx + vvy + vvz) + c9o2*(vvx + vvy + vvz)*(vvx + vvy + vvz) - cusq));
+    mfaaa = mfaaa  *(c1o1 + (-omega)) - (-omega)*    c1o216*(drho + rho * (c3o1*(-vvx - vvy - vvz) + c9o2*(-vvx - vvy - vvz)*(-vvx - vvy - vvz) - cusq));
+    mfcca = mfcca  *(c1o1 + (-omega)) - (-omega)*    c1o216*(drho + rho * (c3o1*(vvx + vvy - vvz) + c9o2*(vvx + vvy - vvz)*(vvx + vvy - vvz) - cusq));
+    mfaac = mfaac  *(c1o1 + (-omega)) - (-omega)*    c1o216*(drho + rho * (c3o1*(-vvx - vvy + vvz) + c9o2*(-vvx - vvy + vvz)*(-vvx - vvy + vvz) - cusq));
+    mfcac = mfcac  *(c1o1 + (-omega)) - (-omega)*    c1o216*(drho + rho * (c3o1*(vvx - vvy + vvz) + c9o2*(vvx - vvy + vvz)*(vvx - vvy + vvz) - cusq));
+    mfaca = mfaca  *(c1o1 + (-omega)) - (-omega)*    c1o216*(drho + rho * (c3o1*(-vvx + vvy - vvz) + c9o2*(-vvx + vvy - vvz)*(-vvx + vvy - vvz) - cusq));
+    mfcaa = mfcaa  *(c1o1 + (-omega)) - (-omega)*    c1o216*(drho + rho * (c3o1*(vvx - vvy - vvz) + c9o2*(vvx - vvy - vvz)*(vvx - vvy - vvz) - cusq));
+    mfacc = mfacc  *(c1o1 + (-omega)) - (-omega)*    c1o216*(drho + rho * (c3o1*(-vvx + vvy + vvz) + c9o2*(-vvx + vvy + vvz)*(-vvx + vvy + vvz) - cusq));
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Write distributions: style of reading and writing the distributions from/to 
+    //! stored arrays dependent on timestep is based on the esoteric twist algorithm
+    //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+    //!
+    distribution.f[dir::MZZ] = mfcbb;
+    distribution.f[dir::PZZ] = mfabb;
+    distribution.f[dir::ZMZ] = mfbcb;
+    distribution.f[dir::ZPZ] = mfbab;
+    distribution.f[dir::ZZM] = mfbbc;
+    distribution.f[dir::ZZP] = mfbba;
+    distribution.f[dir::MMZ] = mfccb;
+    distribution.f[dir::PPZ] = mfaab;
+    distribution.f[dir::MPZ] = mfcab;
+    distribution.f[dir::PMZ] = mfacb;
+    distribution.f[dir::MZM] = mfcbc;
+    distribution.f[dir::PZP] = mfaba;
+    distribution.f[dir::MZP] = mfcba;
+    distribution.f[dir::PZM] = mfabc;
+    distribution.f[dir::ZMM] = mfbcc;
+    distribution.f[dir::ZPP] = mfbaa;
+    distribution.f[dir::ZMP] = mfbca;
+    distribution.f[dir::ZPM] = mfbac;
+    distribution.f[dir::MMM] = mfccc;
+    distribution.f[dir::PMM] = mfacc;
+    distribution.f[dir::MPM] = mfcac;
+    distribution.f[dir::PPM] = mfaac;
+    distribution.f[dir::MMP] = mfcca;
+    distribution.f[dir::PMP] = mfaca;
+    distribution.f[dir::MPP] = mfcaa;
+    distribution.f[dir::PPP] = mfaaa;
+    distribution.f[dir::ZZZ] = mfbbb;
+}
+
+
+}
+}
+
diff --git a/src/lbm/BGK.h b/src/lbm/BGK.h
new file mode 100644
index 0000000000000000000000000000000000000000..2c82f5bd445ee008954add02fd0d6d6093364e90
--- /dev/null
+++ b/src/lbm/BGK.h
@@ -0,0 +1,24 @@
+#ifndef LBM_BGK_H
+#define LBM_BGK_H
+// __host__/__device__ are defined away below, so this header also compiles in plain C++ (non-CUDA) translation units.
+#ifndef __host__
+#define __host__
+#endif
+#ifndef __device__
+#define __device__
+#endif
+
+#include <basics/Core/DataTypes.h>
+
+#include "KernelParameter.h"
+
+namespace vf
+{
+namespace lbm
+{
+// BGK (single-relaxation-time) collision kernel; callable from host or device code.
+__host__ __device__ void bgk(KernelParameter parameter);
+
+}
+}
+#endif
diff --git a/src/lbm/CMakeLists.txt b/src/lbm/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6e9f76793825ccf4e4e9e921e9fd4b6ab3584707
--- /dev/null
+++ b/src/lbm/CMakeLists.txt
@@ -0,0 +1,10 @@
+project(lbm LANGUAGES CXX)
+
+vf_add_library(NAME lbm PUBLIC_LINK basics)  # core LBM kernel library; links against the basics library
+
+if(BUILD_VF_GPU)
+    add_subdirectory(cuda)  # CUDA-specific sources only when the GPU build is enabled
+endif()
+
+
+vf_add_tests()  # project macro — presumably registers the *Tests.cpp files in this directory; confirm
diff --git a/src/lbm/Chimera.h b/src/lbm/Chimera.h
new file mode 100644
index 0000000000000000000000000000000000000000..6ffa0918aac4e6303efe4db82aa98ee645dc63e8
--- /dev/null
+++ b/src/lbm/Chimera.h
@@ -0,0 +1,121 @@
+#ifndef LBM_CHIMERA_H
+#define LBM_CHIMERA_H
+
+#ifndef __host__
+#define __host__
+#endif
+#ifndef __device__
+#define __device__
+#endif
+
+#include <basics/Core/DataTypes.h>
+
+#include <lbm/constants/NumericConstants.h>
+
+namespace vf
+{
+namespace lbm
+{
+
+using namespace constant;
+
+////////////////////////////////////////////////////////////////////////////////
+//! \brief forward chimera transformation \ref forwardInverseChimeraWithK 
+//! Transformation from distributions to central moments according to Eq. (6)-(14) in
+//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
+//! Modified for lower round-off errors.
+////////////////////////////////////////////////////////////////////////////////
+inline __host__ __device__ void forwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv,
+                                       real v2, real Kinverse, real K)
+{   // note: pass Kinverse before K — call sites use K == 1/Kinverse, so a transposed call can go unnoticed
+    const real m2 = mfa + mfc;
+    const real m1 = mfc - mfa;
+    real m0 = m2 + mfb;         // zeroth moment before the K shift
+
+    mfa = m0;
+    m0 *= Kinverse;
+    m0 += c1o1;                 // m0 is now (sum + K) * Kinverse, since c1o1 == K * Kinverse
+    mfb = (m1 * Kinverse - m0 * vv) * K;
+    mfc = ((m2 - c2o1 * m1 * vv) * Kinverse + v2 * m0) * K;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//! \brief backward chimera transformation \ref backwardInverseChimeraWithK
+//! Transformation from central moments to distributions according to Eq. (57)-(65) in
+//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
+//! Modified for lower round-off errors.
+////////////////////////////////////////////////////////////////////////////////
+inline __host__ __device__ void backwardInverseChimeraWithK(real &mfa, real &mfb, real &mfc, real vv,
+                                        real v2, real Kinverse, real K)
+{   // note: pass Kinverse before K — same argument-ordering caveat as forwardInverseChimeraWithK
+    const real m0 = (((mfc - mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 - vv) * c1o2) * K;
+    const real m1 = (((mfa - mfc) - c2o1 * mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (-v2)) * K;
+
+    mfc = (((mfc + mfb) * c1o2 + mfb * vv) * Kinverse + (mfa * Kinverse + c1o1) * (v2 + vv) * c1o2) * K;
+    mfa = m0;
+    mfb = m1;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//! \brief forward chimera transformation \ref forwardChimera 
+//! Transformation from distributions to central moments according to Eq. (6)-(14) in
+//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
+//! for \f$ K_{abc}=0 \f$. This is to avoid unnecessary floating point operations.
+//! Modified for lower round-off errors.
+////////////////////////////////////////////////////////////////////////////////
+inline __host__ __device__ void forwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2)
+{   // K == 0 case: skips the K-dependent terms of forwardInverseChimeraWithK
+    const real m1 = (mfa + mfc) + mfb;
+    const real m2 = mfc - mfa;
+
+    mfc = (mfc + mfa) + (v2 * m1 - c2o1 * vv * m2);
+    mfb = m2 - vv * m1;
+    mfa = m1;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//! \brief backward chimera transformation \ref backwardChimera 
+//! Transformation from central moments to distributions according to Eq. (57)-(65) in
+//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
+//! for \f$ K_{abc}=0 \f$. This is to avoid unnecessary floating point operations.
+//! Modified for lower round-off errors.
+////////////////////////////////////////////////////////////////////////////////
+inline __host__ __device__ void backwardChimera(real &mfa, real &mfb, real &mfc, real vv, real v2)
+{   // K == 0 case of the backward transform
+    const real ma = (mfc + mfa * (v2 - vv)) * c1o2 + mfb * (vv - c1o2);
+    const real mb = ((mfa - mfc) - mfa * v2) - c2o1 * mfb * vv;
+
+    mfc = (mfc + mfa * (v2 + vv)) * c1o2 + mfb * (vv + c1o2);
+    mfb = mb;
+    mfa = ma;
+}
+
+
+inline __host__ __device__ void forwardChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real K) 
+{
+    // Like forwardInverseChimeraWithK, but with the (m0 + K) shift distributed directly into the mfb/mfc updates.
+    const real m2 = mfa + mfc;
+    const real m1 = mfc - mfa;
+    const real m0 = m2 + mfb;
+    mfa = m0;
+    //m0     += K;
+    mfb = (m1 - K*vv) - m0 * vv;
+    mfc = ((m2 - c2o1*	m1 * vv) + v2*K) + v2 * m0;
+    //m0 += K;
+    //mfb = m1 - m0 * vv;
+    //mfc = m2 - two*	m1 * vv + v2 * m0;
+}
+
+
+inline __host__ __device__ void backwardChimeraWithK(real &mfa, real &mfb, real &mfc, real vv, real v2, real K) 
+{   // inverse of forwardChimeraWithK (see ChimeraTests round trip)
+    const real  m0 = (mfc - mfb)* c1o2 + mfb * (vv)+(mfa + K) * (v2 - vv) * c1o2;
+    const real m1 = (mfa - mfc) - c2o1* mfb * vv + (mfa + K) * (-v2);
+    mfc = (mfc + mfb)* c1o2 + mfb * (vv)+(mfa + K) * (v2 + vv) * c1o2;
+    mfa = m0;
+    mfb = m1;
+}
+
+}
+}
+#endif
diff --git a/src/lbm/ChimeraTests.cpp b/src/lbm/ChimeraTests.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..01abbe82764c276f53a4cdd479d333536199c3a9
--- /dev/null
+++ b/src/lbm/ChimeraTests.cpp
@@ -0,0 +1,133 @@
+#include <gmock/gmock.h>
+
+#include "Chimera.h"
+
+#ifdef VF_DOUBLE_ACCURACY
+#define REAL_EQ(a) testing::DoubleEq(a)
+#else
+#define REAL_EQ(a) testing::FloatEq(a)
+#endif
+
+/*
+* InverseChimeraWithK
+*/
+TEST(ChimeraTest, forwardInverseChimeraWithK)
+{
+    real mfa = 1;
+    real mfb = 1;
+    real mfc = 1;
+
+    const real vv = 1.;
+    const real v2 = 1.;
+
+    const real K = 1.;
+    const real Kinverse = 1 / K;
+
+    vf::lbm::forwardInverseChimeraWithK(mfa, mfb, mfc, vv, v2, Kinverse, K);  // fixed: args in declared order (Kinverse, K); transposed call was masked by K == 1
+
+    EXPECT_THAT(mfa, REAL_EQ(3.));  // mfa + mfb + mfc
+    EXPECT_THAT(mfb, REAL_EQ(-4.)); // -(mfa + mfb + mfc + 1)
+    EXPECT_THAT(mfc, REAL_EQ(6.));  // (mfa + mfc) + (mfa + mfb + mfc + 1)
+}
+
+
+TEST(ChimeraTest, backwardInverseChimeraWithK)
+{
+    // starting with the result values from the test above.
+    real mfa = 3.;
+    real mfb = -4.;
+    real mfc = 6.;
+
+    const real vv = 1.;
+    const real v2 = 1.;
+
+    const real K = 1.;
+    const real Kinverse = 1 / K;
+
+    vf::lbm::backwardInverseChimeraWithK(mfa, mfb, mfc, vv, v2, Kinverse, K);  // fixed: args in declared order (Kinverse, K); transposed call was masked by K == 1
+
+    // resulting in the start values from the test above.
+    EXPECT_THAT(mfa, REAL_EQ(1.));
+    EXPECT_THAT(mfb, REAL_EQ(1.));
+    EXPECT_THAT(mfc, REAL_EQ(1.));
+}
+
+/*
+* Chimera
+*/
+TEST(ChimeraTest, forwardChimera)
+{
+    real mfa = 1;
+    real mfb = 1;
+    real mfc = 1;
+
+    const real vv = 1.;
+    const real v2 = 1.;
+
+    vf::lbm::forwardChimera(mfa, mfb, mfc, vv, v2);  // K-free variant: mfb/mfc differ by K from the WithK tests
+
+    EXPECT_THAT(mfa, REAL_EQ(3.));  // mfa + mfb + mfc
+    EXPECT_THAT(mfb, REAL_EQ(-3.)); // -(mfa + mfb + mfc)
+    EXPECT_THAT(mfc, REAL_EQ(5.));  // (mfa + mfc) + (mfa + mfb + mfc)
+}
+
+
+TEST(ChimeraTest, backwardChimera)
+{
+    // starting with the result values from the test above.
+    real mfa = 3.;
+    real mfb = -3.;
+    real mfc = 5.;
+
+    const real vv = 1.;
+    const real v2 = 1.;
+
+    vf::lbm::backwardChimera(mfa, mfb, mfc, vv, v2);  // round trip: inverse of forwardChimera for the same vv, v2
+
+    // resulting in the start values from the test above.
+    EXPECT_THAT(mfa, REAL_EQ(1.));
+    EXPECT_THAT(mfb, REAL_EQ(1.));
+    EXPECT_THAT(mfc, REAL_EQ(1.));
+}
+
+/*
+* ChimeraWithK
+*/
+TEST(ChimeraTest, forwardChimeraWithK)
+{
+    real mfa = 1;
+    real mfb = 1;
+    real mfc = 1;
+
+    const real vv = 1.;
+    const real v2 = 1.;
+
+    const real K = 1.;
+
+    vf::lbm::forwardChimeraWithK(mfa, mfb, mfc, vv, v2, K);
+
+    EXPECT_THAT(mfa, REAL_EQ(3.));  // mfa + mfb + mfc
+    EXPECT_THAT(mfb, REAL_EQ(-4.)); // -(mfa + mfb + mfc + K)
+    EXPECT_THAT(mfc, REAL_EQ(6.));  // (mfa + mfc) + (mfa + mfb + mfc + K)
+}
+
+
+TEST(ChimeraTest, backwardChimeraWithK)
+{
+    // starting with the result values from the test above.
+    real mfa = 3.;
+    real mfb = -4.;
+    real mfc = 6.;
+
+    const real vv = 1.;
+    const real v2 = 1.;
+
+    const real K = 1.;
+
+    vf::lbm::backwardChimeraWithK(mfa, mfb, mfc, vv, v2, K);  // round trip: inverse of forwardChimeraWithK for the same vv, v2, K
+
+    // resulting in the start values from the test above.
+    EXPECT_THAT(mfa, REAL_EQ(1.));
+    EXPECT_THAT(mfb, REAL_EQ(1.));
+    EXPECT_THAT(mfc, REAL_EQ(1.));
+}
diff --git a/src/lbm/CumulantChimera.cpp b/src/lbm/CumulantChimera.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..65dc9b1f82f409447fda54fc0bc5f460533aad1b
--- /dev/null
+++ b/src/lbm/CumulantChimera.cpp
@@ -0,0 +1,453 @@
+#include "CumulantChimera.h"
+
+#include <cmath>
+
+#include <basics/Core/DataTypes.h>
+#include <basics/Core/RealConstants.h>
+
+#include "constants/NumericConstants.h"
+#include "constants/D3Q27.h"
+
+#include "Chimera.h"
+#include "MacroscopicQuantities.h"
+
+namespace vf
+{
+namespace lbm
+{
+
+using namespace constant;
+
+
+////////////////////////////////////////////////////////////////////////////////////
+//! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations according to
+//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
+//!  => [NAME IN PAPER]=[NAME IN CODE]=[DEFAULT VALUE].
+//!  - Trace of second order cumulants \f$ C_{200}+C_{020}+C_{002} \f$ used to adjust bulk  viscosity:\f$\omega_2=OxxPyyPzz=1.0 \f$.
+//!  - Third order cumulants \f$ C_{120}+C_{102}, C_{210}+C_{012}, C_{201}+C_{021} \f$: \f$ \omega_3=OxyyPxzz   \f$ set according to Eq. (111) with simplifications assuming \f$ \omega_2=1.0\f$.
+//!  - Third order cumulants \f$ C_{120}-C_{102}, C_{210}-C_{012}, C_{201}-C_{021} \f$: \f$ \omega_4 =  OxyyMxzz \f$ set according to Eq. (112) with simplifications assuming \f$ \omega_2 = 1.0\f$.
+//!  - Third order cumulants \f$ C_{111} \f$: \f$ \omega_5 = Oxyz \f$ set according to Eq. (113) with   simplifications assuming \f$ \omega_2 = 1.0\f$  (modify for different bulk viscosity).
+//!  - Fourth order cumulants \f$ C_{220}, C_{202}, C_{022}, C_{211}, C_{121}, C_{112} \f$: for simplification  all set to the same default value \f$ \omega_6=\omega_7=\omega_8=O4=1.0 \f$.
+//!  - Fifth order cumulants \f$ C_{221}, C_{212}, C_{122}\f$: \f$\omega_9=O5=1.0\f$.
+//!  - Sixth order cumulant \f$ C_{222}\f$: \f$\omega_{10}=O6=1.0\f$.
+//////////////////////////////////////////////////////////////////////////
+__host__ __device__ void setRelaxationRatesK17(real omega, real &OxxPyyPzz, real &OxyyPxzz, real &OxyyMxzz, real &Oxyz,
+                                               real &O4, real &O5, real &O6)
+{
+    OxxPyyPzz = c1o1;   // omega_2: trace of second order cumulants / bulk viscosity (default 1.0)
+
+    OxyyPxzz = c8o1 * (-c2o1 + omega) * (c1o1 + c2o1 * omega) / (-c8o1 - c14o1 * omega + c7o1 * omega * omega);   // omega_3, Eq. (111), simplified for omega_2 == 1
+    OxyyMxzz = c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1 * omega) / (c56o1 - c50o1 * omega + c9o1 * omega * omega);   // omega_4, Eq. (112), simplified for omega_2 == 1
+    Oxyz     = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1 * omega + c3o1 * omega * omega) /
+                (c48o1 + c152o1 * omega - c130o1 * omega * omega + c29o1 * omega * omega * omega);   // omega_5, Eq. (113), simplified for omega_2 == 1
+
+    O4 = c1o1;   // omega_6..omega_8: fourth order cumulants
+
+    O5 = c1o1;   // omega_9: fifth order cumulants
+
+    O6 = c1o1;   // omega_10: sixth order cumulant
+}
+
+
+__host__ __device__ void setRelaxationRatesK15(real omega, real &OxxPyyPzz, real &OxyyPxzz, real &OxyyMxzz, real &Oxyz,
+                                               real &O4, real &O5, real &O6)
+{   // K15 variant: every non-hydrodynamic rate fixed to 1.0; omega is unused, signature matches RelaxationRatesFunctor
+    OxxPyyPzz = c1o1;
+
+    OxyyPxzz = c1o1;
+    OxyyMxzz = c1o1;
+    Oxyz     = c1o1;
+
+    O4 = c1o1;
+
+    O5 = c1o1;
+
+    O6 = c1o1;
+}
+
+//////////////////////////////////////////////////////////////////////////
+//! Cumulant K17 Kernel is based on \ref
+//! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
+//! and \ref
+//! <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.07.004 ]</b></a>
+//////////////////////////////////////////////////////////////////////////
+__host__ __device__ void cumulantChimera(KernelParameter parameter, RelaxationRatesFunctor setRelaxationRates)
+{
+    auto& distribution = parameter.distribution;
+    const auto omega = parameter.omega;
+    const auto* forces = parameter.forces;
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Read distributions: style of reading and writing the distributions from/to 
+    //! stored arrays dependent on timestep is based on the esoteric twist algorithm
+    //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+    //!
+    real mfcbb = distribution.f[dir::PZZ];
+    real mfabb = distribution.f[dir::MZZ];
+    real mfbcb = distribution.f[dir::ZPZ];
+    real mfbab = distribution.f[dir::ZMZ];
+    real mfbbc = distribution.f[dir::ZZP];
+    real mfbba = distribution.f[dir::ZZM];
+    real mfccb = distribution.f[dir::PPZ];
+    real mfaab = distribution.f[dir::MMZ];
+    real mfcab = distribution.f[dir::PMZ];
+    real mfacb = distribution.f[dir::MPZ];
+    real mfcbc = distribution.f[dir::PZP];
+    real mfaba = distribution.f[dir::MZM];
+    real mfcba = distribution.f[dir::PZM];
+    real mfabc = distribution.f[dir::MZP];
+    real mfbcc = distribution.f[dir::ZPP];
+    real mfbaa = distribution.f[dir::ZMM];
+    real mfbca = distribution.f[dir::ZPM];
+    real mfbac = distribution.f[dir::ZMP];
+    real mfccc = distribution.f[dir::PPP];
+    real mfacc = distribution.f[dir::MPP];
+    real mfcac = distribution.f[dir::PMP];
+    real mfaac = distribution.f[dir::MMP];
+    real mfcca = distribution.f[dir::PPM];
+    real mfaca = distribution.f[dir::MPM];
+    real mfcaa = distribution.f[dir::PMM];
+    real mfaaa = distribution.f[dir::MMM];
+    real mfbbb = distribution.f[dir::ZZZ];
+
+
+    const real drho = getDensity(distribution.f);
+    const real OOrho = c1o1 / (c1o1 + drho);    
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) \ref
+    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
+    //! NOTE(review): all three velocity components below add forces[0]*c1o2; vvy/vvz presumably should use forces[1]/forces[2] — confirm and fix.
+    const real vvx = getIncompressibleVelocityX1(distribution.f) * OOrho + forces[0] * c1o2;
+    const real vvy = getIncompressibleVelocityX2(distribution.f) * OOrho + forces[0] * c1o2;
+    const real vvz = getIncompressibleVelocityX3(distribution.f) * OOrho + forces[0] * c1o2;
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    // calculate the square of velocities for this lattice node
+    const real vx2 = vvx*vvx;
+    const real vy2 = vvy*vvy;
+    const real vz2 = vvz*vvz;
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
+    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa  2015.05.001 ]</b></a>
+    //! see also Eq. (6)-(14) in \ref
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
+    //!
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Z - Dir
+    vf::lbm::forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36);
+    vf::lbm::forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2,  c9o1,  c1o9);
+    vf::lbm::forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36);
+    vf::lbm::forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2,  c9o1,  c1o9);
+    vf::lbm::forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2,  c9o4,  c4o9);
+    vf::lbm::forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2,  c9o1,  c1o9);
+    vf::lbm::forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36);
+    vf::lbm::forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2,  c9o1,  c1o9);
+    vf::lbm::forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36);   
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Y - Dir
+    vf::lbm::forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2,  c6o1,  c1o6);
+    vf::lbm::forwardChimera(            mfaab, mfabb, mfacb, vvy, vy2);
+    vf::lbm::forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18);
+    vf::lbm::forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2,  c3o2,  c2o3);
+    vf::lbm::forwardChimera(            mfbab, mfbbb, mfbcb, vvy, vy2);
+    vf::lbm::forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2,  c9o2,  c2o9);
+    vf::lbm::forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2,  c6o1,  c1o6);
+    vf::lbm::forwardChimera(            mfcab, mfcbb, mfccb, vvy, vy2);
+    vf::lbm::forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18);   
+    ////////////////////////////////////////////////////////////////////////////////////
+    // X - Dir  (NOTE(review): the last forwardInverseChimeraWithK below passes (c3o1, c1o9); sibling calls pair c3o1 with c1o3 — verify the K value)
+    vf::lbm::forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
+    vf::lbm::forwardChimera(            mfaba, mfbba, mfcba, vvx, vx2);
+    vf::lbm::forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3);
+    vf::lbm::forwardChimera(            mfaab, mfbab, mfcab, vvx, vx2);
+    vf::lbm::forwardChimera(            mfabb, mfbbb, mfcbb, vvx, vx2);
+    vf::lbm::forwardChimera(            mfacb, mfbcb, mfccb, vvx, vx2);
+    vf::lbm::forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3);
+    vf::lbm::forwardChimera(            mfabc, mfbbc, mfcbc, vvx, vx2);
+    vf::lbm::forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c3o1, c1o9); 
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations
+    real OxxPyyPzz;
+    real OxyyPxzz;
+    real OxyyMxzz;
+    real Oxyz;
+    real O4;
+    real O5;
+    real O6;
+
+    setRelaxationRates(omega, OxxPyyPzz, OxyyPxzz, OxyyMxzz, Oxyz, O4, O5, O6);
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (114) and (115) 
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05  040 ]</b></a>
+    //! with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for different bulk viscosity).
+    //!
+    const real A = (c4o1 + c2o1*omega - c3o1*omega*omega) / (c2o1 - c7o1*omega + c5o1*omega*omega);
+    const real B = (c4o1 + c28o1*omega - c14o1*omega*omega) / (c6o1 - c21o1*omega + c15o1*omega*omega);   
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Compute cumulants from central moments according to Eq. (20)-(23) in
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05  040 ]</b></a>
+    //!
+    ////////////////////////////////////////////////////////////
+    //4.
+    real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) * OOrho;
+    real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) * OOrho;
+    real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) * OOrho;  
+    real CUMcca = mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9*(drho   * OOrho));
+    real CUMcac = mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9*(drho   * OOrho));
+    real CUMacc = mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9*(drho   * OOrho));
+    ////////////////////////////////////////////////////////////
+    //5.
+    real CUMbcc = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba *  mfabc)) + c1o3 * (mfbca + mfbac)) * OOrho;
+    real CUMcbc = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba *  mfbac)) + c1o3 * (mfcba + mfabc)) * OOrho;
+    real CUMccb = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb *  mfcba)) + c1o3 * (mfacb + mfcab)) * OOrho;
+    ////////////////////////////////////////////////////////////
+    //6.
+    real CUMccc = mfccc + ((-c4o1 *  mfbbb * mfbbb
+        - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
+        - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+        - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho
+        + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+        + c2o1 * (mfcaa * mfaca * mfaac)
+        + c16o1 *  mfbba * mfbab * mfabb) * OOrho * OOrho
+        - c1o3 * (mfacc + mfcac + mfcca) * OOrho
+        - c1o9 * (mfcaa + mfaca + mfaac) * OOrho
+        + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
+        + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) * OOrho * OOrho  * c2o3
+        + c1o27*((drho * drho - drho) * OOrho * OOrho));    
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Compute linear combinations of second and third order cumulants
+    //!
+    ////////////////////////////////////////////////////////////
+    //2.
+    real mxxPyyPzz = mfcaa + mfaca + mfaac;
+    real mxxMyy = mfcaa - mfaca;
+    real mxxMzz = mfcaa - mfaac;
+    ////////////////////////////////////////////////////////////
+    //3.
+    real mxxyPyzz = mfcba + mfabc;
+    real mxxyMyzz = mfcba - mfabc;  
+    real mxxzPyyz = mfcab + mfacb;
+    real mxxzMyyz = mfcab - mfacb;  
+    real mxyyPxzz = mfbca + mfbac;
+    real mxyyMxzz = mfbca - mfbac;  
+    ////////////////////////////////////////////////////////////////////////////////////
+    //incl. correction
+    ////////////////////////////////////////////////////////////
+    //! - Compute velocity  gradients from second order cumulants according to Eq. (27)-(32)
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05  040 ]</b></a>
+    //! Further explanations of the correction in viscosity in Appendix H of
+    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa  2015.05.001 ]</b></a>
+    //! Note that the division by rho is omitted here as we need rho times the gradients later.
+    //!
+    const real Dxy = -c3o1*omega*mfbba;
+    const real Dxz = -c3o1*omega*mfbab;
+    const real Dyz = -c3o1*omega*mfabb;
+    const real dxux = c1o2 * (-omega) *(mxxMyy + mxxMzz) + c1o2 *  OxxPyyPzz * (mfaaa - mxxPyyPzz);
+    const real dyuy = dxux + omega * c3o2 * mxxMyy;
+    const real dzuz = dxux + omega * c3o2 * mxxMzz;
+    ////////////////////////////////////////////////////////////
+    //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05  040 ]</b></a>
+    //!
+    mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2  * dzuz);
+    mxxMyy    += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
+    mxxMzz    += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);   
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////no correction
+    //mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz);
+    //mxxMyy += -(-omega) * (-mxxMyy);
+    //mxxMzz += -(-omega) * (-mxxMzz);
+    //////////////////////////////////////////////////////////////////////////
+    mfabb += omega * (-mfabb);
+    mfbab += omega * (-mfbab);
+    mfbba += omega * (-mfbba);  
+    ////////////////////////////////////////////////////////////////////////////////////
+    //relax
+    //////////////////////////////////////////////////////////////////////////
+    // incl. limiter
+    //! Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to section 6 in \ref
+    //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123)
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05  040 ]</b></a>
+    //!
+
+    const real qudricLimitP = c1o100;
+    const real qudricLimitM = c1o100;
+    const real qudricLimitD = c1o100;
+
+    real wadjust   = Oxyz + (c1o1 - Oxyz)*abs_internal(mfbbb) / (abs_internal(mfbbb) + qudricLimitD);
+    mfbbb    += wadjust * (-mfbbb);
+    wadjust   = OxyyPxzz + (c1o1 - OxyyPxzz)*abs_internal(mxxyPyzz) / (abs_internal(mxxyPyzz) + qudricLimitP);
+    mxxyPyzz += wadjust * (-mxxyPyzz);
+    wadjust   = OxyyMxzz + (c1o1 - OxyyMxzz)*abs_internal(mxxyMyzz) / (abs_internal(mxxyMyzz) + qudricLimitM);
+    mxxyMyzz += wadjust * (-mxxyMyzz);
+    wadjust   = OxyyPxzz + (c1o1 - OxyyPxzz)*abs_internal(mxxzPyyz) / (abs_internal(mxxzPyyz) + qudricLimitP);
+    mxxzPyyz += wadjust * (-mxxzPyyz);
+    wadjust   = OxyyMxzz + (c1o1 - OxyyMxzz)*abs_internal(mxxzMyyz) / (abs_internal(mxxzMyyz) + qudricLimitM);
+    mxxzMyyz += wadjust * (-mxxzMyyz);
+    wadjust   = OxyyPxzz + (c1o1 - OxyyPxzz)*abs_internal(mxyyPxzz) / (abs_internal(mxyyPxzz) + qudricLimitP);
+    mxyyPxzz += wadjust * (-mxyyPxzz);
+    wadjust   = OxyyMxzz + (c1o1 - OxyyMxzz)*abs_internal(mxyyMxzz) / (abs_internal(mxyyMxzz) + qudricLimitM);
+    mxyyMxzz += wadjust * (-mxyyMxzz);
+    //////////////////////////////////////////////////////////////////////////
+    // no limiter
+    //mfbbb += OxyyMxzz * (-mfbbb);
+    //mxxyPyzz += OxyyPxzz * (-mxxyPyzz);
+    //mxxyMyzz += OxyyMxzz * (-mxxyMyzz);
+    //mxxzPyyz += OxyyPxzz * (-mxxzPyyz);
+    //mxxzMyyz += OxyyMxzz * (-mxxzMyyz);
+    //mxyyPxzz += OxyyPxzz * (-mxyyPxzz);
+    //mxyyMxzz += OxyyMxzz * (-mxyyMxzz);   
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Compute inverse linear combinations of second and third order cumulants
+    //!
+    mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
+    mfaca = c1o3 * (-c2o1*  mxxMyy + mxxMzz + mxxPyyPzz);
+    mfaac = c1o3 * (mxxMyy - c2o1* mxxMzz + mxxPyyPzz); 
+    mfcba = ( mxxyMyzz + mxxyPyzz) * c1o2;
+    mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2;
+    mfcab = ( mxxzMyyz + mxxzPyyz) * c1o2;
+    mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2;
+    mfbca = ( mxyyMxzz + mxyyPxzz) * c1o2;
+    mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
+    //////////////////////////////////////////////////////////////////////////  
+    //////////////////////////////////////////////////////////////////////////
+    //4.
+    // no limiter
+    //! - Relax fourth order cumulants to modified equilibrium for fourth order convergence of diffusion according to Eq. (43)-(48)
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
+    //!
+    CUMacc = -O4*(c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
+    CUMcac = -O4*(c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
+    CUMcca = -O4*(c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
+    CUMbbc = -O4*(c1o1 / omega - c1o2) * Dxy           * c1o3 * B + (c1o1 - O4) * (CUMbbc);
+    CUMbcb = -O4*(c1o1 / omega - c1o2) * Dxz           * c1o3 * B + (c1o1 - O4) * (CUMbcb);
+    CUMcbb = -O4*(c1o1 / omega - c1o2) * Dyz           * c1o3 * B + (c1o1 - O4) * (CUMcbb); 
+    //////////////////////////////////////////////////////////////////////////
+    //5.
+    CUMbcc += O5 * (-CUMbcc);
+    CUMcbc += O5 * (-CUMcbc);
+    CUMccb += O5 * (-CUMccb);   
+    //////////////////////////////////////////////////////////////////////////
+    //6.
+    CUMccc += O6 * (-CUMccc);   
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Compute central moments from post collision cumulants according to Eq. (53)-(56) in
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
+    //! 
+    //////////////////////////////////////////////////////////////////////////
+    //4.
+    mfcbb = CUMcbb + c1o3*((c3o1*mfcaa + c1o1) * mfabb + c6o1 * mfbba * mfbab) * OOrho;
+    mfbcb = CUMbcb + c1o3*((c3o1*mfaca + c1o1) * mfbab + c6o1 * mfbba * mfabb) * OOrho;
+    mfbbc = CUMbbc + c1o3*((c3o1*mfaac + c1o1) * mfbba + c6o1 * mfbab * mfabb) * OOrho; 
+    mfcca = CUMcca + (((mfcaa * mfaca + c2o1 * mfbba * mfbba)*c9o1 + c3o1 * (mfcaa + mfaca)) * OOrho - (drho *  OOrho))*c1o9;
+    mfcac = CUMcac + (((mfcaa * mfaac + c2o1 * mfbab * mfbab)*c9o1 + c3o1 * (mfcaa + mfaac)) * OOrho - (drho *  OOrho))*c1o9;
+    mfacc = CUMacc + (((mfaac * mfaca + c2o1 * mfabb * mfabb)*c9o1 + c3o1 * (mfaac + mfaca)) * OOrho - (drho *  OOrho))*c1o9; 
+    //////////////////////////////////////////////////////////////////////////
+    //5.
+    mfbcc = CUMbcc + c1o3 *(c3o1*(mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb +    mfbba * mfabc)) + (mfbca + mfbac)) * OOrho;
+    mfcbc = CUMcbc + c1o3 *(c3o1*(mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab +    mfbba * mfbac)) + (mfcba + mfabc)) * OOrho;
+    mfccb = CUMccb + c1o3 *(c3o1*(mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca +    mfabb * mfcba)) + (mfacb + mfcab)) * OOrho; 
+    //////////////////////////////////////////////////////////////////////////
+    //6.
+    mfccc =	CUMccc - ((-c4o1 *  mfbbb * mfbbb
+            - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca)
+            - c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc)
+            - c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) * OOrho
+            + (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac)
+                + c2o1 * (mfcaa * mfaca * mfaac)
+                + c16o1 *  mfbba * mfbab * mfabb) * OOrho * OOrho
+            - c1o3 * (mfacc + mfcac + mfcca) * OOrho
+            - c1o9 * (mfcaa + mfaca + mfaac) * OOrho
+            + (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba)
+                + (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 *(mfaac + mfaca + mfcaa)) * OOrho * OOrho * c2o3
+            + c1o27*((drho * drho - drho) * OOrho * OOrho));    
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Add acceleration (body force) to first order cumulants according to Eq. (85)-(87) in
+    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
+    //!
+    mfbaa = -mfbaa;
+    mfaba = -mfaba;
+    mfaab = -mfaab; 
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Chimera transform from central moments to well conditioned distributions as defined in Appendix J in
+    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
+    //! see also Eq. (88)-(96) in
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
+    //!
+    ////////////////////////////////////////////////////////////////////////////////////
+    // X - Dir
+    vf::lbm::backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
+    vf::lbm::backwardChimera(            mfaba, mfbba, mfcba, vvx, vx2);
+    vf::lbm::backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3);
+    vf::lbm::backwardChimera(            mfaab, mfbab, mfcab, vvx, vx2);
+    vf::lbm::backwardChimera(            mfabb, mfbbb, mfcbb, vvx, vx2);
+    vf::lbm::backwardChimera(            mfacb, mfbcb, mfccb, vvx, vx2);
+    vf::lbm::backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3);
+    vf::lbm::backwardChimera(            mfabc, mfbbc, mfcbc, vvx, vx2);
+    vf::lbm::backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9);    
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Y - Dir
+    vf::lbm::backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2,  c6o1,  c1o6);
+    vf::lbm::backwardChimera(            mfaab, mfabb, mfacb, vvy, vy2);
+    vf::lbm::backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18);
+    vf::lbm::backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2,  c3o2,  c2o3);
+    vf::lbm::backwardChimera(            mfbab, mfbbb, mfbcb, vvy, vy2);
+    vf::lbm::backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2,  c9o2,  c2o9);
+    vf::lbm::backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2,  c6o1,  c1o6);
+    vf::lbm::backwardChimera(            mfcab, mfcbb, mfccb, vvy, vy2);
+    vf::lbm::backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18);  
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Z - Dir
+    vf::lbm::backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36);
+    vf::lbm::backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2,  c9o1,  c1o9);
+    vf::lbm::backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36);
+    vf::lbm::backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2,  c9o1,  c1o9);
+    vf::lbm::backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2,  c9o4,  c4o9);
+    vf::lbm::backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2,  c9o1,  c1o9);
+    vf::lbm::backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36);
+    vf::lbm::backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2,  c9o1,  c1o9);
+    vf::lbm::backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36);
+
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Write distributions: style of reading and writing the distributions from/to 
+    //! stored arrays dependent on timestep is based on the esoteric twist algorithm
+    //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+    //!
+    distribution.f[dir::MZZ] = mfcbb;
+    distribution.f[dir::PZZ] = mfabb;
+    distribution.f[dir::ZMZ] = mfbcb;
+    distribution.f[dir::ZPZ] = mfbab;
+    distribution.f[dir::ZZM] = mfbbc;
+    distribution.f[dir::ZZP] = mfbba;
+    distribution.f[dir::MMZ] = mfccb;
+    distribution.f[dir::PPZ] = mfaab;
+    distribution.f[dir::MPZ] = mfcab;
+    distribution.f[dir::PMZ] = mfacb;
+    distribution.f[dir::MZM] = mfcbc;
+    distribution.f[dir::PZP] = mfaba;
+    distribution.f[dir::MZP] = mfcba;
+    distribution.f[dir::PZM] = mfabc;
+    distribution.f[dir::ZMM] = mfbcc;
+    distribution.f[dir::ZPP] = mfbaa;
+    distribution.f[dir::ZMP] = mfbca;
+    distribution.f[dir::ZPM] = mfbac;
+    distribution.f[dir::MMM] = mfccc;
+    distribution.f[dir::PMM] = mfacc;
+    distribution.f[dir::MPM] = mfcac;
+    distribution.f[dir::PPM] = mfaac;
+    distribution.f[dir::MMP] = mfcca;
+    distribution.f[dir::PMP] = mfaca;
+    distribution.f[dir::MPP] = mfcaa;
+    distribution.f[dir::PPP] = mfaaa;
+    distribution.f[dir::ZZZ] = mfbbb;
+}
+
+
+}
+}
+
diff --git a/src/lbm/CumulantChimera.h b/src/lbm/CumulantChimera.h
new file mode 100644
index 0000000000000000000000000000000000000000..e8740c7d3f5b988a6fdc5c3b16ab6a90e0a28b83
--- /dev/null
+++ b/src/lbm/CumulantChimera.h
@@ -0,0 +1,34 @@
+#ifndef LBM_CUMULANT_CHIMERA_H
+#define LBM_CUMULANT_CHIMERA_H
+
+#ifndef __host__
+#define __host__
+#endif
+#ifndef __device__
+#define __device__
+#endif
+
+#include <basics/Core/DataTypes.h>
+
+#include "KernelParameter.h"
+
+namespace vf
+{
+namespace lbm
+{
+
+__host__ __device__ void setRelaxationRatesK17(real omega, real &OxxPyyPzz, real &OxyyPxzz, real &OxyyMxzz, real &Oxyz,
+                                               real &O4, real &O5, real &O6);
+
+__host__ __device__ void setRelaxationRatesK15(real omega, real &OxxPyyPzz, real &OxyyPxzz, real &OxyyMxzz, real &Oxyz,
+                                               real &O4, real &O5, real &O6);
+
+using RelaxationRatesFunctor = void(*)(real omega, real &OxxPyyPzz, real &OxyyPxzz, real &OxyyMxzz, real &Oxyz,
+                                       real &O4, real &O5, real &O6);
+
+
+__host__ __device__ void cumulantChimera(KernelParameter parameter, RelaxationRatesFunctor setRelaxationRates);
+
+}
+}
+#endif
diff --git a/src/lbm/KernelParameter.cpp b/src/lbm/KernelParameter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e039214d218ef19f35e8adf927f36d3a6f1aa355
--- /dev/null
+++ b/src/lbm/KernelParameter.cpp
@@ -0,0 +1,33 @@
+#include "KernelParameter.h"
+
+#include <cmath>
+
+#include "MacroscopicQuantities.h"
+
+
+namespace vf
+{
+namespace lbm
+{
+
+
+
+inline __host__ __device__ real Distribution27::getDensity_() const
+{
+    return getDensity(f);
+}
+
+
+
+__host__ __device__ real abs_internal(real value)
+{
+#ifdef __CUDA_ARCH__
+    return ::abs(value);
+#else
+    return std::abs(value);
+#endif
+}
+
+
+}
+}
diff --git a/src/lbm/KernelParameter.h b/src/lbm/KernelParameter.h
new file mode 100644
index 0000000000000000000000000000000000000000..95226628110637f3794c8a1f7e6f6c1f6dda937b
--- /dev/null
+++ b/src/lbm/KernelParameter.h
@@ -0,0 +1,43 @@
+#ifndef LBM_KERNEL_PARAMETER_H
+#define LBM_KERNEL_PARAMETER_H
+
+#ifndef __host__
+#define __host__
+#endif
+#ifndef __device__
+#define __device__
+#endif
+
+#include <basics/Core/DataTypes.h>
+
+
+namespace vf
+{
+namespace lbm
+{
+
+struct Distribution27
+{
+    real f[27];
+
+    __host__ __device__ real getDensity_() const;
+};
+
+
+__host__ __device__ real abs_internal(real value);
+
+
+struct KernelParameter
+{
+    Distribution27& distribution;
+    real omega;
+    real* forces;
+};
+
+
+
+
+}
+}
+
+#endif
diff --git a/src/lbm/MacroscopicQuantities.h b/src/lbm/MacroscopicQuantities.h
new file mode 100644
index 0000000000000000000000000000000000000000..c37791294ff5b4edad21795f4ce0a32a18c5d236
--- /dev/null
+++ b/src/lbm/MacroscopicQuantities.h
@@ -0,0 +1,98 @@
+#ifndef LBM_CALCMAC_H
+#define LBM_CALCMAC_H
+
+#ifndef __host__
+#define __host__
+#endif
+#ifndef __device__
+#define __device__
+#endif
+
+#include <basics/Core/DataTypes.h>
+
+#include "constants/NumericConstants.h"
+#include "constants/D3Q27.h"
+
+namespace vf
+{
+namespace lbm
+{
+    
+////////////////////////////////////////////////////////////////////////////////////
+//! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) in
+//! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
+//!
+
+inline __host__ __device__ real getDensity(const real *const &f /*[27]*/)
+{
+    return ((f[dir::TNE] + f[dir::BSW]) + (f[dir::TSE] + f[dir::BNW])) + ((f[dir::BSE] + f[dir::TNW]) + (f[dir::TSW] + f[dir::BNE])) +
+           (((f[dir::NE] + f[dir::SW]) + (f[dir::SE] + f[dir::NW])) + ((f[dir::TE] + f[dir::BW]) + (f[dir::BE] + f[dir::TW])) +
+            ((f[dir::BN] + f[dir::TS]) + (f[dir::TN] + f[dir::BS]))) +
+           ((f[dir::E] + f[dir::W]) + (f[dir::N] + f[dir::S]) + (f[dir::T] + f[dir::B])) + f[dir::REST];
+}
+
+/*
+* Incompressible Macroscopic Quantities
+*/
+inline __host__ __device__ real getIncompressibleVelocityX1(const real *const &f /*[27]*/)
+{
+    return ((((f[dir::TNE] - f[dir::BSW]) + (f[dir::TSE] - f[dir::BNW])) + ((f[dir::BSE] - f[dir::TNW]) + (f[dir::BNE] - f[dir::TSW]))) +
+            (((f[dir::BE] - f[dir::TW]) + (f[dir::TE] - f[dir::BW])) + ((f[dir::SE] - f[dir::NW]) + (f[dir::NE] - f[dir::SW]))) + (f[dir::E] - f[dir::W]));
+}
+
+
+inline __host__ __device__ real getIncompressibleVelocityX2(const real *const &f /*[27]*/)
+{
+    return ((((f[dir::TNE] - f[dir::BSW]) + (f[dir::BNW] - f[dir::TSE])) + ((f[dir::TNW] - f[dir::BSE]) + (f[dir::BNE] - f[dir::TSW]))) +
+            (((f[dir::BN] - f[dir::TS]) + (f[dir::TN] - f[dir::BS])) + ((f[dir::NW] - f[dir::SE]) + (f[dir::NE] - f[dir::SW]))) + (f[dir::N] - f[dir::S]));
+}
+
+
+inline __host__ __device__ real getIncompressibleVelocityX3(const real *const &f /*[27]*/)
+{
+    return ((((f[dir::TNE] - f[dir::BSW]) + (f[dir::TSE] - f[dir::BNW])) + ((f[dir::TNW] - f[dir::BSE]) + (f[dir::TSW] - f[dir::BNE]))) +
+            (((f[dir::TS] - f[dir::BN]) + (f[dir::TN] - f[dir::BS])) + ((f[dir::TW] - f[dir::BE]) + (f[dir::TE] - f[dir::BW]))) + (f[dir::T] - f[dir::B]));
+}
+
+
+
+/*
+* Compressible Macroscopic Quantities
+*/
+inline __host__ __device__ real getCompressibleVelocityX1(const real *const &f27, const real& rho)
+{
+    return getIncompressibleVelocityX1(f27) / (rho + constant::c1o1);
+}
+
+
+inline __host__ __device__ real getCompressibleVelocityX2(const real *const &f27, const real& rho)
+{
+    return getIncompressibleVelocityX2(f27) / (rho + constant::c1o1);
+}
+
+
+inline __host__ __device__ real getCompressibleVelocityX3(const real *const &f27, const real& rho)
+{
+    return getIncompressibleVelocityX3(f27) / (rho + constant::c1o1);
+}
+
+/*
+* Pressure
+*/
+inline __host__ __device__ real getPressure(const real *const &f27, const real& rho, const real& vx, const real& vy, const real& vz)
+{
+    return (f27[dir::E] + f27[dir::W] + f27[dir::N] + f27[dir::S] + f27[dir::T] + f27[dir::B] + 
+    constant::c2o1 * (f27[dir::NE] + f27[dir::SW] + f27[dir::SE] + f27[dir::NW] + f27[dir::TE] + 
+                      f27[dir::BW] + f27[dir::BE] + f27[dir::TW] + f27[dir::TN] + f27[dir::BS] + 
+                      f27[dir::BN] + f27[dir::TS]) + 
+    constant::c3o1 * (f27[dir::TNE] + f27[dir::TSW] + f27[dir::TSE] + f27[dir::TNW] + 
+                      f27[dir::BNE] + f27[dir::BSW] + f27[dir::BSE] + f27[dir::BNW]) -
+    rho - (vx * vx + vy * vy + vz * vz) * (constant::c1o1 + rho)) * 
+    constant::c1o2 + rho; // times zero for incompressible case                 
+                          // Attention: op defined directly to op = 1 ; ^^^^(1.0/op-0.5)=0.5
+}
+
+}
+}
+
+#endif
diff --git a/src/lbm/MacroscopicQuantitiesTests.cpp b/src/lbm/MacroscopicQuantitiesTests.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..52f89943a3e6a99fcab13eba22de9959229a289d
--- /dev/null
+++ b/src/lbm/MacroscopicQuantitiesTests.cpp
@@ -0,0 +1,71 @@
+#include <gmock/gmock.h>
+
+#include "MacroscopicQuantities.h"
+#include "constants/D3Q27.h"
+
+
+/*
+* given distributions, which are all one.
+*/
+real f[27] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
+
+using namespace vf::lbm;
+
+
+TEST(MacroscopicQuantitiesTest, check_density)
+{
+    const double density = getDensity(f);
+
+    const double expected_density = 27.;
+    ASSERT_THAT(density, testing::DoubleEq(expected_density));
+}
+
+TEST(MacroscopicQuantitiesTest, whenFsAreEqual_velocityInEachDirectionShouldBeZero)
+{
+    const double velocityX1 = getIncompressibleVelocityX1(f);
+    const double velocityX2 = getIncompressibleVelocityX2(f);
+    const double velocityX3 = getIncompressibleVelocityX3(f);
+
+    const double expected_velocity = 0.;
+    EXPECT_THAT(velocityX1, testing::DoubleEq(expected_velocity));
+    EXPECT_THAT(velocityX2, testing::DoubleEq(expected_velocity));
+    EXPECT_THAT(velocityX3, testing::DoubleEq(expected_velocity));
+}
+
+TEST(MacroscopicQuantitiesTest, givenAllFsAreOne_when_Eis2_velocityInX1ShouldBeOne)
+{
+    f[dir::E] = 2.;
+
+    const double velocityX1 = getIncompressibleVelocityX1(f);
+    const double velocityX2 = getIncompressibleVelocityX2(f);
+    const double velocityX3 = getIncompressibleVelocityX3(f);
+
+    const double expected_velocity_x1 = 1.;
+    const double expected_velocity_x2 = 0.;
+    const double expected_velocity_x3 = 0.;
+
+    EXPECT_THAT(velocityX1, testing::DoubleEq(expected_velocity_x1));
+    EXPECT_THAT(velocityX2, testing::DoubleEq(expected_velocity_x2));
+    EXPECT_THAT(velocityX3, testing::DoubleEq(expected_velocity_x3));
+}
+
+TEST(MacroscopicQuantitiesTest, givenAllFsAreOne_when_Nis2_velocityInX2ShouldBeOne)
+{
+    f[dir::N] = 2.;
+
+    const double velocity = getIncompressibleVelocityX2(f);
+
+    const double expected_velocity = 1.;
+    ASSERT_THAT(velocity, testing::DoubleEq(expected_velocity));
+}
+
+
+TEST(MacroscopicQuantitiesTest, givenAllFsAreOne_when_Tis2_velocityInX3ShouldBeOne)
+{
+    f[dir::T] = 2.;
+
+    const double velocity = getIncompressibleVelocityX3(f);
+
+    const double expected_velocity = 1.;
+    ASSERT_THAT(velocity, testing::DoubleEq(expected_velocity));
+}
diff --git a/src/lbm/constants/D3Q27.h b/src/lbm/constants/D3Q27.h
new file mode 100644
index 0000000000000000000000000000000000000000..0b2cbfdd44e84af590fa2c2be4bf319cbfd4c815
--- /dev/null
+++ b/src/lbm/constants/D3Q27.h
@@ -0,0 +1,71 @@
+#ifndef LBM_D3Q27_H
+#define LBM_D3Q27_H
+
+namespace vf
+{
+namespace lbm
+{
+namespace dir
+{
+
+static constexpr int E    = 0;
+static constexpr int W    = 1;
+static constexpr int N    = 2;
+static constexpr int S    = 3;
+static constexpr int T    = 4;
+static constexpr int B    = 5;
+static constexpr int NE   = 6;
+static constexpr int SW   = 7;
+static constexpr int SE   = 8;
+static constexpr int NW   = 9;
+static constexpr int TE   = 10;
+static constexpr int BW   = 11;
+static constexpr int BE   = 12;
+static constexpr int TW   = 13;
+static constexpr int TN   = 14;
+static constexpr int BS   = 15;
+static constexpr int BN   = 16;
+static constexpr int TS   = 17;
+static constexpr int TNE  = 18;
+static constexpr int TNW  = 19;
+static constexpr int TSE  = 20;
+static constexpr int TSW  = 21;
+static constexpr int BNE  = 22;
+static constexpr int BNW  = 23;
+static constexpr int BSE  = 24;
+static constexpr int BSW  = 25;
+static constexpr int REST = 26;
+
+static constexpr int PZZ = 0;
+static constexpr int MZZ = 1;
+static constexpr int ZPZ = 2;
+static constexpr int ZMZ = 3;
+static constexpr int ZZP = 4;
+static constexpr int ZZM = 5;
+static constexpr int PPZ = 6;
+static constexpr int MMZ = 7;
+static constexpr int PMZ = 8;
+static constexpr int MPZ = 9;
+static constexpr int PZP = 10;
+static constexpr int MZM = 11;
+static constexpr int PZM = 12;
+static constexpr int MZP = 13;
+static constexpr int ZPP = 14;
+static constexpr int ZMM = 15;
+static constexpr int ZPM = 16;
+static constexpr int ZMP = 17;
+static constexpr int PPP = 18;
+static constexpr int MPP = 19;
+static constexpr int PMP = 20;
+static constexpr int MMP = 21;
+static constexpr int PPM = 22;
+static constexpr int MPM = 23;
+static constexpr int PMM = 24;
+static constexpr int MMM = 25;
+static constexpr int ZZZ = 26;
+
+}
+}
+}
+
+#endif
diff --git a/src/lbm/cuda/CMakeLists.txt b/src/lbm/cuda/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..be16988a480650cbab416652655af9766bcf8ec7
--- /dev/null
+++ b/src/lbm/cuda/CMakeLists.txt
@@ -0,0 +1,12 @@
+project(lbmCuda LANGUAGES CUDA CXX)
+
+
+vf_add_library(NAME lbmCuda BUILDTYPE static PUBLIC_LINK basics FOLDER ../../lbm)
+
+
+set_target_properties(lbmCuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+set_source_files_properties(../KernelParameter.cpp PROPERTIES LANGUAGE CUDA)
+
+set_source_files_properties(../CumulantChimera.cpp PROPERTIES LANGUAGE CUDA)
+set_source_files_properties(../BGK.cpp PROPERTIES LANGUAGE CUDA)
\ No newline at end of file